diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index 58b001be24..c7dd945f5f 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -30,12 +30,12 @@ jobs:
         fetch-depth: 2
 
     - name: Setup Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: '3.x'
 
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v2
+      uses: github/codeql-action/init@v3
       with:
         languages: ${{ matrix.language }}
         config-file: ./.github/codeql/${{ matrix.language }}.yml
@@ -55,4 +55,4 @@ jobs:
         cmake --build . --parallel 2
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v2
+      uses: github/codeql-action/analyze@v3
diff --git a/.github/workflows/compile-msvc.yml b/.github/workflows/compile-msvc.yml
index 5c6ceeefb4..1a0f1ea62f 100644
--- a/.github/workflows/compile-msvc.yml
+++ b/.github/workflows/compile-msvc.yml
@@ -24,7 +24,7 @@ jobs:
         fetch-depth: 2
 
     - name: Select Python version
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: '3.11'
 
diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml
index 00a4596cc8..c0c3e3f89a 100644
--- a/.github/workflows/coverity.yml
+++ b/.github/workflows/coverity.yml
@@ -25,7 +25,7 @@ jobs:
 
     - name: Cache Coverity
       id: cache-coverity
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: ./download/
         key: ${{ runner.os }}-download-${{ hashFiles('**/coverity_tool.*') }}
diff --git a/.github/workflows/unittest-macos.yml b/.github/workflows/unittest-macos.yml
index 6970faceaa..f9c2a838d6 100644
--- a/.github/workflows/unittest-macos.yml
+++ b/.github/workflows/unittest-macos.yml
@@ -32,7 +32,7 @@ jobs:
       run: mkdir build
 
     - name: Set up ccache
-      uses: actions/cache@v3
+      uses: actions/cache@v4
       with:
         path: ${{ env.CCACHE_DIR }}
         key: macos-ccache-${{ github.sha }}
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 28e02bbee7..f7e9b314bd 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -971,33 +971,53 @@ if(PKG_KOKKOS)
   endif()
 endif()
 if(PKG_KSPACE)
-  message(STATUS "<<< FFT settings >>>
--- Primary FFT lib:  ${FFT}")
-  if(FFT_SINGLE)
-    message(STATUS "Using single precision FFTs")
-  else()
-    message(STATUS "Using double precision FFTs")
-  endif()
-  if(FFT_FFTW_THREADS OR FFT_MKL_THREADS)
-    message(STATUS "Using threaded FFTs")
-  else()
-    message(STATUS "Using non-threaded FFTs")
-  endif()
-  if(PKG_KOKKOS)
-    if(Kokkos_ENABLE_CUDA)
-      if(FFT STREQUAL "KISS")
-        message(STATUS "Kokkos FFT: KISS")
-      else()
-        message(STATUS "Kokkos FFT: cuFFT")
-      endif()
-    elseif(Kokkos_ENABLE_HIP)
-      if(FFT STREQUAL "KISS")
-        message(STATUS "Kokkos FFT: KISS")
-      else()
-        message(STATUS "Kokkos FFT: hipFFT")
-      endif()
+  if (LMP_HEFFTE)
+    message(STATUS "<<< FFT settings >>>
+-- Primary FFT lib:  heFFTe")
+    if (HEFFTE_BACKEND)
+      message(STATUS "heFFTe backend:  ${HEFFTE_BACKEND}")
     else()
-      message(STATUS "Kokkos FFT: ${FFT}")
+      message(STATUS "heFFTe backend:  stock (builtin FFT implementation, tested for corrected but not optimized for production)")
+    endif()
+    if(FFT_SINGLE)
+      message(STATUS "Using single precision FFTs")
+    else()
+      message(STATUS "Using double precision FFTs")
+    endif()
+  else()
+    message(STATUS "<<< FFT settings >>>
+-- Primary FFT lib:  ${FFT}")
+    if(FFT_SINGLE)
+      message(STATUS "Using single precision FFTs")
+    else()
+      message(STATUS "Using double precision FFTs")
+    endif()
+    if(FFT_FFTW_THREADS OR FFT_MKL_THREADS)
+      message(STATUS "Using threaded FFTs")
+    else()
+      message(STATUS "Using non-threaded FFTs")
+    endif()
+    if (FFT_HEFFTE)
+      message(STATUS "Using distributed algorithms from heFTTe")
+    else()
+      message(STATUS "Using builtin distributed algorithms")
+    endif()
+    if(PKG_KOKKOS)
+      if(Kokkos_ENABLE_CUDA)
+        if(FFT STREQUAL "KISS")
+          message(STATUS "Kokkos FFT: KISS")
+        else()
+          message(STATUS "Kokkos FFT: cuFFT")
+        endif()
+      elseif(Kokkos_ENABLE_HIP)
+        if(FFT STREQUAL "KISS")
+          message(STATUS "Kokkos FFT: KISS")
+        else()
+          message(STATUS "Kokkos FFT: hipFFT")
+        endif()
+      else()
+        message(STATUS "Kokkos FFT: ${FFT}")
+      endif()
     endif()
   endif()
 endif()
diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake
index 47be8b8538..6d0ce303a5 100644
--- a/cmake/Modules/Packages/GPU.cmake
+++ b/cmake/Modules/Packages/GPU.cmake
@@ -1,3 +1,10 @@
+
+# Silence CMake warnings about FindCUDA being obsolete.
+# We may need to eventually rewrite this section to use enable_language(CUDA)
+if(POLICY CMP0146)
+  cmake_policy(SET CMP0146 OLD)
+endif()
+
 set(GPU_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/GPU)
 set(GPU_SOURCES ${GPU_SOURCES_DIR}/gpu_extra.h
                 ${GPU_SOURCES_DIR}/fix_gpu.h
@@ -151,10 +158,10 @@ if(GPU_API STREQUAL "CUDA")
   endif()
 
   cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS ${CUDA_REQUEST_PIC}
-          -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES})
+          -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -allow-unsupported-compiler -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES})
 
   cuda_compile(GPU_OBJS ${GPU_LIB_CUDPP_CU} OPTIONS ${CUDA_REQUEST_PIC}
-          -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES})
+          -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -allow-unsupported-compiler -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING} -DLAMMPS_${LAMMPS_SIZES})
 
   foreach(CU_OBJ ${GPU_GEN_OBJS})
     get_filename_component(CU_NAME ${CU_OBJ} NAME_WE)
diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake
index 0edd9a3baa..30c46504ed 100644
--- a/cmake/Modules/Packages/KOKKOS.cmake
+++ b/cmake/Modules/Packages/KOKKOS.cmake
@@ -16,11 +16,6 @@ endif()
 if(Kokkos_ENABLE_OPENMP)
   if(NOT BUILD_OMP)
     message(FATAL_ERROR "Must enable BUILD_OMP with Kokkos_ENABLE_OPENMP")
-  else()
-    # NVHPC/(AMD)Clang does not seem to provide a detectable OpenMP version, but is far beyond version 3.1
-    if((OpenMP_CXX_VERSION VERSION_LESS 3.1) AND NOT ((CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")))
-      message(FATAL_ERROR "Compiler must support OpenMP 3.1 or later with Kokkos_ENABLE_OPENMP")
-    endif()
   endif()
 endif()
 ########################################################################
diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake
index de7e7e5b20..9c9c879cd4 100644
--- a/cmake/Modules/Packages/KSPACE.cmake
+++ b/cmake/Modules/Packages/KSPACE.cmake
@@ -46,6 +46,42 @@ else()
   target_compile_definitions(lammps PRIVATE -DFFT_KISS)
 endif()
 
+option(FFT_USE_HEFFTE  "Use heFFTe as the distributed FFT engine, overrides the FFT option."  OFF)
+if(FFT_USE_HEFFTE)
+  # if FFT_HEFFTE is enabled, switch the builtin FFT engine with Heffte
+  set(FFT_HEFFTE_BACKEND_VALUES FFTW MKL)
+  set(FFT_HEFFTE_BACKEND "" CACHE STRING "Select heFFTe backend, e.g., FFTW or MKL")
+  set_property(CACHE FFT_HEFFTE_BACKEND PROPERTY STRINGS ${FFT_HEFFTE_BACKEND_VALUES})
+
+  if(FFT_HEFFTE_BACKEND STREQUAL "FFTW") # respect the backend choice, FFTW or MKL
+    set(HEFFTE_COMPONENTS "FFTW")
+    set(Heffte_ENABLE_FFTW "ON" CACHE BOOL "Enables FFTW backend for heFFTe")
+  elseif(FFT_HEFFTE_BACKEND STREQUAL "MKL")
+    set(HEFFTE_COMPONENTS "MKL")
+    set(Heffte_ENABLE_MKL "ON" CACHE BOOL "Enables MKL backend for heFFTe")
+  else()
+    message(WARNING "FFT_HEFFTE_BACKEND not selected, defaulting to the builtin 'stock' backend, which is intended for testing and is not optimized for production runs")
+  endif()
+
+  find_package(Heffte 2.4.0 QUIET COMPONENTS ${HEFFTE_COMPONENTS})
+  if (NOT Heffte_FOUND) # download and build
+    include(FetchContent)
+    FetchContent_Declare(HEFFTE_PROJECT # using v2.4.0
+      URL  "https://github.com/icl-utk-edu/heffte/archive/refs/tags/v2.4.0.tar.gz"
+      URL_HASH SHA256=02310fb4f9688df02f7181667e61c3adb7e38baf79611d80919d47452ff7881d
+      )
+    FetchContent_Populate(HEFFTE_PROJECT)
+    add_subdirectory(${heffte_project_SOURCE_DIR} ${heffte_project_BINARY_DIR})
+    set_target_properties(lmp PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
+    set_target_properties(lammps PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
+    add_library(Heffte::Heffte INTERFACE IMPORTED GLOBAL)
+    target_link_libraries(Heffte::Heffte INTERFACE Heffte)
+  endif()
+
+  target_compile_definitions(lammps PRIVATE -DFFT_HEFFTE "-DFFT_HEFFTE_${FFT_HEFFTE_BACKEND}")
+  target_link_libraries(lammps PRIVATE Heffte::Heffte)
+endif()
+
 set(FFT_PACK "array" CACHE STRING "Optimization for FFT")
 set(FFT_PACK_VALUES array pointer memcpy)
 set_property(CACHE FFT_PACK PROPERTY STRINGS ${FFT_PACK_VALUES})
diff --git a/cmake/Modules/Packages/MDI.cmake b/cmake/Modules/Packages/MDI.cmake
index 447b941d99..b21e508b87 100644
--- a/cmake/Modules/Packages/MDI.cmake
+++ b/cmake/Modules/Packages/MDI.cmake
@@ -8,8 +8,8 @@ option(DOWNLOAD_MDI "Download and compile the MDI library instead of using an al
 
 if(DOWNLOAD_MDI)
   message(STATUS "MDI download requested - we will build our own")
-  set(MDI_URL "https://github.com/MolSSI-MDI/MDI_Library/archive/v1.4.16.tar.gz" CACHE STRING "URL for MDI tarball")
-  set(MDI_MD5 "407db44e2d79447ab5c1233af1965f65" CACHE STRING "MD5 checksum for MDI tarball")
+  set(MDI_URL "https://github.com/MolSSI-MDI/MDI_Library/archive/v1.4.26.tar.gz" CACHE STRING "URL for MDI tarball")
+  set(MDI_MD5 "3124bb85259471e2a53a891f04bf697a" CACHE STRING "MD5 checksum for MDI tarball")
   mark_as_advanced(MDI_URL)
   mark_as_advanced(MDI_MD5)
   GetFallbackURL(MDI_URL MDI_FALLBACK)
diff --git a/cmake/Modules/Packages/ML-PACE.cmake b/cmake/Modules/Packages/ML-PACE.cmake
index ce8f02f5f4..248b8eea76 100644
--- a/cmake/Modules/Packages/ML-PACE.cmake
+++ b/cmake/Modules/Packages/ML-PACE.cmake
@@ -1,33 +1,40 @@
-set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.10.04.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
+set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.11.25.fix.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
 
-set(PACELIB_MD5 "70ff79f4e59af175e55d24f3243ad1ff" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
+set(PACELIB_MD5 "b45de9a633f42ed65422567e3ce56f9f" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
 mark_as_advanced(PACELIB_URL)
 mark_as_advanced(PACELIB_MD5)
 GetFallbackURL(PACELIB_URL PACELIB_FALLBACK)
 
-# download library sources to build folder
-if(EXISTS ${CMAKE_BINARY_DIR}/libpace.tar.gz)
-  file(MD5 ${CMAKE_BINARY_DIR}/libpace.tar.gz DL_MD5)
-endif()
-if(NOT "${DL_MD5}" STREQUAL "${PACELIB_MD5}")
-  message(STATUS "Downloading ${PACELIB_URL}")
-  file(DOWNLOAD ${PACELIB_URL} ${CMAKE_BINARY_DIR}/libpace.tar.gz STATUS DL_STATUS SHOW_PROGRESS)
-  file(MD5 ${CMAKE_BINARY_DIR}/libpace.tar.gz DL_MD5)
-  if((NOT DL_STATUS EQUAL 0) OR (NOT "${DL_MD5}" STREQUAL "${PACELIB_MD5}"))
-    message(WARNING "Download from primary URL ${PACELIB_URL} failed\nTrying fallback URL ${PACELIB_FALLBACK}")
-    file(DOWNLOAD ${PACELIB_FALLBACK} ${CMAKE_BINARY_DIR}/libpace.tar.gz EXPECTED_HASH MD5=${PACELIB_MD5} SHOW_PROGRESS)
-  endif()
+# LOCAL_ML-PACE points to top-level dir with local lammps-user-pace repo,
+# to make it easier to check local build without going through the public github releases
+if(LOCAL_ML-PACE)
+ set(lib-pace "${LOCAL_ML-PACE}")
 else()
-  message(STATUS "Using already downloaded archive ${CMAKE_BINARY_DIR}/libpace.tar.gz")
-endif()
+  # download library sources to build folder
+  if(EXISTS ${CMAKE_BINARY_DIR}/libpace.tar.gz)
+    file(MD5 ${CMAKE_BINARY_DIR}/libpace.tar.gz DL_MD5)
+  endif()
+  if(NOT "${DL_MD5}" STREQUAL "${PACELIB_MD5}")
+    message(STATUS "Downloading ${PACELIB_URL}")
+    file(DOWNLOAD ${PACELIB_URL} ${CMAKE_BINARY_DIR}/libpace.tar.gz STATUS DL_STATUS SHOW_PROGRESS)
+    file(MD5 ${CMAKE_BINARY_DIR}/libpace.tar.gz DL_MD5)
+    if((NOT DL_STATUS EQUAL 0) OR (NOT "${DL_MD5}" STREQUAL "${PACELIB_MD5}"))
+      message(WARNING "Download from primary URL ${PACELIB_URL} failed\nTrying fallback URL ${PACELIB_FALLBACK}")
+      file(DOWNLOAD ${PACELIB_FALLBACK} ${CMAKE_BINARY_DIR}/libpace.tar.gz EXPECTED_HASH MD5=${PACELIB_MD5} SHOW_PROGRESS)
+    endif()
+  else()
+    message(STATUS "Using already downloaded archive ${CMAKE_BINARY_DIR}/libpace.tar.gz")
+  endif()
 
-# uncompress downloaded sources
-execute_process(
-  COMMAND ${CMAKE_COMMAND} -E remove_directory lammps-user-pace*
-  COMMAND ${CMAKE_COMMAND} -E tar xzf libpace.tar.gz
-  WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
-)
-get_newest_file(${CMAKE_BINARY_DIR}/lammps-user-pace-* lib-pace)
+
+  # uncompress downloaded sources
+  execute_process(
+    COMMAND ${CMAKE_COMMAND} -E remove_directory lammps-user-pace*
+    COMMAND ${CMAKE_COMMAND} -E tar xzf libpace.tar.gz
+    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
+  )
+  get_newest_file(${CMAKE_BINARY_DIR}/lammps-user-pace-* lib-pace)
+endif()
 
 add_subdirectory(${lib-pace} build-pace)
 set_target_properties(pace PROPERTIES CXX_EXTENSIONS ON OUTPUT_NAME lammps_pace${LAMMPS_MACHINE})
diff --git a/cmake/presets/gpu-cuda.cmake b/cmake/presets/gpu-cuda.cmake
new file mode 100644
index 0000000000..2ac6bd9ea6
--- /dev/null
+++ b/cmake/presets/gpu-cuda.cmake
@@ -0,0 +1,11 @@
+# preset that enables GPU and selects CUDA API
+
+set(PKG_GPU ON CACHE BOOL "Build GPU package" FORCE)
+set(GPU_API "cuda" CACHE STRING "APU used by GPU package" FORCE)
+set(GPU_PREC "mixed" CACHE STRING "" FORCE)
+
+set(CUDA_NVCC_FLAGS "-allow-unsupported-compiler" CACHE STRING "" FORCE)
+set(CUDA_NVCC_FLAGS_DEBUG "-allow-unsupported-compiler" CACHE STRING "" FORCE)
+set(CUDA_NVCC_FLAGS_MINSIZEREL "-allow-unsupported-compiler" CACHE STRING "" FORCE)
+set(CUDA_NVCC_FLAGS_RELWITHDEBINFO "-allow-unsupported-compiler" CACHE STRING "" FORCE)
+set(CUDA_NVCC_FLAGS_RELEASE "-allow-unsupported-compiler" CACHE STRING "" FORCE)
diff --git a/cmake/presets/oneapi.cmake b/cmake/presets/oneapi.cmake
index 2aacf1a1f5..393d1d9b68 100644
--- a/cmake/presets/oneapi.cmake
+++ b/cmake/presets/oneapi.cmake
@@ -18,11 +18,11 @@ set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
 
 unset(HAVE_OMP_H_INCLUDE CACHE)
 set(OpenMP_C "icx" CACHE STRING "" FORCE)
-set(OpenMP_C_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
+set(OpenMP_C_FLAGS "-qopenmp;-qopenmp-simd" CACHE STRING "" FORCE)
 set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
 set(OpenMP_CXX "icpx" CACHE STRING "" FORCE)
-set(OpenMP_CXX_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
+set(OpenMP_CXX_FLAGS "-qopenmp;-qopenmp-simd" CACHE STRING "" FORCE)
 set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
-set(OpenMP_Fortran_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
+set(OpenMP_Fortran_FLAGS "-qopenmp;-qopenmp-simd" CACHE STRING "" FORCE)
 set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE)
 
diff --git a/doc/Makefile b/doc/Makefile
index b652c515e1..f9f8336665 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -100,6 +100,7 @@ html: xmlgen $(VENV) $(SPHINXCONFIG)/conf.py $(ANCHORCHECK) $(MATHJAX)
 		env LC_ALL=C grep -n ' :[a-z]\+`' $(RSTDIR)/*.rst ;\
 		env LC_ALL=C grep -n ' `[^`]\+<[a-z][^`]\+`[^_]' $(RSTDIR)/*.rst ;\
 		env LC_ALL=C grep -n ':\(ref\|doc\):[^`]' $(RSTDIR)/*.rst ;\
+		env LC_ALL=C grep -n '\(ref\|doc\)`[^`]' $(RSTDIR)/*.rst ;\
 		$(PYTHON) $(BUILDDIR)/utils/check-styles.py -s ../src -d src ;\
 		echo "############################################" ;\
 		deactivate ;\
@@ -182,6 +183,7 @@ pdf: xmlgen $(VENV) $(SPHINXCONFIG)/conf.py $(ANCHORCHECK)
 		env LC_ALL=C grep -n ' :[a-z]\+`' $(RSTDIR)/*.rst ;\
 		env LC_ALL=C grep -n ' `[^`]\+<[a-z][^`]\+`[^_]' $(RSTDIR)/*.rst ;\
 		env LC_ALL=C grep -n ':\(ref\|doc\):[^`]' $(RSTDIR)/*.rst ;\
+		env LC_ALL=C grep -n '\(ref\|doc\)`[^`]' $(RSTDIR)/*.rst ;\
 		$(PYTHON) utils/check-styles.py -s ../src -d src ;\
 		echo "############################################" ;\
 		deactivate ;\
@@ -231,6 +233,7 @@ role_check :
 	@( env LC_ALL=C grep -n ' :[a-z]\+`' $(RSTDIR)/*.rst && exit 1 || : )
 	@( env LC_ALL=C grep -n ' `[^`]\+<[a-z][^`]\+`[^_]' $(RSTDIR)/*.rst && exit 1 || : )
 	@( env LC_ALL=C grep -n ':\(ref\|doc\):[^`]' $(RSTDIR)/*.rst && exit 1 || : )
+	@( env LC_ALL=C grep -n '\(ref\|doc\)`[^`]' $(RSTDIR)/*.rst && exit 1 || : )
 
 link_check : $(VENV) html
 	@(\
diff --git a/doc/github-development-workflow.md b/doc/github-development-workflow.md
index fccd75d29a..e16ae82764 100644
--- a/doc/github-development-workflow.md
+++ b/doc/github-development-workflow.md
@@ -36,10 +36,10 @@ requests.
 MUST be submitted as a pull request to GitHub.  All changes to the
 "develop" branch must be made exclusively through merging pull requests.
 The "release" and "stable" branches, respectively, are only to be
-updated upon feature or stable releases based on the associated
-tags.  Updates to the stable release in between stable releases
+updated upon "feature releases" or "stable releases" based on the
+associated tags.  Updates to the stable release in between stable releases
 (for example, back-ported bug fixes) are first merged into the "maintenance"
-branch and then into the "stable" branch as update releases.
+branch and then into the "stable" branch as "stable update releases".
 
 Pull requests may also be submitted to (long-running) feature branches
 created by LAMMPS developers inside the LAMMPS project, if needed. Those
@@ -131,7 +131,7 @@ testing -- that the code in the branch "develop" does not get easily
 broken.  These tests are run after every update to a pull request.  More
 extensive and time-consuming tests (including regression testing) are
 performed after code is merged to the "develop" branch.  There are feature
-releases of LAMMPS made about every 4-6 weeks at a point, when the LAMMPS
+releases of LAMMPS made about every 4-8 weeks at a point, when the LAMMPS
 developers feel, that a sufficient number of changes have been included
 and all post-merge testing has been successful.  These feature releases are
 marked with a `patch_<version date>` tag and the "release" branch
diff --git a/doc/graphviz/Makefile b/doc/graphviz/Makefile
index a3e0c94c63..00b651e888 100644
--- a/doc/graphviz/Makefile
+++ b/doc/graphviz/Makefile
@@ -16,8 +16,11 @@ clean:
 	rm -f $(IMGSVG) $(IMGPDF) $(IMGPNG) *~
 
 ifeq ($(HAS_DOT),YES)
-$(IMGDIR)/%.png: %.dot
+$(IMGDIR)/lammps-classes.png : lammps-classes.dot
 	dot -Tpng -Kneato -o $@ $<
+
+$(IMGDIR)/%.png: %.dot
+	dot -Tpng -Kdot -o $@ $<
 endif
 
 ifeq ($(HAS_DOT),NO)
diff --git a/doc/graphviz/lammps-releases.dot b/doc/graphviz/lammps-releases.dot
new file mode 100644
index 0000000000..f641cac029
--- /dev/null
+++ b/doc/graphviz/lammps-releases.dot
@@ -0,0 +1,34 @@
+// LAMMPS branches and releases
+digraph releases {
+    rankdir="LR";
+    github [shape="box" label="Pull Requests\non GitHub" height=0.75];
+    github -> develop [label="Merge commits"];
+    {
+        rank = "same";
+        work [shape="none" label="Development branches:"]
+        develop [label="'develop' branch" height=0.75];
+        maintenance [label="'maintenance' branch" height=0.75];
+    };
+    {
+        rank = "same";
+        upload [shape="none" label="Release branches:"]
+        release [label="'release' branch" height=0.75];
+        stable [label="'stable' branch" height=0.75];
+    };
+    develop -> release [label="Feature release\n(every 4-8 weeks)"];
+    release -> stable [label="Stable release\n(once per year)"];
+    stable -> maintenance [label="Reset on stable release" style="setlinewidth(2)"];
+    develop -> maintenance [label="Backports of bugfixes" style="dashed"];
+    maintenance -> stable [label="Updates to stable release"];
+    {
+        rank = "same";
+        tag [shape="none" label="Applied tags:"];
+        patchtag [shape="box" label="patch_<date>"];
+        stabletag [shape="box" label="stable_<date>"];
+        updatetag [shape="box" label="stable_<date>_update<num>"];
+    };
+    release -> patchtag [label="feature release" style="dotted"];
+    stable -> stabletag [label="stable release" style="dotted"];
+    stable -> updatetag [label="update release" style="dotted"];
+}
+
diff --git a/doc/src/Bibliography.rst b/doc/src/Bibliography.rst
index e9ea8b0925..4ed8e73dfe 100644
--- a/doc/src/Bibliography.rst
+++ b/doc/src/Bibliography.rst
@@ -562,6 +562,9 @@ Bibliography
 **(Kumar)**
    Kumar and Skinner, J. Phys. Chem. B, 112, 8311 (2008)
 
+**(Lafourcade)**
+  Lafourcade, Maillet, Denoual, Duval, Allera, Goryaeva, and Marinica, `Comp. Mat. Science, 230, 112534 (2023) <https://doi.org/10.1016/j.commatsci.2023.112534>`_
+
 **(Lamoureux and Roux)**
    G.\  Lamoureux, B. Roux, J. Chem. Phys 119, 3025 (2003)
 
diff --git a/doc/src/Build_development.rst b/doc/src/Build_development.rst
index c75c7a6a41..c674b2c258 100644
--- a/doc/src/Build_development.rst
+++ b/doc/src/Build_development.rst
@@ -255,16 +255,18 @@ A test run is then a a collection multiple individual test runs each
 with many comparisons to reference results based on template input
 files, individual command settings, relative error margins, and
 reference data stored in a YAML format file with ``.yaml``
-suffix. Currently the programs ``test_pair_style``, ``test_bond_style``, and
-``test_angle_style`` are implemented.  They will compare forces, energies and
-(global) stress for all atoms after a ``run 0`` calculation and after a
-few steps of MD with :doc:`fix nve <fix_nve>`, each in multiple variants
-with different settings and also for multiple accelerated styles. If a
-prerequisite style or package is missing, the individual tests are
-skipped.  All tests will be executed on a single MPI process, so using
-the CMake option ``-D BUILD_MPI=off`` can significantly speed up testing,
-since this will skip the MPI initialization for each test run.
-Below is an example command and output:
+suffix. Currently the programs ``test_pair_style``, ``test_bond_style``,
+``test_angle_style``, ``test_dihedral_style``, and
+``test_improper_style`` are implemented.  They will compare forces,
+energies and (global) stress for all atoms after a ``run 0`` calculation
+and after a few steps of MD with :doc:`fix nve <fix_nve>`, each in
+multiple variants with different settings and also for multiple
+accelerated styles. If a prerequisite style or package is missing, the
+individual tests are skipped.  All force style tests will be executed on
+a single MPI process, so using the CMake option ``-D BUILD_MPI=off`` can
+significantly speed up testing, since this will skip the MPI
+initialization for each test run.  Below is an example command and
+output:
 
 .. code-block:: console
 
@@ -416,15 +418,16 @@ When compiling LAMMPS with enabled tests, most test executables will
 need to be linked against the LAMMPS library.  Since this can be a very
 large library with many C++ objects when many packages are enabled, link
 times can become very long on machines that use the GNU BFD linker (e.g.
-Linux systems).  Alternatives like the ``lld`` linker of the LLVM project
-or the ``gold`` linker available with GNU binutils can speed up this step
-substantially. CMake will by default test if any of the two can be
-enabled and use it when ``ENABLE_TESTING`` is active.  It can also be
-selected manually through the ``CMAKE_CUSTOM_LINKER`` CMake variable.
-Allowed values are ``lld``, ``gold``, ``bfd``, or ``default``.  The
-``default`` option will use the system default linker otherwise, the
-linker is chosen explicitly.  This option is only available for the
-GNU or Clang C++ compiler.
+Linux systems).  Alternatives like the ``mold`` linker, the ``lld``
+linker of the LLVM project, or the ``gold`` linker available with GNU
+binutils can speed up this step substantially (in this order).  CMake
+will by default test if any of the three can be enabled and use it when
+``ENABLE_TESTING`` is active.  It can also be selected manually through
+the ``CMAKE_CUSTOM_LINKER`` CMake variable.  Allowed values are
+``mold``, ``lld``, ``gold``, ``bfd``, or ``default``.  The ``default``
+option will use the system default linker otherwise, the linker is
+chosen explicitly.  This option is only available for the GNU or Clang
+C++ compilers.
 
 Tests for other components and utility functions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -518,6 +521,8 @@ The following options are available.
    make fix-errordocs       # remove error docs in header files
    make check-permissions   # search for files with permissions issues
    make fix-permissions     # correct permissions issues in files
+   make check-docs          # search for several issues in the manual
+   make check-version       # list files with pending release version tags
    make check               # run all check targets from above
 
 These should help to make source and documentation files conforming
diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst
index 7576cae3eb..7fb7539506 100644
--- a/doc/src/Build_settings.rst
+++ b/doc/src/Build_settings.rst
@@ -44,6 +44,14 @@ require use of an FFT library to compute 1d FFTs.  The KISS FFT
 library is included with LAMMPS, but other libraries can be faster.
 LAMMPS can use them if they are available on your system.
 
+.. versionadded:: TBD
+
+Alternatively, LAMMPS can use the `heFFTe
+<https://icl-utk-edu.github.io/heffte/>`_ library for the MPI
+communication algorithms, which comes with many optimizations for
+special cases, e.g. leveraging available 2D and 3D FFTs in the back end
+libraries and better pipelining for packing and communication.
+
 .. tabs::
 
    .. tab:: CMake build
@@ -53,6 +61,7 @@ LAMMPS can use them if they are available on your system.
          -D FFT=value              # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS
          -D FFT_SINGLE=value       # yes or no (default), no = double precision
          -D FFT_PACK=value         # array (default) or pointer or memcpy
+         -D FFT_USE_HEFFTE=value   # yes or no (default), yes links to heFFTe
 
       .. note::
 
@@ -76,6 +85,16 @@ LAMMPS can use them if they are available on your system.
          -D MKL_INCLUDE_DIR=path     # ditto for Intel MKL library
          -D FFT_MKL_THREADS=on       # enable using threaded FFTs with MKL libraries
          -D MKL_LIBRARY=path         # path to MKL libraries
+         -D FFT_HEFFTE_BACKEND=value # FFTW or MKL or empty/undefined for the stock heFFTe back end
+         -D Heffte_ROOT=path         # path to an existing heFFTe installation
+
+      .. note::
+
+         heFFTe comes with a builtin (= stock) back end for FFTs, i.e. a
+         default internal FFT implementation; however, this stock back
+         end is intended for testing purposes only and is not optimized
+         for production runs.
+
 
    .. tab:: Traditional make
 
@@ -111,6 +130,24 @@ LAMMPS can use them if they are available on your system.
       files in its default search path.  You must specify ``FFT_LIB``
       with the appropriate FFT libraries to include in the link.
 
+      Traditional make can also link to heFFTe using an existing installation
+
+      .. code-block:: make
+
+         include <path-to-heffte-installation>/share/heffte/HeffteMakefile.in
+         FFT_INC = -DFFT_HEFFTE -DFFT_HEFFTE_FFTW $(heffte_include)
+         FFT_PATH =
+         FFT_LIB = $(heffte_link) $(heffte_libs)
+
+      The heFFTe install path will contain `HeffteMakefile.in`.
+      which will define the `heffte_` include variables needed to link to heFFTe from
+      an external project using traditional make.
+      The `-DFFT_HEFFTE` is required to switch to using heFFTe, while the optional `-DFFT_HEFFTE_FFTW`
+      selects the desired heFFTe back end, e.g., `-DFFT_HEFFTE_FFTW` or `-DFFT_HEFFTE_MKL`,
+      omitting the variable will default to the `stock` back end.
+      The heFFTe `stock` back end is intended to be used for testing and debugging,
+      but is not performance optimized for large scale production runs.
+
 The `KISS FFT library <https://github.com/mborgerding/kissfft>`_ is
 included in the LAMMPS distribution.  It is portable across all
 platforms.  Depending on the size of the FFTs and the number of
@@ -170,6 +207,16 @@ Depending on the machine, the size of the FFT grid, the number of
 processors used, one option may be slightly faster.  The default is
 ARRAY mode.
 
+When using ``-DFFT_HEFFTE`` CMake will first look for an existing
+install with hints provided by ``-DHeffte_ROOT``, as recommended by the
+CMake standard and note that the name is case sensitive. If CMake cannot
+find a heFFTe installation with the correct back end (e.g., FFTW or
+MKL), it will attempt to download and build the library automatically.
+In this case, LAMMPS CMake will also accept all heFFTe specific
+variables listed in the `heFFTe documentation
+<https://mkstoyanov.bitbucket.io/heffte/md_doxygen_installation.html>`_
+and those variables will be passed into the heFFTe build.
+
 ----------
 
 .. _size:
@@ -463,8 +510,8 @@ Exception handling when using LAMMPS as a library
 
 LAMMPS errors do not kill the calling code, but throw an exception.  In
 the C-library interface, the call stack is unwound and control returns
-to the caller, e.g. to Python or a code that is coupled to LAMMPS and
-the error status can be queried.  When using C++ directly, the calling
+to the caller, e.g. to Python or a code that is coupled to LAMMPS. The
+error status can then be queried.  When using C++ directly, the calling
 code has to be set up to *catch* exceptions thrown from within LAMMPS.
 
 .. note::
diff --git a/doc/src/Commands_bond.rst b/doc/src/Commands_bond.rst
index aaf706b5df..ef36b6b7c4 100644
--- a/doc/src/Commands_bond.rst
+++ b/doc/src/Commands_bond.rst
@@ -124,7 +124,7 @@ OPT.
    *
    *
    * :doc:`charmm (iko) <dihedral_charmm>`
-   * :doc:`charmmfsw <dihedral_charmm>`
+   * :doc:`charmmfsw (k) <dihedral_charmm>`
    * :doc:`class2 (ko) <dihedral_class2>`
    * :doc:`cosine/shift/exp (o) <dihedral_cosine_shift_exp>`
    * :doc:`fourier (io) <dihedral_fourier>`
diff --git a/doc/src/Commands_compute.rst b/doc/src/Commands_compute.rst
index dbd6b58ce7..0352ad5374 100644
--- a/doc/src/Commands_compute.rst
+++ b/doc/src/Commands_compute.rst
@@ -100,6 +100,7 @@ KOKKOS, o = OPENMP, t = OPT.
    * :doc:`nbond/atom <compute_nbond_atom>`
    * :doc:`omega/chunk <compute_omega_chunk>`
    * :doc:`orientorder/atom (k) <compute_orientorder_atom>`
+   * :doc:`pace <compute_pace>`
    * :doc:`pair <compute_pair>`
    * :doc:`pair/local <compute_pair_local>`
    * :doc:`pe <compute_pe>`
@@ -115,12 +116,15 @@ KOKKOS, o = OPENMP, t = OPT.
    * :doc:`property/grid <compute_property_grid>`
    * :doc:`property/local <compute_property_local>`
    * :doc:`ptm/atom <compute_ptm_atom>`
+   * :doc:`rattlers/atom <compute_rattlers_atom>`
    * :doc:`rdf <compute_rdf>`
+   * :doc:`reaxff/atom (k) <compute_reaxff_atom>`
    * :doc:`reduce <compute_reduce>`
    * :doc:`reduce/chunk <compute_reduce_chunk>`
    * :doc:`reduce/region <compute_reduce>`
    * :doc:`rigid/local <compute_rigid_local>`
    * :doc:`saed <compute_saed>`
+   * :doc:`slcsa/atom <compute_slcsa_atom>`
    * :doc:`slice <compute_slice>`
    * :doc:`smd/contact/radius <compute_smd_contact_radius>`
    * :doc:`smd/damage <compute_smd_damage>`
diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst
index 7301d1345e..e89e302673 100644
--- a/doc/src/Commands_fix.rst
+++ b/doc/src/Commands_fix.rst
@@ -122,6 +122,7 @@ OPT.
    * :doc:`mvv/tdpd <fix_mvv_dpd>`
    * :doc:`neb <fix_neb>`
    * :doc:`neb/spin <fix_neb_spin>`
+   * :doc:`nonaffine/displacement <fix_nonaffine_displacement>`
    * :doc:`nph (ko) <fix_nh>`
    * :doc:`nph/asphere (o) <fix_nph_asphere>`
    * :doc:`nph/body <fix_nph_body>`
@@ -238,10 +239,10 @@ OPT.
    * :doc:`store/force <fix_store_force>`
    * :doc:`store/state <fix_store_state>`
    * :doc:`tdpd/source <fix_dpd_source>`
-   * :doc:`temp/berendsen <fix_temp_berendsen>`
+   * :doc:`temp/berendsen (k) <fix_temp_berendsen>`
    * :doc:`temp/csld <fix_temp_csvr>`
    * :doc:`temp/csvr <fix_temp_csvr>`
-   * :doc:`temp/rescale <fix_temp_rescale>`
+   * :doc:`temp/rescale (k) <fix_temp_rescale>`
    * :doc:`temp/rescale/eff <fix_temp_rescale_eff>`
    * :doc:`tfmc <fix_tfmc>`
    * :doc:`tgnpt/drude <fix_tgnh_drude>`
diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst
index 828f0b10d9..9f2bdbce79 100644
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@@ -87,7 +87,7 @@ OPT.
    * :doc:`coul/long/soft (o) <pair_fep_soft>`
    * :doc:`coul/msm (o) <pair_coul>`
    * :doc:`coul/slater/cut <pair_coul_slater>`
-   * :doc:`coul/slater/long <pair_coul_slater>`
+   * :doc:`coul/slater/long (g) <pair_coul_slater>`
    * :doc:`coul/shield <pair_coul_shield>`
    * :doc:`coul/streitz <pair_coul>`
    * :doc:`coul/tt <pair_coul_tt>`
@@ -110,7 +110,7 @@ OPT.
    * :doc:`eam/he <pair_eam>`
    * :doc:`edip (o) <pair_edip>`
    * :doc:`edip/multi <pair_edip>`
-   * :doc:`edpd <pair_mesodpd>`
+   * :doc:`edpd (g) <pair_mesodpd>`
    * :doc:`eff/cut <pair_eff>`
    * :doc:`eim (o) <pair_eim>`
    * :doc:`exp6/rx (k) <pair_exp6_rx>`
@@ -146,7 +146,7 @@ OPT.
    * :doc:`lj/charmm/coul/long/soft (o) <pair_fep_soft>`
    * :doc:`lj/charmm/coul/msm (o) <pair_charmm>`
    * :doc:`lj/charmmfsw/coul/charmmfsh <pair_charmm>`
-   * :doc:`lj/charmmfsw/coul/long <pair_charmm>`
+   * :doc:`lj/charmmfsw/coul/long (k) <pair_charmm>`
    * :doc:`lj/class2 (gko) <pair_class2>`
    * :doc:`lj/class2/coul/cut (ko) <pair_class2>`
    * :doc:`lj/class2/coul/cut/soft <pair_fep_soft>`
@@ -158,14 +158,14 @@ OPT.
    * :doc:`lj/cut (gikot) <pair_lj>`
    * :doc:`lj/cut/coul/cut (gko) <pair_lj_cut_coul>`
    * :doc:`lj/cut/coul/cut/dielectric (o) <pair_dielectric>`
-   * :doc:`lj/cut/coul/cut/soft (o) <pair_fep_soft>`
+   * :doc:`lj/cut/coul/cut/soft (go) <pair_fep_soft>`
    * :doc:`lj/cut/coul/debye (gko) <pair_lj_cut_coul>`
    * :doc:`lj/cut/coul/debye/dielectric (o) <pair_dielectric>`
    * :doc:`lj/cut/coul/dsf (gko) <pair_lj_cut_coul>`
    * :doc:`lj/cut/coul/long (gikot) <pair_lj_cut_coul>`
    * :doc:`lj/cut/coul/long/cs <pair_cs>`
    * :doc:`lj/cut/coul/long/dielectric (o) <pair_dielectric>`
-   * :doc:`lj/cut/coul/long/soft (o) <pair_fep_soft>`
+   * :doc:`lj/cut/coul/long/soft (go) <pair_fep_soft>`
    * :doc:`lj/cut/coul/msm (go) <pair_lj_cut_coul>`
    * :doc:`lj/cut/coul/msm/dielectric <pair_dielectric>`
    * :doc:`lj/cut/coul/wolf (o) <pair_lj_cut_coul>`
@@ -202,7 +202,7 @@ OPT.
    * :doc:`lubricate/poly (o) <pair_lubricate>`
    * :doc:`lubricateU <pair_lubricateU>`
    * :doc:`lubricateU/poly <pair_lubricateU>`
-   * :doc:`mdpd <pair_mesodpd>`
+   * :doc:`mdpd (g) <pair_mesodpd>`
    * :doc:`mdpd/rhosum <pair_mesodpd>`
    * :doc:`meam (k) <pair_meam>`
    * :doc:`meam/ms (k) <pair_meam>`
@@ -268,11 +268,11 @@ OPT.
    * :doc:`smtbq <pair_smtbq>`
    * :doc:`snap (ik) <pair_snap>`
    * :doc:`soft (go) <pair_soft>`
-   * :doc:`sph/heatconduction <pair_sph_heatconduction>`
+   * :doc:`sph/heatconduction (g) <pair_sph_heatconduction>`
    * :doc:`sph/idealgas <pair_sph_idealgas>`
-   * :doc:`sph/lj <pair_sph_lj>`
+   * :doc:`sph/lj (g) <pair_sph_lj>`
    * :doc:`sph/rhosum <pair_sph_rhosum>`
-   * :doc:`sph/taitwater <pair_sph_taitwater>`
+   * :doc:`sph/taitwater (g) <pair_sph_taitwater>`
    * :doc:`sph/taitwater/morris <pair_sph_taitwater_morris>`
    * :doc:`spin/dipole/cut <pair_spin_dipole>`
    * :doc:`spin/dipole/long <pair_spin_dipole>`
diff --git a/doc/src/Developer_platform.rst b/doc/src/Developer_platform.rst
index cdc4bb6770..9b05299146 100644
--- a/doc/src/Developer_platform.rst
+++ b/doc/src/Developer_platform.rst
@@ -70,6 +70,9 @@ File and path functions and global constants
 .. doxygenfunction:: is_console
    :project: progguide
 
+.. doxygenfunction:: disk_free
+   :project: progguide
+
 .. doxygenfunction:: path_is_directory
    :project: progguide
 
diff --git a/doc/src/Developer_unittest.rst b/doc/src/Developer_unittest.rst
index 67c5ce365a..b48c3b4e17 100644
--- a/doc/src/Developer_unittest.rst
+++ b/doc/src/Developer_unittest.rst
@@ -121,7 +121,7 @@ will be suppressed and only a summary printed, but adding
 the '-V' option will then produce output from the tests
 above like the following:
 
-.. code-block::
+.. code-block:: console
 
    [...]
    1: [ RUN      ] Tokenizer.empty_string
@@ -274,9 +274,7 @@ Tests for using the Fortran module are in the ``unittest/fortran``
 folder.  Since they are also using the GoogleTest library, they require
 to also implement test wrappers in C++ that will call fortran functions
 which provide a C function interface through ISO_C_BINDINGS that will in
-turn call the functions in the LAMMPS Fortran module.  This part of the
-unit tests is incomplete since the Fortran module it is based on is
-incomplete as well.
+turn call the functions in the LAMMPS Fortran module.
 
 Tests for the C++-style library interface
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -397,10 +395,10 @@ compare with the reference and also start from the data file.  A final
 check will use multi-cutoff r-RESPA (if supported by the pair style) at
 a 1:1 split and compare to the Verlet results.  These sets of tests are
 run with multiple test fixtures for accelerated styles (OPT, OPENMP,
-INTEL) and for the latter two with 4 OpenMP threads enabled.  For
-these tests the relative error (epsilon) is lowered by a common factor
-due to the additional numerical noise, but the tests are still comparing
-to the same reference data.
+INTEL, KOKKOS (OpenMP only)) and for the latter three with 4 OpenMP
+threads enabled.  For these tests the relative error (epsilon) is lowered
+by a common factor due to the additional numerical noise, but the tests
+are still comparing to the same reference data.
 
 Additional tests will check whether all listed extract keywords are
 supported and have the correct dimensionality and the final set of tests
@@ -434,17 +432,19 @@ The ``test_pair_style`` tester is used with 4 categories of test inputs:
   pair style is defined, but the computation of the pair style contributions
   is disabled.
 
-The ``test_bond_style`` and ``test_angle_style`` are set up in a similar
-fashion and share support functions with the pair style tester.  The final
-group of tests in this section is for fix styles that add/manipulate forces
-and velocities, e.g. for time integration, thermostats and more.
+The ``test_bond_style``, ``test_angle_style``, ``test_dihedral_style``, and
+``test_improper_style`` tester programs are set up in a similar fashion and
+share support functions with the pair style tester.  The final group of
+tests in this section is for fix styles that add/manipulate forces and
+velocities, e.g. for time integration, thermostats and more.
 
-Adding a new test is easiest done by copying and modifying an existing test
-for a style that is similar to one to be tested.  The file name should follow
-the naming conventions described above and after copying the file, the first
-step is to replace the style names where needed.  The coefficient values
-do not have to be meaningful, just in a reasonable range for the given system.
-It does not matter if some forces are large, for as long as they do not diverge.
+Adding a new test is easiest done by copying and modifying an existing YAML
+file for a style that is similar to one to be tested.  The file name should
+follow the naming conventions described above and after copying the file,
+the first step is to replace the style names where needed.  The coefficient
+values do not have to be meaningful, just in a reasonable range for the
+given system.  It does not matter if some forces are large, for as long as
+they do not diverge.
 
 The template input files define a large number of index variables at the top
 that can be modified inside the YAML file to control the behavior.  For example,
@@ -472,7 +472,7 @@ Note that this disables computing the kspace contribution, but still will run
 the setup.  The "gewald" parameter should be set explicitly to speed up the run.
 For styles with long-range electrostatics, typically two tests are added one using
 the (slower) analytic approximation of the erfc() function and the other using
-the tabulated coulomb, to test both code paths. The reference results in the YAML
+the tabulated coulomb, to test both code paths.  The reference results in the YAML
 files then should be compared manually, if they agree well enough within the limits
 of those two approximations.
 
@@ -526,3 +526,102 @@ The ``unittest/tools`` folder contains tests for programs in the
 shell, which are implemented as a python scripts using the ``unittest``
 Python module and launching the tool commands through the ``subprocess``
 Python module.
+
+
+Troubleshooting failed unit tests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The are by default no unit tests for newly added features (e.g. pair, fix,
+or compute styles) unless your pull request also includes tests for the
+added features.  If you are modifying some features, you may see failures
+for existing tests, if your modifications have some unexpected side effects
+or your changes render the existing text invalid.  If you are adding an
+accelerated version of an existing style, then only tests for INTEL,
+KOKKOS (with OpenMP only), OPENMP, and OPT will be run automatically.
+Tests for the GPU package are time consuming and thus are only run
+*after* a merge, or when a special label, ``gpu_unit_tests`` is added
+to the pull request.  After the test has started, it is often best to
+remove the label since every PR activity will re-trigger the test (that
+is a limitation of triggering a test with a label).  Support for unit
+tests with using KOKKOS with GPU acceleration is currently not supported.
+
+When you see a failed build on GitHub, click on ``Details`` to be taken
+to the corresponding LAMMPS Jenkins CI web page.  Click on the "Exit"
+symbol near the ``Logout`` button on the top right of that page to go to
+the "classic view".  In the classic view, there is a list of the
+individual runs that make up this test run (they are shown but cannot be
+inspected in the default view).  You can click on any of those.
+Clicking on ``Test Result`` will display the list of failed tests. Click
+on the "Status" column to sort the tests based on their Failed or Passed
+status.  Then click on the failed test to expand its output.
+
+For example, the following output snippet shows the failed unit test
+
+.. code-block:: console
+
+   [ RUN      ] PairStyle.gpu
+   /home/builder/workspace/dev/pull_requests/ubuntu_gpu/unit_tests/cmake_gpu_opencl_mixed_smallbig_clang_static/unittest/force-styles/test_main.cpp:63: Failure
+   Expected: (err) <= (epsilon)
+   Actual: 0.00018957912910606503 vs 0.0001
+   Google Test trace:
+   /home/builder/workspace/dev/pull_requests/ubuntu_gpu/unit_tests/cmake_gpu_opencl_mixed_smallbig_clang_static/unittest/force-styles/test_main.cpp:56: EXPECT_FORCES: init_forces (newton off)
+   /home/builder/workspace/dev/pull_requests/ubuntu_gpu/unit_tests/cmake_gpu_opencl_mixed_smallbig_clang_static/unittest/force-styles/test_main.cpp:64: Failure
+   Expected: (err) <= (epsilon)
+   Actual: 0.00022892713393549854 vs 0.0001
+
+The failed assertions provide line numbers in the test source
+(e.g. ``test_main.cpp:56``), from which one can understand what
+specific assertion failed.
+
+Note that the force style engine runs one of a small number of systems
+in a rather off-equilibrium configuration with a few atoms for a few
+steps, writes data and restart files, uses :doc:`the clear command
+<clear>` to reset LAMMPS, and then runs from those files with different
+settings (e.g. newton on/off) and integrators (e.g. verlet vs. respa).
+Beyond potential issues/bugs in the source code, the mismatch between
+the expected and actual values could be that force arrays are not
+properly cleared between multiple run commands or that class members are
+not correctly initialized or written to or read from a data or restart
+file.
+
+While the epsilon (relative precision) for a single, `IEEE 754 compliant
+<https://en.wikipedia.org/wiki/IEEE_754>`_, double precision floating
+point operation is at about 2.2e-16, the achievable precision for the
+tests is lower due to most numbers being sums over intermediate results
+and the non-associativity of floating point math leading to larger
+errors.  In some cases specific properties of the tested style.  As a
+rule of thumb, the test epsilon can often be in the range 5.0e-14 to
+1.0e-13.  But for "noisy" force kernels, e.g. those a larger amount of
+arithmetic operations involving `exp()`, `log()` or `sin()` functions,
+and also due to the effect of compiler optimization or differences
+between compilers or platforms, epsilon may need to be further relaxed,
+sometimes epsilon can be relaxed to 1.0e-12. If interpolation or lookup
+tables are used, epsilon may need to be set to 1.0e-10 or even higher.
+For tests of accelerated styles, the per-test epsilon is multiplied
+by empirical factors that take into account the differences in the order
+of floating point operations or that some or most intermediate operations
+may be done using approximations or with single precision floating point
+math.
+
+To rerun the failed unit test individually, change to the ``build`` directory
+and run the test with verbose output. For example,
+
+.. code-block:: bash
+
+    env TEST_ARGS=-v ctest -R ^MolPairStyle:lj_cut_coul_long -V
+
+``ctest`` with the ``-V`` flag also shows the exact command line
+of the test. One can then use ``gdb --args`` to further debug and
+catch exceptions with the test command, for example,
+
+.. code-block:: bash
+
+    gdb --args /path/to/lammps/build/test_pair_style /path/to/lammps/unittest/force-styles/tests/mol-pair-lj_cut_coul_long.yaml
+
+
+It is recommended to configure the build with ``-D
+BUILD_SHARED_LIBS=on`` and use a custom linker to shorten the build time
+during recompilation.  Installing `ccache` in your development
+environment helps speed up recompilation by caching previous
+compilations and detecting when the same compilation is being done
+again.  Please see :doc:`Build_development` for further details.
diff --git a/doc/src/Developer_updating.rst b/doc/src/Developer_updating.rst
index 36c6974b30..cd61eaa5a1 100644
--- a/doc/src/Developer_updating.rst
+++ b/doc/src/Developer_updating.rst
@@ -20,6 +20,7 @@ Available topics in mostly chronological order are:
 - `Use ev_init() to initialize variables derived from eflag and vflag`_
 - `Use utils::numeric() functions instead of force->numeric()`_
 - `Use utils::open_potential() function to open potential files`_
+- `Use symbolic Atom and AtomVec constants instead of numerical values`_
 - `Simplify customized error messages`_
 - `Use of "override" instead of "virtual"`_
 - `Simplified and more compact neighbor list requests`_
@@ -196,6 +197,71 @@ New:
 
    fp = utils::open_potential(filename, lmp);
 
+Use symbolic Atom and AtomVec constants instead of numerical values
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. versionchanged:: 18Sep2020
+
+Properties in LAMMPS that were represented by integer values (0, 1,
+2, 3) to indicate settings in the ``Atom`` and ``AtomVec`` classes (or
+classes derived from it) (and its derived classes) have been converted
+to use scoped enumerators instead.
+
+.. list-table::
+   :header-rows: 1
+   :widths: auto
+
+   * - Symbolic Constant
+     - Value
+     - Symbolic Constant
+     - Value
+   * - Atom::GROW
+     - 0
+     - Atom::MAP_NONE
+     - 0
+   * - Atom::RESTART
+     - 1
+     - Atom::MAP_ARRAY
+     - 1
+   * - Atom::BORDER
+     - 2
+     - Atom::MAP_HASH
+     - 2
+   * - Atom::ATOMIC
+     - 0
+     - Atom::MAP_YES
+     - 3
+   * - Atom::MOLECULAR
+     - 1
+     - AtomVec::PER_ATOM
+     - 0
+   * - Atom::TEMPLATE
+     - 2
+     - AtomVec::PER_TYPE
+     - 1
+
+Old:
+
+.. code-block:: c++
+
+   molecular = 0;
+   mass_type = 1;
+   if (atom->molecular == 2)
+   if (atom->map_style == 2)
+   atom->add_callback(0);
+   atom->delete_callback(id,1);
+
+New:
+
+.. code-block:: c++
+
+   molecular = Atom::ATOMIC;
+   mass_type = AtomVec::PER_TYPE;
+   if (atom->molecular == Atom::TEMPLATE)
+   if (atom->map_style == Atom::MAP_HASH)
+   atom->add_callback(Atom::GROW);
+   atom->delete_callback(id,Atom::RESTART);
+
 Simplify customized error messages
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/doc/src/Fortran.rst b/doc/src/Fortran.rst
index 76fdff753a..c8d153b2e3 100644
--- a/doc/src/Fortran.rst
+++ b/doc/src/Fortran.rst
@@ -315,6 +315,10 @@ of the contents of the :f:mod:`LIBLAMMPS` Fortran interface to LAMMPS.
    :ftype extract_variable: function
    :f set_variable: :f:subr:`set_variable`
    :ftype set_variable: subroutine
+   :f set_string_variable: :f:subr:`set_set_string_variable`
+   :ftype set_string_variable: subroutine
+   :f set_internal_variable: :f:subr:`set_internal_variable`
+   :ftype set_internal_variable: subroutine
    :f gather_atoms: :f:subr:`gather_atoms`
    :ftype gather_atoms: subroutine
    :f gather_atoms_concat: :f:subr:`gather_atoms_concat`
@@ -1398,7 +1402,28 @@ Procedures Bound to the :f:type:`lammps` Derived Type
 
    Set the value of a string-style variable.
 
-   .. versionadded:: 3Nov2022
+   .. deprecated:: TBD
+
+   This function assigns a new value from the string *str* to the string-style
+   variable *name*\ . If *name* does not exist or is not a string-style
+   variable, an error is generated.
+
+   .. warning::
+
+      This subroutine is deprecated and :f:subr:`set_string_variable`
+      should be used instead.
+
+   :p character(len=*) name: name of the variable
+   :p character(len=*) str:  new value to assign to the variable
+   :to: :cpp:func:`lammps_set_variable`
+
+--------
+
+.. f:subroutine:: set_string_variable(name, str)
+
+   Set the value of a string-style variable.
+
+   .. versionadded:: TBD
 
    This function assigns a new value from the string *str* to the string-style
    variable *name*\ . If *name* does not exist or is not a string-style
@@ -1406,7 +1431,23 @@ Procedures Bound to the :f:type:`lammps` Derived Type
 
    :p character(len=*) name: name of the variable
    :p character(len=*) str:  new value to assign to the variable
-   :to: :cpp:func:`lammps_set_variable`
+   :to: :cpp:func:`lammps_set_string_variable`
+
+--------
+
+.. f:subroutine:: set_internal_variable(name, val)
+
+   Set the value of a internal-style variable.
+
+   .. versionadded:: TBD
+
+   This function assigns a new value from the floating-point number *val* to
+   the internal-style variable *name*\ . If *name* does not exist or is not
+   an internal-style variable, an error is generated.
+
+   :p character(len=*) name: name of the variable
+   :p read(c_double) val:  new value to assign to the variable
+   :to: :cpp:func:`lammps_set_internal_variable`
 
 --------
 
diff --git a/doc/src/Howto_body.rst b/doc/src/Howto_body.rst
index 115b7797c8..968e10edd8 100644
--- a/doc/src/Howto_body.rst
+++ b/doc/src/Howto_body.rst
@@ -335,7 +335,7 @@ faces are listed, so that M = 6 + 3\*N + 1.
 The integer line has three values: number of vertices (N), number of
 edges (E) and number of faces (F). The floating point line(s) list 6
 moments of inertia followed by the coordinates of the N vertices (x1
-to zN) as 3N values, followed by 2N vertex indices corresponding to
+to zN) as 3N values, followed by 2E vertex indices corresponding to
 the end points of the E edges, then 4\*F vertex indices defining F
 faces.  The last value is the diameter value = the rounded diameter of
 the sphere that surrounds each vertex. The diameter value can be
diff --git a/doc/src/Howto_github.rst b/doc/src/Howto_github.rst
index cbe1264d52..b81716c09d 100644
--- a/doc/src/Howto_github.rst
+++ b/doc/src/Howto_github.rst
@@ -480,11 +480,11 @@ Some recent changes to the workflow are not captured in this tutorial.
 For example, in addition to the *develop* branch, to which all new
 features should be submitted, there is also a *release*, a *stable*, and
 a *maintenance* branch; the *release* branch is updated from the
-*develop* as part of a feature release, and *stable* (together with
-*release*) are updated from *develop* when a stable release is made. In
-between stable releases, selected bug fixes and infrastructure updates
-are back-ported from the *develop* branch to the *maintenance* branch
-and occasionally merged to *stable* as an update release.
+*develop* branch as part of a "feature release", and *stable* (together
+with *release*) are updated from *develop* when a "stable release" is
+made. In between stable releases, selected bug fixes and infrastructure
+updates are back-ported from the *develop* branch to the *maintenance*
+branch and occasionally merged to *stable* as an update release.
 
 Furthermore, the naming of the release tags now follow the pattern
 "patch_<Day><Month><Year>" to simplify comparisons between releases.
diff --git a/doc/src/Howto_tip4p.rst b/doc/src/Howto_tip4p.rst
index 4d9b514e0d..bc6e91b6b0 100644
--- a/doc/src/Howto_tip4p.rst
+++ b/doc/src/Howto_tip4p.rst
@@ -193,11 +193,14 @@ file changed):
     write_data tip4p-implicit.data nocoeff
 
 Below is the code for a LAMMPS input file using the explicit method and
-a TIP4P molecule file.  Because of using :doc:`fix rigid/nvt/small
+a TIP4P molecule file.  Because of using :doc:`fix rigid/small
 <fix_rigid>` no bonds need to be defined and thus no extra storage needs
-to be reserved for them, but we need to switch to atom style full or use
-:doc:`fix property/atom mol <fix_property_atom>` so that fix
-rigid/nvt/small can identify rigid bodies by their molecule ID:
+to be reserved for them, but we need to either switch to atom style full
+or use :doc:`fix property/atom mol <fix_property_atom>` so that fix
+rigid/small can identify rigid bodies by their molecule ID.  Also a
+:doc:`neigh_modify exclude <neigh_modify>` command is added to exclude
+computing intramolecular non-bonded interactions, since those are
+removed by the rigid fix anyway:
 
 .. code-block:: LAMMPS
 
@@ -216,17 +219,17 @@ rigid/nvt/small can identify rigid bodies by their molecule ID:
     pair_coeff 2 2 0.0    1.0
     pair_coeff 3 3 0.0    1.0
 
-    fix mol all property/atom mol
+    fix mol all property/atom mol ghost yes
     molecule water tip4p.mol
     create_atoms 0 random 33 34564 NULL mol water 25367 overlap 1.33
+    neigh_modify exclude molecule/intra all
 
     timestep 0.5
-    fix integrate all rigid/nvt/small molecule temp 300.0 300.0 100.0
-    velocity all create 300.0 5463576
+    fix integrate all rigid/small molecule langevin 300.0 300.0 100.0 2345634
 
     thermo_style custom step temp press etotal density pe ke
-    thermo 1000
-    run 20000
+    thermo 2000
+    run 40000
     write_data tip4p-explicit.data nocoeff
 
 .. _tip4p_molecule:
diff --git a/doc/src/Howto_tip5p.rst b/doc/src/Howto_tip5p.rst
index 10674a04b6..4bb9754875 100644
--- a/doc/src/Howto_tip5p.rst
+++ b/doc/src/Howto_tip5p.rst
@@ -81,11 +81,13 @@ long-range Coulombic solver (e.g. Ewald or PPPM in LAMMPS).
 
 Below is the code for a LAMMPS input file for setting up a simulation of
 TIP5P water with a molecule file.  Because of using :doc:`fix
-rigid/nvt/small <fix_rigid>` no bonds need to be defined and thus no
-extra storage needs to be reserved for them, but we need to switch to
+rigid/small <fix_rigid>` no bonds need to be defined and thus no extra
+storage needs to be reserved for them, but we need to either switch to
 atom style full or use :doc:`fix property/atom mol <fix_property_atom>`
-so that fix rigid/nvt/small can identify rigid bodies by their molecule
-ID:
+so that fix rigid/small can identify rigid bodies by their molecule ID.
+Also a :doc:`neigh_modify exclude <neigh_modify>` command is added to
+exclude computing intramolecular non-bonded interactions, since those
+are removed by the rigid fix anyway:
 
 .. code-block:: LAMMPS
 
@@ -107,11 +109,11 @@ ID:
     fix mol all property/atom mol
     molecule water tip5p.mol
     create_atoms 0 random 33 34564 NULL mol water 25367 overlap 1.33
+    neigh_modify exclude molecule/intra all
 
     timestep 0.5
-    fix integrate all rigid/nvt/small molecule temp 300.0 300.0 100.0
+    fix integrate all rigid/small molecule langevin 300.0 300.0 50.0 235664
     reset_timestep 0
-    velocity all create 300.0 5463576
 
     thermo_style custom step temp press etotal density pe ke
     thermo 1000
diff --git a/doc/src/Install_git.rst b/doc/src/Install_git.rst
index b6d3ced0a5..45e364a226 100644
--- a/doc/src/Install_git.rst
+++ b/doc/src/Install_git.rst
@@ -28,16 +28,16 @@ provides `limited support for subversion clients <svn_>`_.
 
 You can follow the LAMMPS development on 4 different git branches:
 
-* **release**  :  this branch is updated with every patch or feature release;
-  updates are always "fast-forward" merges from *develop*
-* **develop**  :  this branch follows the ongoing development and
-  is updated with every merge commit of a pull request
-* **stable**   :  this branch is updated from the *release* branch with
-  every stable release version and also has selected bug fixes with every
-  update release when the *maintenance* branch is merged into it
-* **maintenance**  :  this branch collects back-ported bug fixes from the
-  *develop* branch to the *stable* branch. It is used to update *stable*
-  for update releases and it synchronized with *stable* at each stable release.
+* **develop** : this branch follows the ongoing development and is
+  updated with every merge commit of a pull request
+* **release** : this branch is updated with every "feature release";
+   updates are always "fast-forward" merges from *develop*
+* **maintenance** : this branch collects back-ported bug fixes from the
+  *develop* branch to the *stable* branch.  It is used to update the
+  *stable* branch for "stable update releases".
+* **stable** : this branch is updated from the *release* branch with
+  every "stable release" version and also has selected bug fixes with
+  every "update release" when the *maintenance* branch is merged into it
 
 To access the git repositories on your box, use the clone command to
 create a local copy of the LAMMPS repository with a command like:
diff --git a/doc/src/JPG/lammps-classes.png b/doc/src/JPG/lammps-classes.png
index 71b5af9ccc..d7ed506ab3 100644
Binary files a/doc/src/JPG/lammps-classes.png and b/doc/src/JPG/lammps-classes.png differ
diff --git a/doc/src/JPG/lammps-invoke-python.png b/doc/src/JPG/lammps-invoke-python.png
index d1e25f52ea..890963a577 100644
Binary files a/doc/src/JPG/lammps-invoke-python.png and b/doc/src/JPG/lammps-invoke-python.png differ
diff --git a/doc/src/JPG/lammps-releases.png b/doc/src/JPG/lammps-releases.png
new file mode 100644
index 0000000000..d5c317088f
Binary files /dev/null and b/doc/src/JPG/lammps-releases.png differ
diff --git a/doc/src/JPG/pylammps-invoke-lammps.png b/doc/src/JPG/pylammps-invoke-lammps.png
index 39296ee272..f5d4dd66a0 100644
Binary files a/doc/src/JPG/pylammps-invoke-lammps.png and b/doc/src/JPG/pylammps-invoke-lammps.png differ
diff --git a/doc/src/JPG/python-invoke-lammps.png b/doc/src/JPG/python-invoke-lammps.png
index 0c456028db..6e44b8d56e 100644
Binary files a/doc/src/JPG/python-invoke-lammps.png and b/doc/src/JPG/python-invoke-lammps.png differ
diff --git a/doc/src/Library_objects.rst b/doc/src/Library_objects.rst
index db21817cfd..7c0ca824d7 100644
--- a/doc/src/Library_objects.rst
+++ b/doc/src/Library_objects.rst
@@ -9,6 +9,8 @@ fixes, or variables in LAMMPS using the following functions:
 - :cpp:func:`lammps_extract_variable_datatype`
 - :cpp:func:`lammps_extract_variable`
 - :cpp:func:`lammps_set_variable`
+- :cpp:func:`lammps_set_string_variable`
+- :cpp:func:`lammps_set_internal_variable`
 - :cpp:func:`lammps_variable_info`
 
 -----------------------
@@ -38,6 +40,16 @@ fixes, or variables in LAMMPS using the following functions:
 
 -----------------------
 
+.. doxygenfunction:: lammps_set_string_variable
+   :project: progguide
+
+-----------------------
+
+.. doxygenfunction:: lammps_set_internal_variable
+   :project: progguide
+
+-----------------------
+
 .. doxygenfunction:: lammps_variable_info
    :project: progguide
 
diff --git a/doc/src/Manual_version.rst b/doc/src/Manual_version.rst
index 8fb28fef84..1bfaffaf6d 100644
--- a/doc/src/Manual_version.rst
+++ b/doc/src/Manual_version.rst
@@ -3,45 +3,25 @@ What does a LAMMPS version mean
 
 The LAMMPS "version" is the date when it was released, such as 1 May
 2014.  LAMMPS is updated continuously, and we aim to keep it working
-correctly and reliably at all times.  You can follow its development
-in a public `git repository on GitHub <https://github.com/lammps/lammps>`_.
-
-Modifications of the LAMMPS source code (like bug fixes, code refactors,
-updates to existing features, or addition of new features) are organized
-into pull requests.  Pull requests will be merged into the *develop*
-branch of the git repository after they pass automated testing and code
-review by the LAMMPS developers.  When a sufficient number of changes
-have accumulated *and* the *develop* branch version passes an extended
-set of automated tests, we release it as a *feature release*, which are
-currently made every 4 to 8 weeks.  The *release* branch of the git
-repository is updated with every such release.  A summary of the most
-important changes of the patch releases are on `this website page
-<https://www.lammps.org/bug.html>`_.  More detailed release notes are
-`available on GitHub <https://github.com/lammps/lammps/releases/>`_.
-
-Once or twice a year, we have a "stabilization period" where we apply
-only bug fixes and small, non-intrusive changes to the *develop*
-branch.  At the same time, the code is subjected to more detailed and
-thorough manual testing than the default automated testing.  Also,
-several variants of static code analysis are run to improve the overall
-code quality, consistency, and compliance with programming standards,
-best practices and style conventions.
-
-The release after such a stabilization period is called a *stable*
-version and both, the *release* and the *stable* branches are updated
-with it.  Between stable releases, we collect back-ported bug fixes and
-updates from the *develop* branch in the *maintenance* branch.  From the
-*maintenance* branch we make occasional update releases and update the
-*stable* branch accordingly.
+correctly and reliably at all times.  Also, several variants of static
+code analysis are run regularly to maintain or improve the overall code
+quality, consistency, and compliance with programming standards, best
+practices and style conventions.  You can follow its development in a
+public `git repository on GitHub <https://github.com/lammps/lammps>`_.
 
 Each version of LAMMPS contains all the documented *features* up to and
 including its version date.  For recently added features, we add markers
 to the documentation at which specific LAMMPS version a feature or
 keyword was added or significantly changed.
 
-The version date is printed to the screen and log file every time you run
-LAMMPS.  It is also in the file src/version.h and in the LAMMPS
-directory name created when you unpack a tarball.  And it is on the
+Identifying the Version
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The version date is printed to the screen and log file every time you
+run LAMMPS.  There also is an indication, if a LAMMPS binary was
+compiled from version with modifications **after** a release.
+It is also visible in the file src/version.h and in the LAMMPS directory
+name created when you unpack a downloaded tarball.  And it is on the
 first page of the :doc:`manual <Manual>`.
 
 * If you browse the HTML pages of the online version of the LAMMPS
@@ -53,3 +33,56 @@ first page of the :doc:`manual <Manual>`.
 * If you browse the HTML pages included in your downloaded tarball, they
   describe the version you have, which may be older than the online
   version.
+
+LAMMPS releases, branches, and tags
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. figure:: JPG/lammps-releases.png
+   :figclass: align-center
+
+   Relations between releases, main branches, and tags in the LAMMPS git repository
+
+Development
+"""""""""""
+
+Modifications of the LAMMPS source code (like bug fixes, code
+refactoring, updates to existing features, or addition of new features)
+are organized into pull requests.  Pull requests will be merged into the
+*develop* branch of the git repository after they pass automated testing
+and code review by the LAMMPS developers.
+
+Feature Releases
+""""""""""""""""
+
+When a sufficient number of new features and updates have accumulated
+*and* the LAMMPS version on the *develop* branch passes an extended set
+of automated tests, we release it as a *feature release*, which are
+currently made every 4 to 8 weeks.  The *release* branch of the git
+repository is updated with every such *feature release* and a tag in the
+format ``patch_1May2014`` is added.  A summary of the most important
+changes of these releases for the current year are posted on `this
+website page <https://www.lammps.org/bug.html>`_.  More detailed release
+notes are `available on GitHub
+<https://github.com/lammps/lammps/releases/>`_.
+
+Stable Releases
+"""""""""""""""
+
+About once a year, we release a *stable release* version of LAMMPS.
+This is done after a "stabilization period" where we apply only bug
+fixes and small, non-intrusive changes to the *develop* branch but no
+new features.  At the same time, the code is subjected to more detailed
+and thorough manual testing than the default automated testing.
+After such a *stable release*, both the *release* and the *stable*
+branches are updated and two tags are applied, a ``patch_1May2014`` format
+and a ``stable_1May2014`` format tag.
+
+Stable Release Updates
+""""""""""""""""""""""
+
+Between *stable releases*, we collect bug fixes and updates back-ported
+from the *develop* branch in a branch called *maintenance*.  From the
+*maintenance* branch we make occasional *stable update releases* and
+update the *stable* branch accordingly.  The first update to the
+``stable_1May2014`` release would be tagged as
+``stable_1May2014_update1``.  These updates contain no new features.
diff --git a/doc/src/Packages_details.rst b/doc/src/Packages_details.rst
index 12aa8eeb52..a3d65d9d65 100644
--- a/doc/src/Packages_details.rst
+++ b/doc/src/Packages_details.rst
@@ -2226,7 +2226,7 @@ and third order tensor from finite differences.
 
 **Install:**
 
-The PHONON package requires that also the :ref:`KSPACE <PKG-KSPACE>`
+The fix phonon command also requires that the :ref:`KSPACE <PKG-KSPACE>`
 package is installed.
 
 
diff --git a/doc/src/Python_error.rst b/doc/src/Python_error.rst
index 6aec8df391..bdf69df5b4 100644
--- a/doc/src/Python_error.rst
+++ b/doc/src/Python_error.rst
@@ -1,11 +1,11 @@
 Handling LAMMPS errors
 **********************
 
-The shared library is compiled with :ref:`C++ exception support
-<exceptions>` to provide a better error handling experience.  C++
-exceptions allow capturing errors on the C++ side and rethrowing them on
-the Python side.  This way LAMMPS errors can be handled through the
-Python exception handling mechanism.
+LAMMPS and the LAMMPS library are compiled with :ref:`C++ exception support
+<exceptions>` to provide a better error handling experience.  LAMMPS errors
+trigger throwing a C++ exception. These exceptions allow capturing errors on
+the C++ side and rethrowing them on the Python side.  This way LAMMPS errors
+can be handled through the Python exception handling mechanism.
 
 .. code-block:: python
 
diff --git a/doc/src/Run_output.rst b/doc/src/Run_output.rst
index b1458f056d..2025bf5321 100644
--- a/doc/src/Run_output.rst
+++ b/doc/src/Run_output.rst
@@ -49,14 +49,17 @@ simulation.  An example set of statistics is shown here:
 ----------
 
 The first section provides a global loop timing summary. The *loop time*
-is the total wall-clock time for the simulation to run.  The
-*Performance* line is provided for convenience to help predict how long
-it will take to run a desired physical simulation and to have numbers
-useful for performance comparison between different simulation settings
-or system sizes.  The *CPU use* line provides the CPU utilization per
-MPI task; it should be close to 100% times the number of OpenMP threads
-(or 1 of not using OpenMP).  Lower numbers correspond to delays due to
-file I/O or insufficient thread utilization.
+is the total wall-clock time for the MD steps of the simulation run,
+excluding the time for initialization and setup (i.e. the parts that may
+be skipped with :doc:`run N pre no <run>`).  The *Performance* line is
+provided for convenience to help predict how long it will take to run a
+desired physical simulation and to have numbers useful for performance
+comparison between different simulation settings or system sizes.  The
+*CPU use* line provides the CPU utilization per MPI task; it should be
+close to 100% times the number of OpenMP threads (or 1 if not using
+OpenMP).  Lower numbers correspond to delays due to file I/O or
+insufficient thread utilization from parts of the code that have not
+been multi-threaded.
 
 ----------
 
diff --git a/doc/src/angle_charmm.rst b/doc/src/angle_charmm.rst
index 425ed7e4f1..655b860a28 100644
--- a/doc/src/angle_charmm.rst
+++ b/doc/src/angle_charmm.rst
@@ -70,7 +70,9 @@ for more info.
 Related commands
 """"""""""""""""
 
-:doc:`angle_coeff <angle_coeff>`
+:doc:`angle_coeff <angle_coeff>`, :doc:`pair_style lj/charmm variants <pair_charmm>`,
+:doc:`dihedral_style charmm <dihedral_charmm>`,
+:doc:`dihedral_style charmmfsw <dihedral_charmm>`, :doc:`fix cmap <fix_cmap>`
 
 Default
 """""""
diff --git a/doc/src/angle_lepton.rst b/doc/src/angle_lepton.rst
index 20fa5b1fee..22873f5765 100644
--- a/doc/src/angle_lepton.rst
+++ b/doc/src/angle_lepton.rst
@@ -11,7 +11,16 @@ Syntax
 
 .. code-block:: LAMMPS
 
-   angle_style lepton
+   angle_style style args
+
+* style = *lepton*
+* args = optional arguments
+
+.. parsed-literal::
+
+   args = *auto_offset* or *no_offset*
+     *auto_offset* = offset the potential energy so that the value at theta0 is 0.0 (default)
+     *no_offset* = do not offset the potential energy
 
 Examples
 """"""""
@@ -19,6 +28,7 @@ Examples
 .. code-block:: LAMMPS
 
    angle_style lepton
+   angle_style lepton no_offset
 
    angle_coeff  1  120.0  "k*theta^2; k=250.0"
    angle_coeff  2   90.0  "k2*theta^2 + k3*theta^3 + k4*theta^4; k2=300.0; k3=-100.0; k4=50.0"
@@ -41,6 +51,13 @@ angle coefficient.  For example `"200.0*theta^2"` represents a
 
    U_{angle,i} = K (\theta_i - \theta_0)^2 = K \theta^2 \qquad \theta = \theta_i - \theta_0
 
+.. versionchanged:: TBD
+
+By default the potential energy U is shifted so that the value U is 0.0
+for $theta = theta_0$.  This is equivalent to using the optional keyword
+*auto_offset*.  When using the keyword *no_offset* instead, the
+potential energy is not shifted.
+
 The `Lepton library <https://simtk.org/projects/lepton>`_, that the
 *lepton* angle style interfaces with, evaluates this expression string
 at run time to compute the pairwise energy.  It also creates an
diff --git a/doc/src/atom_style.rst b/doc/src/atom_style.rst
index b5ee0f07ff..60a85e0bcb 100644
--- a/doc/src/atom_style.rst
+++ b/doc/src/atom_style.rst
@@ -49,248 +49,221 @@ Examples
 Description
 """""""""""
 
-Define what style of atoms to use in a simulation.  This determines
-what attributes are associated with the atoms.  This command must be
-used before a simulation is setup via a :doc:`read_data <read_data>`,
-:doc:`read_restart <read_restart>`, or :doc:`create_box <create_box>`
-command.
+The *atom_style* command selects which per-atom attributes are
+associated with atoms in a LAMMPS simulation and thus stored and
+communicated with those atoms as well as read from and stored in data
+and restart files.  Different models (e.g. :doc:`pair styles
+<pair_style>`) require access to specific per-atom attributes and thus
+require a specific atom style.  For example, to compute Coulomb
+interactions, the atom must have a "charge" (aka "q") attribute.
+
+A number of distinct atom styles exist that combine attributes.  Some
+atom styles are a superset of other atom styles.  Further attributes
+may be added to atoms either via using a hybrid style which provides a
+union of the attributes of the sub-styles, or via the :doc:`fix
+property/atom <fix_property_atom>` command.  The *atom_style* command
+must be used before a simulation is setup via a :doc:`read_data
+<read_data>`, :doc:`read_restart <read_restart>`, or :doc:`create_box
+<create_box>` command.
 
 .. note::
 
-   Many of the atom styles discussed here are only enabled if
-   LAMMPS was built with a specific package, as listed below in the
-   Restrictions section.
+   Many of the atom styles discussed here are only enabled if LAMMPS was
+   built with a specific package, as listed below in the Restrictions
+   section.
 
-Once a style is assigned, it cannot be changed, so use a style general
-enough to encompass all attributes.  E.g. with style *bond*, angular
-terms cannot be used or added later to the model.  It is OK to use a
-style more general than needed, though it may be slightly inefficient.
+Once a style is selected and the simulation box defined, it cannot be
+changed but only augmented with the :doc:`fix property/atom
+<fix_property_atom>` command.  So one should select an atom style
+general enough to encompass all attributes required.  E.g. with atom
+style *bond*, it is not possible to define angles and use angle styles.
 
-The choice of style affects what quantities are stored by each atom,
-what quantities are communicated between processors to enable forces
-to be computed, and what quantities are listed in the data file read
-by the :doc:`read_data <read_data>` command.
+It is OK to use a style more general than needed, though it may be
+slightly inefficient because it will allocate and communicate
+additional unused data.
 
-These are the additional attributes of each style and the typical
-kinds of physical systems they are used to model.  All styles store
-coordinates, velocities, atom IDs and types.  See the
+Atom style attributes
+"""""""""""""""""""""
+
+The atom style *atomic* has the minimum subset of per-atom attributes
+and is also the default setting.  It encompasses the following per-atom
+attributes (name of the vector or array in the :doc:`Atom class
+<Classes_atom>` is given in parenthesis): atom-ID (tag), type (type),
+position (x), velocities (v), forces (f), image flags (image), group
+membership (mask).  Since all atom styles are a superset of atom style
+*atomic*\ , they all include these attributes.
+
+This table lists all the available atom styles, which attributes they
+provide, which :doc:`package <Packages>` is required to use them, and
+what the typical applications are that use them.  See the
 :doc:`read_data <read_data>`, :doc:`create_atoms <create_atoms>`, and
-:doc:`set <set>` commands for info on how to set these various
-quantities.
+:doc:`set <set>` commands for details on how to set these various
+quantities.  More information about many of the styles is provided in
+the Additional Information section below.
 
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *amoeba*     | molecular + charge + 1/5 neighbors                  | AMOEBA/HIPPO polarized force fields  |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *angle*      | bonds and angles                                    | bead-spring polymers with stiffness  |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *atomic*     | only the default values                             | coarse-grain liquids, solids, metals |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *body*       | mass, inertia moments, quaternion, angular momentum | arbitrary bodies                     |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *bond*       | bonds                                               | bead-spring polymers                 |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *charge*     | charge                                              | atomic system with charges           |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *dielectric* | normx normy normz area/patch ed em epsilon curv     | system with surface polarization     |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *dipole*     | charge and dipole moment                            | system with dipolar particles        |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *dpd*        | internal temperature and internal energies          | DPD particles                        |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *edpd*       | temperature and heat capacity                       | eDPD particles                       |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *electron*   | charge and spin and eradius                         | electronic force field               |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *ellipsoid*  | shape, quaternion, angular momentum                 | aspherical particles                 |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *full*       | molecular + charge                                  | bio-molecules                        |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *line*       | end points, angular velocity                        | rigid bodies                         |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *mdpd*       | density                                             | mDPD particles                       |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *molecular*  | bonds, angles, dihedrals, impropers                 | uncharged molecules                  |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *oxdna*      | nucleotide polarity                                 | coarse-grained DNA and RNA models    |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *peri*       | mass, volume                                        | mesoscopic Peridynamic models        |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *smd*        | volume, kernel diameter, contact radius, mass       | solid and fluid SPH particles        |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *sph*        | rho, esph, cv                                       | SPH particles                        |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *sphere*     | diameter, mass, angular velocity                    | granular models                      |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *bpm/sphere* | diameter, mass, angular velocity, quaternion        | granular bonded particle models (BPM)|
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *spin*       | magnetic moment                                     | system with magnetic particles       |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *tdpd*       | chemical concentration                              | tDPD particles                       |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *template*   | template index, template atom                       | small molecules with fixed topology  |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *tri*        | corner points, angular momentum                     | rigid bodies                         |
-+--------------+-----------------------------------------------------+--------------------------------------+
-| *wavepacket* | charge, spin, eradius, etag, cs_re, cs_im           | AWPMD                                |
-+--------------+-----------------------------------------------------+--------------------------------------+
+.. list-table::
+   :header-rows: 1
+   :widths: auto
+
+   * - Atom style
+     - Attributes
+     - Required package
+     - Applications
+   * - *amoeba*
+     - *full* + "1-5 special neighbor data"
+     - :ref:`AMOEBA <PKG-AMOEBA>`
+     - AMOEBA/HIPPO force fields
+   * - *angle*
+     - *bond* + "angle data"
+     - :ref:`MOLECULE <PKG-MOLECULE>`
+     - bead-spring polymers with stiffness
+   * - *atomic*
+     - tag, type, x, v, f, image, mask
+     -
+     - atomic liquids, solids, metals
+   * - *body*
+     - *atomic* + radius, rmass, angmom, torque, body
+     - :ref:`BODY <PKG-BODY>`
+     - arbitrary bodies, see :doc:`body howto <Howto_body>`
+   * - *bond*
+     - *atomic* + molecule, nspecial, special + "bond data"
+     - :ref:`MOLECULE <PKG-MOLECULE>`
+     - bead-spring polymers
+   * - *bpm/sphere*
+     - *bond* + radius, rmass, omega, torque, quat
+     - :ref:`BPM <PKG-BPM>`
+     - granular bonded particle models, see :doc:`BPM howto <Howto_bpm>`
+   * - *charge*
+     - *atomic* + q
+     -
+     - atomic systems with charges
+   * - *dielectric*
+     - *full* + mu, area, ed, em, epsilon, curvature, q_scaled
+     - :ref:`DIELECTRIC <PKG-DIELECTRIC>`
+     - systems with surface polarization
+   * - *dipole*
+     - *charge* + mu
+     - :ref:`DIPOLE <PKG-DIPOLE>`
+     - atomic systems with charges and point dipoles
+   * - *dpd*
+     - *atomic* + rho + "reactive DPD data"
+     - :ref:`DPD-REACT <PKG-DPD-REACT>`
+     - reactive DPD
+   * - *edpd*
+     - *atomic* + "eDPD data"
+     - :ref:`DPD-MESO <PKG-DPD-MESO>`
+     - Energy conservative DPD (eDPD)
+   * - *electron*
+     - *charge* + espin, eradius, ervel, erforce
+     - :ref:`EFF <PKG-EFF>`
+     - Electron force field systems
+   * - *ellipsoid*
+     - *atomic* + rmass, angmom, torque, ellipsoid
+     -
+     - aspherical particles
+   * - *full*
+     - *molecular* + q
+     - :ref:`MOLECULE <PKG-MOLECULE>`
+     - molecular force fields
+   * - *line*
+     - *atomic* + molecule, radius, rmass, omega, torque, line
+     -
+     - 2-d rigid body particles
+   * - *mdpd*
+     - *atomic* + rho, drho, vest
+     - :ref:`DPD-MESO <PKG-DPD-MESO>`
+     - Many-body DPD (mDPD)
+   * - *molecular*
+     - *angle* + "dihedral and improper data"
+     - :ref:`MOLECULE <PKG-MOLECULE>`
+     - apolar and uncharged molecules
+   * - *oxdna*
+     - *atomic* + id5p
+     - :ref:`CG-DNA <PKG-CG-DNA>`
+     - coarse-grained DNA and RNA models
+   * - *peri*
+     - *atomic* + rmass, vfrac, s0, x0
+     - :ref:`PERI <PKG-PERI>`
+     - mesoscopic Peridynamics models
+   * - *smd*
+     - *atomic* + molecule, radius, rmass + "smd data"
+     - :ref:`MACHDYN <PKG-MACHDYN>`
+     - Smooth Mach Dynamics models
+   * - *sph*
+     - *atomic* + "sph data"
+     - :ref:`SPH <PKG-SPH>`
+     - Smoothed particle hydrodynamics models
+   * - *sphere*
+     - *atomic* + radius, rmass, omega, torque
+     -
+     - finite size spherical particles, e.g. granular models
+   * - *spin*
+     - *atomic* + "magnetic moment data"
+     - :ref:`SPIN <PKG-SPIN>`
+     - magnetic particles
+   * - *tdpd*
+     - *atomic* + cc, cc_flux, vest
+     - :ref:`DPD-MESO <PKG-DPD-MESO>`
+     - Transport DPD (tDPD)
+   * - *template*
+     - *atomic* + molecule, molindex, molatom
+     - :ref:`MOLECULE <PKG-MOLECULE>`
+     - molecular systems where attributes are taken from :doc:`molecule files <molecule>`
+   * - *tri*
+     - *sphere* + molecule, angmom, tri
+     -
+     - 3-d triangulated rigid body LJ particles
+   * - *wavepacket*
+     - *charge* + "wavepacket data"
+     - :ref:`AWPMD <PKG-AWPMD>`
+     - Antisymmetrized wave packet MD
 
 .. note::
 
-   It is possible to add some attributes, such as a molecule ID, to
-   atom styles that do not have them via the :doc:`fix property/atom
-   <fix_property_atom>` command.  This command also allows new custom
-   attributes consisting of extra integer or floating-point values to
-   be added to atoms.  See the :doc:`fix property/atom
-   <fix_property_atom>` page for examples of cases where this is
-   useful and details on how to initialize, access, and output the
-   custom values.
+   It is possible to add some attributes, such as a molecule ID and
+   charge, to atom styles that do not have them built in using the
+   :doc:`fix property/atom <fix_property_atom>` command.  This command
+   also allows new custom-named attributes consisting of extra integer
+   or floating-point values or vectors to be added to atoms.  See the
+   :doc:`fix property/atom <fix_property_atom>` page for examples of
+   cases where this is useful and details on how to initialize,
+   access, and output these custom values.
 
-All of the above styles define point particles, except the *sphere*,
-*bpm/sphere*, *ellipsoid*, *electron*, *peri*, *wavepacket*, *line*,
-*tri*, and *body* styles, which define finite-size particles.  See the
-:doc:`Howto spherical <Howto_spherical>` page for an overview of using
-finite-size particle models with LAMMPS.
+----------
 
-All of the point-particle styles assign mass to particles on a
-per-type basis, using the :doc:`mass <mass>` command, The finite-size
-particle styles assign mass to individual particles on a per-particle
-basis.
+Particle size and mass
+""""""""""""""""""""""
 
-For the *sphere* and *bpm/sphere* styles, the particles are spheres
-and each stores a per-particle diameter and mass.  If the diameter >
-0.0, the particle is a finite-size sphere.  If the diameter = 0.0, it
-is a point particle.  Note that by use of the *disc* keyword with the
-:doc:`fix nve/sphere <fix_nve_sphere>`, :doc:`fix nvt/sphere
-<fix_nvt_sphere>`, :doc:`fix nph/sphere <fix_nph_sphere>`,
-:doc:`fix npt/sphere <fix_npt_sphere>` commands for the *sphere* style,
-spheres can be effectively treated as 2d discs for a 2d simulation if
-desired.  See also the :doc:`set density/disc <set>` command.  These
-styles take an optional 0 or 1 argument.  A value of 0 means the
-radius of each sphere is constant for the duration of the simulation.
-A value of 1 means the radii may vary dynamically during the simulation,
-e.g. due to use of the :doc:`fix adapt <fix_adapt>` command.
+All of the atom styles define point particles unless they (1) define
+finite-size spherical particles via the *radius* attribute, or (2)
+define finite-size aspherical particles (e.g. the *body*, *ellipsoid*,
+*line*, and *tri* styles).  Most of these styles can also be used with
+mixtures of point and finite-size particles.
 
-For the *ellipsoid* style, the particles are ellipsoids and each
-stores a flag which indicates whether it is a finite-size ellipsoid or
-a point particle.  If it is an ellipsoid, it also stores a shape
-vector with the 3 diameters of the ellipsoid and a quaternion 4-vector
-with its orientation.
+Note that the *radius* property may need to be provided as a
+*diameter* (e.g. in :doc:`molecule files <molecule>` or :doc:`data
+files <read_data>`).  See the :doc:`Howto spherical <Howto_spherical>`
+page for an overview of using finite-size spherical and aspherical
+particle models with LAMMPS.
 
-For the *dielectric* style, each particle can be either a physical
-particle (e.g. an ion), or an interface particle representing a boundary
-element between two regions of different dielectric constant. For
-interface particles, in addition to the properties associated with
-atom_style full, each particle also should be assigned a normal unit
-vector (defined by normx, normy, normz), an area (area/patch), the
-difference and mean of the dielectric constants of two sides of the
-interface along the direction of the normal vector (ed and em), the
-local dielectric constant at the boundary element (epsilon), and a mean
-local curvature (curv).  Physical particles must be assigned these
-values, as well, but only their local dielectric constants will be used;
-see documentation for associated :doc:`pair styles <pair_dielectric>`
-and :doc:`fixes <fix_polarize>`.  The distinction between the physical
-and interface particles is only meaningful when :doc:`fix polarize
-<fix_polarize>` commands are applied to the interface particles. This
-style is part of the DIELECTRIC package.
+Unless an atom style defines the per-atom *rmass* attribute, particle
+masses are defined on a per-type basis, using the :doc:`mass <mass>`
+command.  This means each particle's mass is indexed by its atom
+*type*.
 
-For the *dipole* style, a point dipole is defined for each point
-particle.  Note that if you wish the particles to be finite-size
-spheres as in a Stockmayer potential for a dipolar fluid, so that the
-particles can rotate due to dipole-dipole interactions, then you need
-to use atom_style hybrid sphere dipole, which will assign both a
-diameter and dipole moment to each particle.
+A few styles define the per-atom *rmass* attribute which can also be
+added using the :doc:`fix property/atom <fix_property_atom>` command.
+In this case each particle stores its own mass.  Atom styles that have
+a per-atom rmass may define it indirectly through setting particle
+diameter and density on a per-particle basis.  If both per-type mass
+and per-atom *rmass* are defined (e.g. in a hybrid style), the
+per-atom mass will take precedence in any operation which which works
+with both flavors of mass.
 
-For the *electron* style, the particles representing electrons are 3d
-Gaussians with a specified position and bandwidth or uncertainty in
-position, which is represented by the eradius = electron size.
+----------
 
-For the *peri* style, the particles are spherical and each stores a
-per-particle mass and volume.
-
-The *bpm/sphere* style is part of the BPM package.
-
-The *oxdna* style is for coarse-grained nucleotides and stores the
-3'-to-5' polarity of the nucleotide strand, which is set through
-the bond topology in the data file. The first (second) atom in a
-bond definition is understood to point towards the 3'-end (5'-end)
-of the strand. Note that this style is part of the CG-DNA package.
-
-The *dpd* style is for dissipative particle dynamics (DPD) particles.
-Note that it is part of the DPD-REACT package, and is not for use with
-the :doc:`pair_style dpd or dpd/stat <pair_dpd>` commands, which can
-simply use atom_style atomic.  Atom_style dpd extends DPD particle
-properties with internal temperature (dpdTheta), internal conductive
-energy (uCond), internal mechanical energy (uMech), and internal
-chemical energy (uChem).
-
-The *edpd* style is for energy-conserving dissipative particle
-dynamics (eDPD) particles which store a temperature (edpd_temp), and
-heat capacity(edpd_cv).
-
-The *mdpd* style is for many-body dissipative particle dynamics (mDPD)
-particles which store a density (rho) for considering
-density-dependent many-body interactions.
-
-The *tdpd* style is for transport dissipative particle dynamics (tDPD)
-particles which store a set of chemical concentration. An integer
-"cc_species" is required to specify the number of chemical species
-involved in a tDPD system.
-
-The *sph* style is for smoothed particle hydrodynamics (SPH)
-particles which store a density (rho), energy (esph), and heat capacity
-(cv).
-
-The *smd* style is for a general formulation of Smooth Particle
-Hydrodynamics.  Both fluids and solids can be modeled.  Particles
-store the mass and volume of an integration point, a kernel diameter
-used for calculating the field variables (e.g. stress and deformation)
-and a contact radius for calculating repulsive forces which prevent
-individual physical bodies from penetrating each other.
-
-For the *spin* style, a magnetic spin is associated to each atom.
-Those spins have a norm (their magnetic moment) and a direction.
-
-The *wavepacket* style is similar to *electron*, but the electrons may
-consist of several Gaussian wave packets, summed up with coefficients
-cs= (cs_re,cs_im).  Each of the wave packets is treated as a separate
-particle in LAMMPS, wave packets belonging to the same electron must
-have identical *etag* values.
-
-For the *line* style, the particles are idealized line segments and
-each stores a per-particle mass and length and orientation (i.e. the
-end points of the line segment).
-
-For the *tri* style, the particles are planar triangles and each
-stores a per-particle mass and size and orientation (i.e. the corner
-points of the triangle).
-
-The *template* style allows molecular topology (bonds,angles,etc) to be
-defined via a molecule template using the :doc:`molecule <molecule>`
-command.  The template stores one or more molecules with a single copy
-of the topology info (bonds,angles,etc) of each.  Individual atoms
-only store a template index and template atom to identify which
-molecule and which atom-within-the-molecule they represent.  Using the
-*template* style instead of the *bond*, *angle*, *molecular* styles
-can save memory for systems comprised of a large number of small
-molecules, all of a single type (or small number of types).  See the
-paper by Grime and Voth, in :ref:`(Grime) <Grime>`, for examples of how this
-can be advantageous for large-scale coarse-grained systems.
-The ``examples/template`` directory has a few demo inputs and examples
-showing the use of the *template* atom style versus *molecular*.
-
-.. note::
-
-   When using the *template* style with a :doc:`molecule template
-   <molecule>` that contains multiple molecules, you should ensure the
-   atom types, bond types, angle_types, etc in all the molecules are
-   consistent.  E.g. if one molecule represents H2O and another CO2,
-   then you probably do not want each molecule file to define 2 atom
-   types and a single bond type, because they will conflict with each
-   other when a mixture system of H2O and CO2 molecules is defined,
-   e.g. by the :doc:`read_data <read_data>` command.  Rather the H2O
-   molecule should define atom types 1 and 2, and bond type 1.  And
-   the CO2 molecule should define atom types 3 and 4 (or atom types 3
-   and 2 if a single oxygen type is desired), and bond type 2.
+Additional information about specific atom styles
+"""""""""""""""""""""""""""""""""""""""""""""""""
 
 For the *body* style, the particles are arbitrary bodies with internal
 attributes defined by the "style" of the bodies, which is specified by
@@ -309,6 +282,148 @@ Note that there may be additional arguments required along with the
 *bstyle* specification, in the atom_style body command.  These
 arguments are described on the :doc:`Howto body <Howto_body>` doc page.
 
+For the *dielectric* style, each particle can be either a physical
+particle (e.g. an ion), or an interface particle representing a boundary
+element between two regions of different dielectric constant. For
+interface particles, in addition to the properties associated with
+atom_style full, each particle also should be assigned a normal unit
+vector (defined by normx, normy, normz), an area (area/patch), the
+difference and mean of the dielectric constants of two sides of the
+interface along the direction of the normal vector (ed and em), the
+local dielectric constant at the boundary element (epsilon), and a mean
+local curvature (curv).  Physical particles must be assigned these
+values, as well, but only their local dielectric constants will be used;
+see documentation for associated :doc:`pair styles <pair_dielectric>`
+and :doc:`fixes <fix_polarize>`.  The distinction between the physical
+and interface particles is only meaningful when :doc:`fix polarize
+<fix_polarize>` commands are applied to the interface particles. This
+style is part of the DIELECTRIC package.
+
+For the *dipole* style, a point dipole vector mu is defined for each
+point particle.  Note that if you wish the particles to be finite-size
+spheres as in a Stockmayer potential for a dipolar fluid, so that the
+particles can rotate due to dipole-dipole interactions, then you need
+to use the command `atom_style hybrid sphere dipole`, which will
+assign both a diameter and dipole moment to each particle.  This also
+requires using an integrator with a "/sphere" suffix like :doc:`fix
+nve/sphere <fix_nve_sphere>` or :doc:`fix nvt/sphere <fix_nvt_sphere>`
+and the "update dipole" or "update dlm" parameters to the fix
+commands.
+
+The *dpd* style is for reactive dissipative particle dynamics (DPD)
+particles.  Note that it is part of the DPD-REACT package, and is not
+required for use with the :doc:`pair_style dpd or dpd/stat <pair_dpd>`
+commands, which only require the attributes from atom_style *atomic*.
+Atom_style *dpd* extends DPD particle properties with internal
+temperature (dpdTheta), internal conductive energy (uCond), internal
+mechanical energy (uMech), and internal chemical energy (uChem).
+
+The *edpd* style is for energy-conserving dissipative particle
+dynamics (eDPD) particles which store a temperature (edpd_temp), and
+heat capacity (edpd_cv).
+
+For the *electron* style, the particles representing electrons are 3d
+Gaussians with a specified position and bandwidth or uncertainty in
+position, which is represented by the eradius = electron size.
+
+For the *ellipsoid* style, particles can be ellipsoids which each
+stores a shape vector with the 3 diameters of the ellipsoid and a
+quaternion 4-vector with its orientation.  Each particle stores a flag
+in the ellipsoid vector which indicates whether it is an ellipsoid (1)
+or a point particle (0).
+
+For the *line* style, particles can be are idealized line segments
+which store a per-particle mass and length and orientation (i.e. the
+end points of the line segment).  Each particle stores a flag in the
+line vector which indicates whether it is a line segment (1) or a
+point particle (0).
+
+The *mdpd* style is for many-body dissipative particle dynamics (mDPD)
+particles which store a density (rho) for considering density-dependent
+many-body interactions.
+
+The *oxdna* style is for coarse-grained nucleotides and stores the
+3'-to-5' polarity of the nucleotide strand, which is set through
+the bond topology in the data file. The first (second) atom in a
+bond definition is understood to point towards the 3'-end (5'-end)
+of the strand.
+
+For the *peri* style, the particles are spherical and each stores a
+per-particle mass and volume.
+
+The *smd* style is for Smooth Particle Mach dynamics.  Both fluids and
+solids can be modeled.  Particles store the mass and volume of an
+integration point, a kernel diameter used for calculating the field
+variables (e.g. stress and deformation) and a contact radius for
+calculating repulsive forces which prevent individual physical bodies
+from penetrating each other.
+
+The *sph* style is for smoothed particle hydrodynamics (SPH) particles
+which store a density (rho), energy (esph), and heat capacity (cv).
+
+For the *spin* style, a magnetic spin is associated with each atom.
+Those spins have a norm (their magnetic moment) and a direction.
+
+The *tdpd* style is for transport dissipative particle dynamics (tDPD)
+particles which store a set of chemical concentration. An integer
+"cc_species" is required to specify the number of chemical species
+involved in a tDPD system.
+
+The *wavepacket* style is similar to the *electron* style, but the
+electrons may consist of several Gaussian wave packets, summed up with
+coefficients cs= (cs_re,cs_im).  Each of the wave packets is treated
+as a separate particle in LAMMPS, wave packets belonging to the same
+electron must have identical *etag* values.
+
+The *sphere* and *bpm/sphere* styles allow particles to be either point
+particles or finite-size particles.  If the *radius* attribute is >
+0.0, the particle is a finite-size sphere.  If the diameter = 0.0, it
+is a point particle.  Note that by using the *disc* keyword with the
+:doc:`fix nve/sphere <fix_nve_sphere>`, :doc:`fix nvt/sphere
+<fix_nvt_sphere>`, :doc:`fix nph/sphere <fix_nph_sphere>`, :doc:`fix
+npt/sphere <fix_npt_sphere>` commands for the *sphere* style, spheres
+can be effectively treated as 2d discs for a 2d simulation if desired.
+See also the :doc:`set density/disc <set>` command.  These styles also
+take an optional 0 or 1 argument.  A value of 0 means the radius of
+each sphere is constant for the duration of the simulation (this is
+the default).  A value of 1 means the radii may vary dynamically
+during the simulation, e.g. due to use of the :doc:`fix adapt
+<fix_adapt>` command.
+
+The *template* style allows molecular topology (bonds,angles,etc) to be
+defined via a molecule template using the :doc:`molecule <molecule>`
+command.  The template stores one or more molecules with a single copy
+of the topology info (bonds,angles,etc) of each.  Individual atoms only
+store a template index and template atom to identify which molecule and
+which atom-within-the-molecule they represent.  Using the *template*
+style instead of the *bond*, *angle*, *molecular* styles can save memory
+for systems comprised of a large number of small molecules, all of a
+single type (or small number of types).  See the paper by Grime and
+Voth, in :ref:`(Grime) <Grime>`, for examples of how this can be
+advantageous for large-scale coarse-grained systems.  The
+``examples/template`` directory has a few demo inputs and examples
+showing the use of the *template* atom style versus *molecular*.
+
+.. note::
+
+   When using the *template* style with a :doc:`molecule template
+   <molecule>` that contains multiple molecules, you should ensure the
+   atom types, bond types, angle_types, etc in all the molecules are
+   consistent.  E.g. if one molecule represents H2O and another CO2,
+   then you probably do not want each molecule file to define 2 atom
+   types and a single bond type, because they will conflict with each
+   other when a mixture system of H2O and CO2 molecules is defined,
+   e.g. by the :doc:`read_data <read_data>` command.  Rather the H2O
+   molecule should define atom types 1 and 2, and bond type 1.  And
+   the CO2 molecule should define atom types 3 and 4 (or atom types 3
+   and 2 if a single oxygen type is desired), and bond type 2.
+
+For the *tri* style, particles can be planar triangles which each
+stores a per-particle mass and size and orientation (i.e. the corner
+points of the triangle).  Each particle stores a flag in the tri
+vector which indicates whether it is a triangle (1) or a point
+particle (0).
+
 ----------
 
 Typically, simulations require only a single (non-hybrid) atom style.
@@ -326,11 +441,12 @@ dipole".  When a hybrid style is used, atoms store and communicate the
 union of all quantities implied by the individual styles.
 
 When using the *hybrid* style, you cannot combine the *template* style
-with another molecular style that stores bond,angle,etc info on a
+with another molecular style that stores bond, angle, etc info on a
 per-atom basis.
 
-LAMMPS can be extended with new atom styles as well as new body
-styles; see the :doc:`Modify <Modify>` doc page.
+LAMMPS can be extended with new atom styles as well as new body styles;
+see the corresponding manual page on :doc:`modifying & extending LAMMPS
+<Modify_atom>`.
 
 ----------
 
@@ -346,54 +462,20 @@ This command cannot be used after the simulation box is defined by a
 
 Many of the styles listed above are only enabled if LAMMPS was built
 with a specific package, as listed below.  See the :doc:`Build package
-<Build_package>` page for more info.
-
-The *amoeba* style is part of the AMOEBA package.
-
-The *angle*, *bond*, *full*, *molecular*, and *template* styles are
-part of the MOLECULE package.
-
-The *line* and *tri* styles are part of the ASPHERE package.
-
-The *body* style is part of the BODY package.
-
-The *dipole* style is part of the DIPOLE package.
-
-The *peri* style is part of the PERI package for Peridynamics.
-
-The *oxdna* style is part of the CG-DNA package for coarse-grained
-simulation of DNA and RNA.
-
-The *electron* style is part of the EFF package for :doc:`electronic
-force fields <pair_eff>`.
-
-The *dpd* style is part of the DPD-REACT package for dissipative
-particle dynamics (DPD).
-
-The *edpd*, *mdpd*, and *tdpd* styles are part of the DPD-MESO package
-for energy-conserving dissipative particle dynamics (eDPD), many-body
-dissipative particle dynamics (mDPD), and transport dissipative particle
-dynamics (tDPD), respectively.
-
-The *sph* style is part of the SPH package for smoothed particle
-hydrodynamics (SPH).  See `this PDF guide
-<PDF/SPH_LAMMPS_userguide.pdf>`_ to using SPH in LAMMPS.
-
-The *spin* style is part of the SPIN package.
-
-The *wavepacket* style is part of the AWPMD package for the
-:doc:`antisymmetrized wave packet MD method <pair_awpmd>`.
+<Build_package>` page for more info.  The table above lists which package
+is required for individual atom styles.
 
 Related commands
 """"""""""""""""
 
-:doc:`read_data <read_data>`, :doc:`pair_style <pair_style>`
+:doc:`read_data <read_data>`, :doc:`pair_style <pair_style>`,
+:doc:`fix property/atom <fix_property_atom>`, :doc:`set <set>`
 
 Default
 """""""
 
-The default atom style is atomic.  If atom_style sphere is used its
-default argument is 0.
+The default atom style is *atomic*.  If atom_style *sphere* or
+*bpm/sphere* is used, its default argument is 0.
 
 ----------
 
diff --git a/doc/src/bond_bpm_rotational.rst b/doc/src/bond_bpm_rotational.rst
index 7459d491d6..6734bd7bfe 100644
--- a/doc/src/bond_bpm_rotational.rst
+++ b/doc/src/bond_bpm_rotational.rst
@@ -147,8 +147,8 @@ By default, pair forces are not calculated between bonded particles.
 Pair forces can alternatively be overlaid on top of bond forces by setting
 the *overlay/pair* keyword to *yes*. These settings require specific
 :doc:`special_bonds <special_bonds>` settings described in the
-restrictions.  Further details can be found in the :doc:`how to
-<Howto_bpm>` page on BPMs.
+restrictions.  Further details can be found in the :doc:`how to <Howto_bpm>`
+page on BPMs.
 
 .. versionadded:: 28Mar2023
 
diff --git a/doc/src/bond_bpm_spring.rst b/doc/src/bond_bpm_spring.rst
index 04ff4d5991..a03c832249 100644
--- a/doc/src/bond_bpm_spring.rst
+++ b/doc/src/bond_bpm_spring.rst
@@ -113,8 +113,8 @@ By default, pair forces are not calculated between bonded particles.
 Pair forces can alternatively be overlaid on top of bond forces by setting
 the *overlay/pair* keyword to *yes*. These settings require specific
 :doc:`special_bonds <special_bonds>` settings described in the
-restrictions.  Further details can be found in the :doc:`how to
-<Howto_bpm>` page on BPMs.
+restrictions.  Further details can be found in the :doc:`how to <Howto_bpm>`
+page on BPMs.
 
 .. versionadded:: 28Mar2023
 
diff --git a/doc/src/bond_lepton.rst b/doc/src/bond_lepton.rst
index adfd30627d..9429535af8 100644
--- a/doc/src/bond_lepton.rst
+++ b/doc/src/bond_lepton.rst
@@ -11,7 +11,16 @@ Syntax
 
 .. code-block:: LAMMPS
 
-   bond_style lepton
+   bond_style style args
+
+* style = *lepton*
+* args = optional arguments
+
+.. parsed-literal::
+
+   args = *auto_offset* or *no_offset*
+     *auto_offset* = offset the potential energy so that the value at r0 is 0.0 (default)
+     *no_offset* = do not offset the potential energy
 
 Examples
 """"""""
@@ -19,6 +28,7 @@ Examples
 .. code-block:: LAMMPS
 
    bond_style lepton
+   bond_style lepton no_offset
 
    bond_coeff  1  1.5 "k*r^2; k=250.0"
    bond_coeff  2  1.1 "k2*r^2 + k3*r^3 + k4*r^4; k2=300.0; k3=-100.0; k4=50.0"
@@ -40,6 +50,13 @@ constant *K* of 200.0 energy units:
 
    U_{bond,i} = K (r_i - r_0)^2 = K r^2 \qquad r = r_i - r_0
 
+.. versionchanged:: TBD
+
+By default the potential energy U is shifted so that he value U is 0.0
+for $r = r_0$.  This is equivalent to using the optional keyword
+*auto_offset*.  When using the keyword *no_offset* instead, the
+potential energy is not shifted.
+
 The `Lepton library <https://simtk.org/projects/lepton>`_, that the
 *lepton* bond style interfaces with, evaluates this expression string at
 run time to compute the pairwise energy.  It also creates an analytical
diff --git a/doc/src/compute.rst b/doc/src/compute.rst
index 6737203618..7b620deed7 100644
--- a/doc/src/compute.rst
+++ b/doc/src/compute.rst
@@ -264,6 +264,7 @@ The individual style names on the :doc:`Commands compute <Commands_compute>` pag
 * :doc:`nbond/atom <compute_nbond_atom>` - calculates number of bonds per atom
 * :doc:`omega/chunk <compute_omega_chunk>` - angular velocity for each chunk
 * :doc:`orientorder/atom <compute_orientorder_atom>` - Steinhardt bond orientational order parameters Ql
+* :doc:`pace <compute_pace>` - atomic cluster expansion descriptors and related quantities
 * :doc:`pair <compute_pair>` - values computed by a pair style
 * :doc:`pair/local <compute_pair_local>` - distance/energy/force of each pairwise interaction
 * :doc:`pe <compute_pe>` - potential energy
@@ -279,12 +280,15 @@ The individual style names on the :doc:`Commands compute <Commands_compute>` pag
 * :doc:`property/grid <compute_property_grid>` - convert per-grid attributes to per-grid vectors/arrays
 * :doc:`property/local <compute_property_local>` - convert local attributes to local vectors/arrays
 * :doc:`ptm/atom <compute_ptm_atom>` - determines the local lattice structure based on the Polyhedral Template Matching method
+* :doc:`rattlers/atom <compute_rattlers_atom>` - identify under-coordinated rattler atoms
 * :doc:`rdf <compute_rdf>` - radial distribution function :math:`g(r)` histogram of group of atoms
+* :doc:`reaxff/atom <compute_reaxff_atom>` - extract ReaxFF bond information
 * :doc:`reduce <compute_reduce>` - combine per-atom quantities into a single global value
 * :doc:`reduce/chunk <compute_reduce_chunk>` - reduce per-atom quantities within each chunk
 * :doc:`reduce/region <compute_reduce>` - same as compute reduce, within a region
 * :doc:`rigid/local <compute_rigid_local>` - extract rigid body attributes
 * :doc:`saed <compute_saed>` - electron diffraction intensity on a mesh of reciprocal lattice nodes
+* :doc:`slcsa/atom <compute_slcsa_atom>` - perform Supervised Learning Crystal Structure Analysis (SL-CSA)
 * :doc:`slice <compute_slice>` - extract values from global vector or array
 * :doc:`smd/contact/radius <compute_smd_contact_radius>` - contact radius for Smooth Mach Dynamics
 * :doc:`smd/damage <compute_smd_damage>` - damage status of SPH particles in Smooth Mach Dynamics
diff --git a/doc/src/compute_contact_atom.rst b/doc/src/compute_contact_atom.rst
index 31aa24aa60..b7ed062ff6 100644
--- a/doc/src/compute_contact_atom.rst
+++ b/doc/src/compute_contact_atom.rst
@@ -36,6 +36,9 @@ sum of the radii of the two particles.
 The value of the contact number will be 0.0 for atoms not in the
 specified compute group.
 
+The optional *group2-ID* argument allows to specify from which group atoms
+contribute to the coordination number. Default setting is group 'all'.
+
 Output info
 """""""""""
 
@@ -47,9 +50,6 @@ overview of LAMMPS output options.
 The per-atom vector values will be a number :math:`\ge 0.0`, as explained
 above.
 
-The optional *group2-ID* argument allows to specify from which group atoms
-contribute to the coordination number. Default setting is group 'all.'
-
 Restrictions
 """"""""""""
 
@@ -69,6 +69,3 @@ Default
 """""""
 
 *group2-ID* = all
-
-
-none
diff --git a/doc/src/compute_pace.rst b/doc/src/compute_pace.rst
new file mode 100644
index 0000000000..c510319dfc
--- /dev/null
+++ b/doc/src/compute_pace.rst
@@ -0,0 +1,253 @@
+.. index:: compute pace
+
+compute pace command
+========================
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   compute ID group-ID pace ace_potential_filename ... keyword values ...
+
+* ID, group-ID are documented in :doc:`compute <compute>` command
+* pace = style name of this compute command
+* ace_potential_filename = file name (in the .yace or .ace format from :doc:`pace pair_style <pair_pace>`) including ACE hyper-parameters, bonds, and generalized coupling coefficients
+* keyword = *bikflag* or *dgradflag*
+
+  .. parsed-literal::
+
+       *bikflag* value = *0* or *1*
+          *0* = descriptors are summed over atoms of each type
+          *1* = descriptors are listed separately for each atom
+       *dgradflag* value = *0* or *1*
+          *0* = descriptor gradients are summed over atoms of each type
+          *1* = descriptor gradients are listed separately for each atom pair
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   compute pace all pace coupling_coefficients.yace
+   compute pace all pace coupling_coefficients.yace 0 1
+   compute pace all pace coupling_coefficients.yace 1 1
+
+Description
+"""""""""""
+
+.. versionadded:: TBD
+
+This compute calculates a set of quantities related to the atomic
+cluster expansion (ACE) descriptors of the atoms in a group.  ACE
+descriptors are highly general atomic descriptors, encoding the radial
+and angular distribution of neighbor atoms, up to arbitrary bond order
+(rank).  The detailed mathematical definition is given in the paper by
+:ref:`(Drautz) <Drautz19>`.  These descriptors are used in the
+:doc:`pace pair_style <pair_pace>`.  Quantities obtained from `compute
+pace` are related to those used in :doc:`pace pair_style <pair_pace>` to
+evaluate atomic energies, forces, and stresses for linear ACE models.
+
+For example, the energy for a linear ACE model is calculated as:
+:math:`E=\sum_i^{N\_atoms} \sum_{\boldsymbol{\nu}} c_{\boldsymbol{\nu}}
+B_{i,\boldsymbol{\boldsymbol{\nu}}}`.  The ACE descriptors for atom `i`
+:math:`B_{i,\boldsymbol{\nu}}`, and :math:`c_{\nu}` are linear model
+parameters.  The detailed definition and indexing convention for ACE
+descriptors is given in :ref:`(Drautz) <Drautz19>`.  In short, body
+order :math:`N`, angular character, radial character, and chemical
+elements in the *N-body* descriptor are encoded by :math:`\nu`.  In the
+:doc:`pace pair_style <pair_pace>`, the linear model parameters and the
+ACE descriptors are combined for efficient evaluation of energies and
+forces.  The details and benefits of this efficient implementation are
+given in :ref:`(Lysogorskiy) <Lysogorskiy21>`, but the combined
+descriptors and linear model parameters for the purposes of `compute
+pace` may be expressed in terms of the ACE descriptors mentioned above.
+
+:math:`c_{\boldsymbol{\nu}} B_{i,\boldsymbol{\nu}}= \sum_{\boldsymbol{\nu}' \in \boldsymbol{\nu} } \big[ c_{\boldsymbol{\nu}} C(\boldsymbol{\nu}') \big] A_{i,\boldsymbol{\nu}'}`
+
+where the bracketed terms on the right-hand side are the combined functions
+with linear model parameters typically provided in the `<name>.yace` potential
+file for `pace pair_style`. When these bracketed terms are multiplied by the
+products of the atomic base from :ref:`(Drautz) <Drautz19>`,
+:math:`A_{i,\boldsymbol{\nu'}}`, the ACE descriptors are recovered but they
+are also scaled by linear model parameters. The generalized coupling coefficients,
+written in short-hand here as :math:`C(\boldsymbol{\nu}')`, are the generalized
+Clebsch-Gordan or generalized Wigner symbols. It may be desirable to reverse the
+combination of these descriptors and the linear model parameters so that the
+ACE descriptors themselves may be used. The ACE descriptors and their gradients
+are often used when training ACE models, performing custom data analysis,
+generalizing ACE model forms, and other tasks that involve direct computation of
+descriptors. The key utility of `compute pace` is that it can compute the ACE
+descriptors and gradients so that these tasks can be performed during a LAMMPS
+simulation or so that LAMMPS can be used as a driver for tasks like ACE model
+parameterization. To see how this command can be used within a Python workflow
+to train ACE potentials, see the examples in
+`FitSNAP <https://github.com/FitSNAP/FitSNAP>`_. Examples on using outputs from
+this compute to construct general ACE potential forms are demonstrated in
+:ref:`(Goff) <Goff23>`. The various keywords and inputs to `compute pace`
+determine what ACE descriptors and related quantities are returned in a compute
+array.
+
+The coefficient file, `<name>.yace`, ultimately defines the number of ACE
+descriptors to be computed, their maximum body-order, the degree of angular
+character they have, the degree of radial character they have, the chemical
+character (which element-element interactions are encoded by descriptors),
+and other hyper-parameters defined in :ref:`(Drautz) <Drautz19>`. These may
+be modeled after the potential files in :doc:`pace pair_style <pair_pace>`,
+and have the same format. Details on how to generate the coefficient files
+to train ACE models may be found in `FitSNAP <https://github.com/FitSNAP/FitSNAP>`_.
+
+The keyword *bikflag* determines whether or not to list the descriptors of
+each atom separately, or sum them together and list in a single row. If
+*bikflag* is set to *0* then a single descriptor row is used, which contains
+the per-atom ACE descriptors :math:`B_{i,\boldsymbol{\nu}}` summed over all
+atoms *i* to produce :math:`B_{\boldsymbol{\nu}}`. If *bikflag* is set to
+*1* this is replaced by a separate per-atom ACE descriptor row for each atom.
+In this case, the entries in the final column for these rows are set to zero.
+
+The keyword *dgradflag* determines whether to sum atom gradients or list
+them separately. If *dgradflag* is set to 0, the ACE
+descriptor gradients w.r.t. atom *j* are summed over all atoms *i'*
+of, which may be useful when training linear ACE models on atomic forces.
+If *dgradflag* is set to 1, gradients are listed separately for each pair of atoms.
+Each row corresponds
+to a single term :math:`\frac{\partial {B_{i,\boldsymbol{\nu}}}}{\partial {r}^a_j}`
+where :math:`{r}^a_j` is the *a-th* position coordinate of the atom with global
+index *j*. This also changes the number of columns to be equal to the number of
+ACE descriptors, with 3 additional columns representing the indices :math:`i`,
+:math:`j`, and :math:`a`, as explained more in the Output info section below.
+The option *dgradflag=1* requires that *bikflag=1*.
+
+.. note::
+
+    It is noted here that in contrast to :doc:`pace pair_style <pair_pace>`,
+    the *.yace* file for `compute pace` typically should not contain linear
+    parameters for an ACE potential. If :math:`c_{\nu}` are included,
+    the value of the descriptor will not be returned in the `compute` array,
+    but instead, the energy contribution from that descriptor will be returned.
+    Do not do this unless it is the desired behavior.
+    *In short, you should not plug in a '.yace' for a pace potential into this
+    compute to evaluate descriptors.*
+
+.. note::
+
+    *Generalized Clebsch-Gordan or Generalized Wigner symbols (with appropriate
+    factors) must be used to evaluate ACE descriptors with this compute.* There
+    are multiple ways to define the generalized coupling coefficients. Because
+    of this, this compute will not revert your potential file to a coupling
+    coefficient file. Instead this compute allows the user to supply coupling
+    coefficients that follow any convention.
+
+.. note::
+
+   Using *dgradflag* = 1 produces a global array with :math:`N + 3N^2 + 1` rows
+   which becomes expensive for systems with more than 1000 atoms.
+
+.. note::
+
+   If you have a bonded system, then the settings of :doc:`special_bonds
+   <special_bonds>` command can remove pairwise interactions between
+   atoms in the same bond, angle, or dihedral.  This is the default
+   setting for the :doc:`special_bonds <special_bonds>` command, and
+   means those pairwise interactions do not appear in the neighbor list.
+   Because this fix uses the neighbor list, it also means those pairs
+   will not be included in the calculation.  One way to get around this,
+   is to write a dump file, and use the :doc:`rerun <rerun>` command to
+   compute the ACE descriptors for snapshots in the dump file.
+   The rerun script can use a :doc:`special_bonds <special_bonds>`
+   command that includes all pairs in the neighbor list.
+
+----------
+
+Output info
+"""""""""""
+
+Compute *pace* evaluates a global array.  The columns are arranged into
+*ntypes* blocks, listed in order of atom type *I*\ . Each block contains
+one column for each ACE descriptor, the same as for compute
+*sna/atom*\ in :doc:`compute snap <compute_sna_atom>`. A final column contains the corresponding energy, force
+component on an atom, or virial stress component. The rows of the array
+appear in the following order:
+
+* 1 row: *pace* average descriptor values for all atoms of type *I*
+* 3\*\ *n* force rows: quantities, with derivatives w.r.t. x, y, and z coordinate of atom *i* appearing in consecutive rows. The atoms are sorted based on atom ID and run up to the total number of atoms, *n*.
+* 6 rows: *virial* quantities summed for all atoms of type *I*
+
+For example, if :math:`\# \; B_{i, \boldsymbol{\nu}}` =30 and ntypes=1, the number of columns in the
+The number of columns in the global array generated by *pace* are 31, and
+931, respectively, while the number of rows is 1+3\*\ *n*\ +6, where *n*
+is the total number of atoms.
+
+If the *bik* keyword is set to 1, the structure of the pace array is expanded.
+The first :math:`N` rows of the pace array
+correspond to :math:`\# \; B_{i,\boldsymbol{\nu}}` instead of a single row summed over atoms :math:`i`.
+In this case, the entries in the final column for these rows
+are set to zero. Also, each row contains only non-zero entries for the
+columns corresponding to the type of that atom. This is not true in the case
+of *dgradflag* keyword = 1 (see below).
+
+If the *dgradflag* keyword is set to 1, this changes the structure of the
+global array completely.
+Here the per-atom quantities are replaced with rows corresponding to
+descriptor gradient components on single atoms:
+
+.. math::
+
+  \frac{\partial {B_{i,\boldsymbol{\nu}}  }}{\partial {r}^a_j}
+
+where :math:`{r}^a_j` is the *a-th* position coordinate of the atom with global
+index *j*. The rows are
+organized in chunks, where each chunk corresponds to an atom with global index
+:math:`j`. The rows in an atom :math:`j` chunk correspond to
+atoms with global index :math:`i`. The total number of rows for
+these descriptor gradients is therefore :math:`3N^2`.
+The number of columns is equal to the number of ACE descriptors,
+plus 3 additional left-most columns representing the global atom indices
+:math:`i`, :math:`j`,
+and Cartesian direction :math:`a`  (0, 1, 2, for x, y, z).
+The first 3 columns of the first :math:`N` rows belong to the reference
+potential force components. The remaining K columns contain the
+:math:`B_{i,\boldsymbol{\nu}}` per-atom descriptors corresponding to the non-zero entries
+obtained when *bikflag* = 1.
+The first column of the last row, after the first
+:math:`N + 3N^2` rows, contains the reference potential
+energy. The virial components are not used with this option. The total number of
+rows is therefore :math:`N + 3N^2 + 1` and the number of columns is :math:`K + 3`.
+
+These values can be accessed by any command that uses global values
+from a compute as input.  See the :doc:`Howto output <Howto_output>` doc
+page for an overview of LAMMPS output options.
+
+Restrictions
+""""""""""""
+
+These computes are part of the ML-PACE package.  They are only enabled
+if LAMMPS was built with that package.  See the :doc:`Build package
+<Build_package>` page for more info.
+
+Related commands
+""""""""""""""""
+
+:doc:`pair_style pace <pair_pace>`
+:doc:`pair_style snap <pair_snap>`
+:doc:`compute snap <compute_sna_atom>`
+
+Default
+"""""""
+
+The optional keyword defaults are *bikflag* = 0,
+*dgradflag* = 0
+
+----------
+
+.. _Drautz19:
+
+**(Drautz)** Drautz, Phys Rev B, 99, 014104 (2019).
+
+.. _Lysogorskiy21:
+
+**(Lysogorskiy)** Lysogorskiy, van der Oord, Bochkarev, Menon, Rinaldi, Hammerschmidt, Mrovec, Thompson, Csanyi, Ortner, Drautz, npj Comp Mat, 7, 97 (2021).
+
+.. _Goff23:
+
+**(Goff)** Goff, Zhang, Negre, Rohskopf, Niklasson, Journal of Chemical Theory and Computation 19, no. 13 (2023).
diff --git a/doc/src/compute_rattlers_atom.rst b/doc/src/compute_rattlers_atom.rst
new file mode 100644
index 0000000000..cf4e888657
--- /dev/null
+++ b/doc/src/compute_rattlers_atom.rst
@@ -0,0 +1,92 @@
+.. index:: compute rattlers/atom
+
+compute rattlers/atom command
+=============================
+
+Syntax
+""""""
+
+.. parsed-literal::
+
+   compute ID group-ID rattlers/atom cutoff zmin ntries
+
+* ID, group-ID are documented in :doc:`compute <compute>` command
+* rattlers/atom = style name of this compute command
+* cutoff = *type* or *radius*
+
+  .. parsed-literal::
+
+       *type* = cutoffs determined based on atom types
+       *radius* = cutoffs determined based on atom diameters (atom style sphere)
+
+* zmin = minimum coordination for a non-rattler atom
+* ntries = maximum number of iterations to remove rattlers
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   compute 1 all rattlers/atom type 4 10
+
+Description
+"""""""""""
+
+.. versionadded:: TBD
+
+Define a compute that identifies rattlers in a system. Rattlers are often
+identified in granular or glassy packings as under-coordinated atoms that
+do not have the required number of contacts to constrain their translational
+degrees of freedom. Such atoms are not considered rigid and can often freely
+rattle around in the system. This compute identifies rattlers which can be
+helpful for excluding them from analysis or providing extra damping forces
+to accelerate relaxation processes.
+
+Rattlers are identified using an interactive approach. The coordination
+number of all atoms is first calculated.  The *type* and *radius* settings
+are used to select whether interaction cutoffs are determined by atom
+types or by the sum of atomic radii (atom style sphere), respectively.
+Rattlers are then identified as atoms with a coordination number less
+than *zmin* and are removed from consideration. Atomic coordination
+numbers are then recalculated, excluding previously identified rattlers,
+to identify a new set of rattlers. This process is iterated up to a maximum
+of *ntries* or until no new rattlers are identified and the remaining
+atoms form a stable network of contacts.
+
+In dense homogeneous systems where the average atom coordination number
+is expected to be larger than *zmin*, this process usually only takes a few
+iterations and a value of *ntries* around ten may be sufficient. In systems
+with significant heterogeneity or average coordination numbers less than
+*zmin*, an appropriate value of *ntries* depends heavily on the specific
+system. For instance, a linear chain of N rattler atoms with a *zmin* of 2
+would take N/2 iterations to identify that all the atoms are rattlers.
+
+Output info
+"""""""""""
+
+This compute calculates a per-atom vector and a global scalar. The vector
+designates which atoms are rattlers, indicated by a value 1. Non-rattlers
+have a value of 0. The global scalar returns the total number of rattlers
+in the system. See the :doc:`Howto output <Howto_output>` page for an
+overview of LAMMPS output options.
+
+Restrictions
+""""""""""""
+
+This compute is part of the EXTRA-COMPUTE package.  It is only enabled if
+LAMMPS was built with that package.  See the
+:doc:`Build package <Build_package>` page for more info.
+
+The *radius* cutoff option requires that atoms store a radius as defined by the
+:doc:`atom_style sphere <atom_style>` or similar commands.
+
+Related commands
+""""""""""""""""
+
+:doc:`compute coord/atom <compute_coord_atom>`
+:doc:`compute contact/atom <compute_contact_atom>`
+
+Default
+"""""""
+
+none
diff --git a/doc/src/compute_reaxff_atom.rst b/doc/src/compute_reaxff_atom.rst
new file mode 100644
index 0000000000..997ad02e9f
--- /dev/null
+++ b/doc/src/compute_reaxff_atom.rst
@@ -0,0 +1,97 @@
+.. index:: compute reaxff/atom
+.. index:: compute reaxff/atom/kk
+
+compute reaxff/atom command
+===========================
+
+Accelerator Variants: *reaxff/atom/kk*
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   compute ID group-ID reaxff/atom attribute args ... keyword value ...
+
+* ID, group-ID are documented in :doc:`compute <compute>` command
+* reaxff/atom = name of this compute command
+* attribute = *pair*
+
+  .. parsed-literal::
+
+       *pair* args = nsub
+         nsub = *n*-instance of a sub-style, if a pair style is used multiple times in a hybrid style
+
+* keyword = *bonds*
+
+  .. parsed-literal::
+
+       *bonds* value = *no* or *yes*
+         *no* = ignore list of local bonds
+         *yes* = include list of local bonds
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   compute 1 all reaxff/atom bonds yes
+
+Description
+"""""""""""
+
+.. versionadded:: TBD
+
+Define a computation that extracts bond information computed by the ReaxFF
+potential specified by :doc:`pair_style reaxff <pair_reaxff>`.
+
+By default, it produces per-atom data that includes the following columns:
+
+* abo = atom bond order (sum of all bonds)
+* nlp = number of lone pairs
+* nb = number of bonds
+
+Bonds will only be included if its atoms are in the group.
+
+In addition, if ``bonds`` is set to ``yes``, the compute will also produce a
+local array of all bonds on the current processor whose atoms are in the group.
+The columns of each entry of this local array are:
+
+* id_i = atom i id of bond
+* id_j = atom j id of bond
+* bo = bond order of bond
+
+Output info
+"""""""""""
+
+This compute calculates a per-atom array and local array depending on the
+number of keywords. The number of rows in the local array is the number of
+bonds as described above. Both per-atom and local array have 3 columns.
+
+The arrays can be accessed by any command that uses local and per-atom values
+from a compute as input.  See the :doc:`Howto output <Howto_output>` page for
+an overview of LAMMPS output options.
+
+----------
+
+.. include:: accel_styles.rst
+
+----------
+
+Restrictions
+""""""""""""
+
+The compute reaxff/atom command requires that the :doc:`pair_style reaxff
+<pair_reaxff>` is invoked.  This fix is part of the REAXFF package.  It is only
+enabled if LAMMPS was built with that package.  See the :doc:`Build package
+<Build_package>` page for more info.
+
+Related commands
+""""""""""""""""
+
+:doc:`pair_style reaxff <pair_reaxff>`
+
+Default
+"""""""
+
+The option defaults are *bonds* = *no*.
diff --git a/doc/src/compute_saed.rst b/doc/src/compute_saed.rst
index 9ec455d03b..3079afb7ce 100644
--- a/doc/src/compute_saed.rst
+++ b/doc/src/compute_saed.rst
@@ -68,7 +68,7 @@ reciprocal lattice nodes. The mesh spacing is defined either (a)  by
 the entire simulation domain or (b) manually using selected values as
 shown in the 2D diagram below.
 
-.. image:: img/saed_mesh.jpg
+.. image:: img/saed_mesh.png
    :scale: 75%
    :align: center
 
diff --git a/doc/src/compute_slcsa_atom.rst b/doc/src/compute_slcsa_atom.rst
new file mode 100644
index 0000000000..6b2708c4d9
--- /dev/null
+++ b/doc/src/compute_slcsa_atom.rst
@@ -0,0 +1,162 @@
+.. index:: compute slcsa/atom
+
+compute slcsa/atom command
+============================
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   compute ID group-ID slcsa/atom twojmax nclasses db_mean_descriptor_file lda_file lr_decision_file lr_bias_file maha_file value
+
+* ID, group-ID are documented in :doc:`compute <compute>` command
+* slcsa/atom = style name of this compute command
+* twojmax = band limit for bispectrum components (non-negative integer)
+* nclasses = number of crystal structures used in the database for the classifier SL-CSA
+* db_mean_descriptor_file = file name of file containing the database mean descriptor
+* lda_file = file name of file containing the linear discriminant analysis matrix for dimension reduction
+* lr_decision_file = file name of file containing the scaling matrix for logistic regression classification
+* lr_bias_file = file name of file containing the bias vector for logistic regression classification
+* maha_file = file name of file containing for each crystal structure: the Mahalanobis distance threshold for sanity check purposes, the average reduced descriptor and the inverse of the corresponding covariance matrix
+* c_ID[*] = compute ID of previously required *compute sna/atom* command
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   compute b1 all sna/atom 9.0 0.99363 8 0.5 1.0 rmin0 0.0 nnn 24 wmode 1 delta 0.3
+   compute b2 all slcsa/atom 8 4 mean_descriptors.dat lda_scalings.dat lr_decision.dat lr_bias.dat maha_thresholds.dat c_b1[*]
+
+Description
+"""""""""""
+
+.. versionadded:: TBD
+
+Define a computation that performs the Supervised Learning Crystal
+Structure Analysis (SL-CSA) from :ref:`(Lafourcade) <Lafourcade2023_1>`
+for each atom in the group. The SL-CSA tool takes as an input a per-atom
+descriptor (bispectrum) that is computed through the *compute sna/atom*
+command and then proceeds to a dimension reduction step followed by a
+logistic regression in order to assign a probable crystal structure to
+each atom in the group. The SL-CSA tool is pre-trained on a database
+containing :math:`C` distinct crystal structures from which a crystal
+structure classifier is derived and a tutorial to build such a tool is
+available at `SL-CSA <https://github.com/lafourcadep/SL-CSA>`_.
+
+The first step of the SL-CSA tool consists in performing a dimension
+reduction of the per-atom descriptor :math:`\mathbf{B}^i \in
+\mathbb{R}^{D}` through the Linear Discriminant Analysis (LDA) method,
+leading to a new projected descriptor
+:math:`\mathbf{x}^i=\mathrm{P}_\mathrm{LDA}(\mathbf{B}^i):\mathbb{R}^D
+\rightarrow \mathbb{R}^{d=C-1}`:
+
+.. math::
+
+   \mathbf{x}^i = \mathbf{C}^T_\mathrm{LDA} \cdot (\mathbf{B}^i - \mu^\mathbf{B}_\mathrm{db})
+
+where :math:`\mathbf{C}^T_\mathrm{LDA} \in \mathbb{R}^{D \times d}` is
+the reduction coefficients matrix of the LDA model read in file
+*lda_file*, :math:`\mathbf{B}^i \in \mathbb{R}^{D}` is the bispectrum of
+atom :math:`i` and :math:`\mu^\mathbf{B}_\mathrm{db} \in \mathbb{R}^{D}`
+is the average descriptor of the entire database. The latter is computed
+from the average descriptors of each crystal structure read from the
+file *mean_descriptors_file*.
+
+The new projected descriptor with dimension :math:`d=C-1` allows for a
+good separation of different crystal structures fingerprints in the
+latent space.
+
+Once the dimension reduction step is performed by means of LDA, the new
+descriptor :math:`\mathbf{x}^i \in \mathbb{R}^{d=C-1}` is taken as an
+input for performing a multinomial logistic regression (LR) which
+provides a score vector
+:math:`\mathbf{s}^i=\mathrm{P}_\mathrm{LR}(\mathbf{x}^i):\mathbb{R}^d
+\rightarrow \mathbb{R}^C` defined as:
+
+.. math::
+
+   \mathbf{s}^i = \mathbf{b}_\mathrm{LR} + \mathbf{D}_\mathrm{LR} \cdot {\mathbf{x}^i}^T
+
+with :math:`\mathbf{b}_\mathrm{LR} \in \mathbb{R}^C` and
+:math:`\mathbf{D}_\mathrm{LR} \in \mathbb{R}^{C \times d}` the bias
+vector and decision matrix of the LR model after training both read in
+files *lr_fil1* and *lr_file2* respectively.
+
+Finally, a probability vector
+:math:`\mathbf{p}^i=\mathrm{P}_\mathrm{LR}(\mathbf{x}^i):\mathbb{R}^d
+\rightarrow \mathbb{R}^C` is defined as:
+
+.. math::
+
+   \mathbf{p}^i = \frac{\mathrm{exp}(\mathbf{s}^i)}{\sum\limits_{j} \mathrm{exp}(s^i_j) }
+
+from which the crystal structure assigned to each atom with descriptor
+:math:`\mathbf{B}^i` and projected descriptor :math:`\mathbf{x}^i` is
+computed as the *argmax* of the probability vector
+:math:`\mathbf{p}^i`. Since the logistic regression step systematically
+attributes a crystal structure to each atom, a sanity check is needed to
+avoid misclassification. To this end, a per-atom Mahalanobis distance to
+each crystal structure *CS* present in the database is computed:
+
+.. math::
+
+   d_\mathrm{Mahalanobis}^{i \rightarrow \mathrm{CS}} = \sqrt{(\mathbf{x}^i - \mathbf{\mu}^\mathbf{x}_\mathrm{CS})^\mathrm{T} \cdot \mathbf{\Sigma}^{-1}_\mathrm{CS} \cdot (\mathbf{x}^i - \mathbf{\mu}^\mathbf{x}_\mathrm{CS}) }
+
+where :math:`\mathbf{\mu}^\mathbf{x}_\mathrm{CS} \in \mathbb{R}^{d}` is
+the average projected descriptor of crystal structure *CS* in the
+database and where :math:`\mathbf{\Sigma}_\mathrm{CS} \in \mathbb{R}^{d
+\times d}` is the corresponding covariance matrix. Finally, if the
+Mahalanobis distance to crystal structure *CS* for atom *i* is greater
+than the pre-determined threshold, no crystal structure is assigned to
+atom *i*. The Mahalanobis distance thresholds are read in file
+*maha_file* while the covariance matrices are read in file
+*covmat_file*.
+
+The `SL-CSA <https://github.com/lafourcadep/SL-CSA>`_ framework provides
+an automatic computation of the different matrices and thresholds
+required for a proper classification and writes down all the required
+files for calling the *compute slcsa/atom* command.
+
+The *compute slcsa/atom* command requires that the :doc:`compute
+sna/atom <compute_sna_atom>` command is called before as it takes the
+resulting per-atom bispectrum as an input. In addition, it is crucial
+that the value *twojmax* is set to the same value of the value *twojmax*
+used in the *compute sna/atom* command, as well as that the value
+*nclasses* is set to the number of crystal structures used in the
+database to train the SL-CSA tool.
+
+Output info
+"""""""""""
+
+By default, this compute computes the Mahalanobis distances to the
+different crystal structures present in the database in addition to
+assigning a crystal structure for each atom as a per-atom vector, which
+can be accessed by any command that uses per-atom values from a compute
+as input.  See the :doc:`Howto output <Howto_output>` page for an
+overview of LAMMPS output options.
+
+Restrictions
+""""""""""""
+
+This compute is part of the EXTRA-COMPUTE package.  It is only enabled
+if LAMMPS was built with that package.  See the :doc:`Build package
+<Build_package>` page for more info.
+
+Related commands
+""""""""""""""""
+
+:doc:`compute sna/atom <compute_sna_atom>`
+
+Default
+"""""""
+
+none
+
+----------
+
+.. _Lafourcade2023_1:
+
+**(Lafourcade)** Lafourcade, Maillet, Denoual, Duval, Allera, Goryaeva, and Marinica,
+`Comp. Mat. Science, 230, 112534 (2023) <https://doi.org/10.1016/j.commatsci.2023.112534>`_
diff --git a/doc/src/compute_sna_atom.rst b/doc/src/compute_sna_atom.rst
index 8d06868f3d..179c362dc6 100644
--- a/doc/src/compute_sna_atom.rst
+++ b/doc/src/compute_sna_atom.rst
@@ -45,7 +45,7 @@ Syntax
 * w_1, w_2,... = list of neighbor weights, one for each type
 * nx, ny, nz = number of grid points in x, y, and z directions (positive integer)
 * zero or more keyword/value pairs may be appended
-* keyword = *rmin0* or *switchflag* or *bzeroflag* or *quadraticflag* or *chem* or *bnormflag* or *wselfallflag* or *bikflag* or *switchinnerflag* or *sinner* or *dinner* or *dgradflag*
+* keyword = *rmin0* or *switchflag* or *bzeroflag* or *quadraticflag* or *chem* or *bnormflag* or *wselfallflag* or *bikflag* or *switchinnerflag* or *sinner* or *dinner* or *dgradflag* or *nnn* or *wmode* or *delta*
 
   .. parsed-literal::
 
@@ -82,6 +82,16 @@ Syntax
           *0* = descriptor gradients are summed over atoms of each type
           *1* = descriptor gradients are listed separately for each atom pair
 
+* additional keyword = *nnn* or *wmode* or *delta*
+
+  .. parsed-literal::
+
+       *nnn* value = number of considered nearest neighbors to compute the bispectrum over a target specific number of neighbors (only implemented for compute sna/atom)
+       *wmode* value = weight function for finding optimal cutoff to match the target number of neighbors (required if nnn used, only implemented for compute sna/atom)
+          *0* = heavyside weight function
+          *1* = hyperbolic tangent weight function
+       *delta* value = transition interval centered at cutoff distance for hyperbolic tangent weight function (ignored if wmode=0, required if wmode=1, only implemented for compute sna/atom)
+
 Examples
 """"""""
 
@@ -94,6 +104,7 @@ Examples
    compute snap all snap 1.0 0.99363 6 3.81 3.83 1.0 0.93 chem 2 0 1
    compute snap all snap 1.0 0.99363 6 3.81 3.83 1.0 0.93 switchinnerflag 1 sinner 1.35 1.6 dinner 0.25 0.3
    compute bgrid all sna/grid/local 200 200 200 1.4 0.95 6 2.0 1.0
+   compute bnnn all sna/atom 9.0 0.99363 8 0.5 1.0 rmin0 0.0 nnn 24 wmode 1 delta 0.2
 
 Description
 """""""""""
@@ -433,6 +444,25 @@ requires that *bikflag=1*.
    The rerun script can use a :doc:`special_bonds <special_bonds>`
    command that includes all pairs in the neighbor list.
 
+The keyword *nnn* allows for the calculation of the bispectrum over a
+specific target number of neighbors. This option is only implemented for
+the compute *sna/atom*\ .  An optimal cutoff radius for defining the
+neighborhood of the central atom is calculated by means of a dichotomy
+algorithm.  This iterative process allows to assign weights to
+neighboring atoms in order to match the total sum of weights with the
+target number of neighbors.  Depending on the radial weight function
+used in that process, the cutoff radius can fluctuate a lot in the
+presence of thermal noise.  Therefore, in addition to the *nnn* keyword,
+the keyword *wmode* allows to choose whether a Heaviside (*wmode* = 0)
+function or a Hyperbolic tangent function (*wmode* = 1) should be used.
+If the Heaviside function is used, the cutoff radius exactly matches the
+distance between the central atom an its *nnn*'th neighbor.  However, in
+the case of the hyperbolic tangent function, the dichotomy algorithm
+allows to span the weights over a distance *delta* in order to reduce
+fluctuations in the resulting local atomic environment fingerprint.  The
+detailed formalism is given in the paper by Lafourcade et
+al. :ref:`(Lafourcade) <Lafourcade2023_2>`.
+
 ----------
 
 Output info
@@ -585,6 +615,7 @@ Related commands
 """"""""""""""""
 
 :doc:`pair_style snap <pair_snap>`
+:doc:`compute slcsa/atom <compute_slcsa_atom>`
 
 Default
 """""""
@@ -592,6 +623,7 @@ Default
 The optional keyword defaults are *rmin0* = 0,
 *switchflag* = 1, *bzeroflag* = 1, *quadraticflag* = 0,
 *bnormflag* = 0, *wselfallflag* = 0, *switchinnerflag* = 0,
+*nnn* = -1, *wmode* = 0, *delta* = 1.e-3
 
 ----------
 
@@ -623,3 +655,8 @@ of Angular Momentum, World Scientific, Singapore (1987).
 .. _Ellis2021:
 
 **(Ellis)** Ellis, Fiedler, Popoola, Modine, Stephens, Thompson, Cangi, Rajamanickam,  Phys Rev B, 104, 035120, (2021)
+
+.. _Lafourcade2023_2:
+
+**(Lafourcade)** Lafourcade, Maillet, Denoual, Duval, Allera, Goryaeva, and Marinica,
+`Comp. Mat. Science, 230, 112534 (2023) <https://doi.org/10.1016/j.commatsci.2023.112534>`_
diff --git a/doc/src/compute_stress_atom.rst b/doc/src/compute_stress_atom.rst
index 8a45171dfe..ffd0d2ffb4 100644
--- a/doc/src/compute_stress_atom.rst
+++ b/doc/src/compute_stress_atom.rst
@@ -127,11 +127,11 @@ result in more consistent heat flux values for angle, dihedrals,
 improper and constraint force contributions
 when computed via :doc:`compute heat/flux <compute_heat_flux>`.
 
-If no extra keywords are listed, the kinetic contribution all of the
-virial contribution terms are included in the per-atom stress tensor.
-If any extra keywords are listed, only those terms are summed to
-compute the tensor.  The *virial* keyword means include all terms
-except the kinetic energy *ke*\ .
+If no extra keywords are listed, the kinetic contribution *and* all
+of the virial contribution terms are included in the per-atom stress
+tensor.  If any extra keywords are listed, only those terms are
+summed to compute the tensor.  The *virial* keyword means include all
+terms except the kinetic energy *ke*\ .
 
 Note that the stress for each atom is due to its interaction with all
 other atoms in the simulation, not just with other atoms in the group.
diff --git a/doc/src/compute_stress_mop.rst b/doc/src/compute_stress_mop.rst
index 21c2963545..74d4c618e7 100644
--- a/doc/src/compute_stress_mop.rst
+++ b/doc/src/compute_stress_mop.rst
@@ -18,7 +18,7 @@ Syntax
 * style = *stress/mop* or *stress/mop/profile*
 * dir = *x* or *y* or *z* is the direction normal to the plane
 * args = argument specific to the compute style
-* keywords = *kin* or *conf* or *total* or *pair* or *bond* or *angle* (one or more can be specified)
+* keywords = *kin* or *conf* or *total* or *pair* or *bond* or *angle* or *dihedral* (one or more can be specified)
 
 .. parsed-literal::
 
@@ -68,15 +68,13 @@ Verlet algorithm.
 
 .. versionadded:: 15Jun2023
 
-   contributions from bond and angle potentials
+   contributions from bond, angle and dihedral potentials
 
-Between one and six keywords can be used to indicate which contributions
+Between one and seven keywords can be used to indicate which contributions
 to the stress must be computed: total stress (total), kinetic stress
 (kin), configurational stress (conf), stress due to bond stretching
-(bond), stress due to angle bending (angle) and/or due to pairwise
-non-bonded interactions (pair).  The angle keyword is currently
-available only for the *stress/mop* command and **not** the
-*stress/mop/profile* command.
+(bond), stress due to angle bending (angle), stress due to dihedral terms (dihedral)
+and/or due to pairwise non-bonded interactions (pair).
 
 NOTE 1: The configurational stress is computed considering all pairs of
 atoms where at least one atom belongs to group group-ID.
@@ -134,14 +132,9 @@ size does not change in time, and axis-aligned planes.
 The method only works with two-body pair interactions, because it
 requires the class method ``Pair::single()`` to be implemented, which is
 not possible for manybody potentials.  In particular, compute
-*stress/mop/profile* does not work with more than two-body pair
+*stress/mop/profile* and *stress/mop* do not work with more than two-body pair
 interactions, long range (kspace) interactions and
-angle/dihedral/improper intramolecular interactions. Similarly, compute
-*stress/mop* does not work with more than two-body pair interactions,
-long range (kspace) interactions and dihedral/improper intramolecular
-interactions but works with all bond interactions with the class method
-single() implemented and all angle interactions with the class method
-born_matrix() implemented.
+improper intramolecular interactions.
 
 Related commands
 """"""""""""""""
diff --git a/doc/src/dihedral_charmm.rst b/doc/src/dihedral_charmm.rst
index cc792693a2..a5652bc74e 100644
--- a/doc/src/dihedral_charmm.rst
+++ b/doc/src/dihedral_charmm.rst
@@ -3,6 +3,7 @@
 .. index:: dihedral_style charmm/kk
 .. index:: dihedral_style charmm/omp
 .. index:: dihedral_style charmmfsw
+.. index:: dihedral_style charmmfsw/kk
 
 dihedral_style charmm command
 =============================
@@ -12,6 +13,8 @@ Accelerator Variants: *charmm/intel*, *charmm/kk*, *charmm/omp*
 dihedral_style charmmfsw command
 ================================
 
+Accelerator Variants: *charmmfsw/kk*
+
 Syntax
 """"""
 
@@ -144,7 +147,9 @@ for more info.
 Related commands
 """"""""""""""""
 
-:doc:`dihedral_coeff <dihedral_coeff>`
+:doc:`dihedral_coeff <dihedral_coeff>`,
+:doc:`pair_style lj/charmm variants <pair_charmm>`,
+:doc:`angle_style charmm <angle_charmm>`, :doc:`fix cmap <fix_cmap>`
 
 Default
 """""""
diff --git a/doc/src/fix.rst b/doc/src/fix.rst
index 0889fe281f..69a7212487 100644
--- a/doc/src/fix.rst
+++ b/doc/src/fix.rst
@@ -287,6 +287,7 @@ accelerated styles exist.
 * :doc:`mvv/tdpd <fix_mvv_dpd>` - constant temperature DPD using the modified velocity-Verlet algorithm
 * :doc:`neb <fix_neb>` - nudged elastic band (NEB) spring forces
 * :doc:`neb/spin <fix_neb_spin>` - nudged elastic band (NEB) spring forces for spins
+* :doc:`nonaffine/displacement <fix_nonaffine_displacement>` - calculate nonaffine displacement of atoms
 * :doc:`nph <fix_nh>` - constant NPH time integration via Nose/Hoover
 * :doc:`nph/asphere <fix_nph_asphere>` - NPH for aspherical particles
 * :doc:`nph/body <fix_nph_body>` - NPH for body particles
diff --git a/doc/src/fix_adapt.rst b/doc/src/fix_adapt.rst
index 9cfbef7a11..f857a6333c 100644
--- a/doc/src/fix_adapt.rst
+++ b/doc/src/fix_adapt.rst
@@ -221,7 +221,7 @@ formulas for the meaning of these parameters:
 +------------------------------------------------------------------------------+--------------------------------------------------+-------------+
 | :doc:`table <pair_table>`                                                    | table_cutoff                                     | type pairs  |
 +------------------------------------------------------------------------------+--------------------------------------------------+-------------+
-| :doc:`ufm <pair_ufm>`                                                        | epsilon,sigma                                    | type pairs  |
+| :doc:`ufm <pair_ufm>`                                                        | epsilon,sigma,scale                              | type pairs  |
 +------------------------------------------------------------------------------+--------------------------------------------------+-------------+
 | :doc:`wf/cut <pair_wf_cut>`                                                  | epsilon,sigma,nu,mu                              | type pairs  |
 +------------------------------------------------------------------------------+--------------------------------------------------+-------------+
diff --git a/doc/src/fix_adapt_fep.rst b/doc/src/fix_adapt_fep.rst
index 1b2298cd96..474fc799de 100644
--- a/doc/src/fix_adapt_fep.rst
+++ b/doc/src/fix_adapt_fep.rst
@@ -123,19 +123,29 @@ styles and their energy formulas for the meaning of these parameters:
 +------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`born <pair_born>`                                                      | a,b,c                   | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
+| :doc:`born/gauss <pair_born_gauss>`                                          | biga0,biga1,r0          | type pairs |
++------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`buck, buck/coul/cut, buck/coul/long, buck/coul/msm  <pair_buck>`       | a,c                     | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`buck/mdf <pair_mdf>`                                                   | a,c                     | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
-| :doc:`coul/cut <pair_coul>`                                                  | scale                   | type pairs |
+| :doc:`coul/cut, coul/cut/global <pair_coul>`                                 | scale                   | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`coul/cut/soft <pair_fep_soft>`                                         | lambda                  | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
+| :doc:`coul/debye <pair_coul>`                                                | scale                   | type pairs |
++------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`coul/long, coul/msm <pair_coul>`                                       | scale                   | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`coul/long/soft <pair_fep_soft>`                                        | scale, lambda           | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
-| :doc:`eam <pair_eam>`                                                        | scale                   | type pairs |
+| :doc:`coul/slater/long <pair_coul_slater>`                                   | scale                   | type pairs |
++------------------------------------------------------------------------------+-------------------------+------------+
+| :doc:`coul/streitz <pair_coul>`                                              | scale                   | type pairs |
++------------------------------------------------------------------------------+-------------------------+------------+
+| :doc:`eam, eam/alloy, eam/fs <pair_eam>`                                     | scale                   | type pairs |
++------------------------------------------------------------------------------+-------------------------+------------+
+| :doc:`harmonic/cut <pair_harmonic_cut>`                                      | k                       | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`gauss <pair_gauss>`                                                    | a                       | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
@@ -163,6 +173,8 @@ styles and their energy formulas for the meaning of these parameters:
 +------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`lj/sf/dipole/sf <pair_dipole>`                                         | epsilon,sigma,scale     | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
+| :doc:`meam <pair_meam>`                                                      | scale                   | type pairs |
++------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`mie/cut <pair_mie>`                                                    | epsilon,sigma,gamR,gamA | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`morse, morse/smooth/linear <pair_morse>`                               | d0,r0,alpha             | type pairs |
@@ -173,12 +185,16 @@ styles and their energy formulas for the meaning of these parameters:
 +------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`nm/cut/coul/cut, nm/cut/coul/long <pair_nm>`                           | e0,r0,nn,mm             | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
+| :doc:`pace, pace/extrapolation <pair_pace>`                                  | scale                   | type pairs |
++------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`snap <pair_snap>`                                                      | scale                   | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`soft <pair_soft>`                                                      | a                       | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
 | :doc:`ufm <pair_ufm>`                                                        | epsilon,sigma,scale     | type pairs |
 +------------------------------------------------------------------------------+-------------------------+------------+
+| :doc:`wf/cut <pair_wf_cut>`                                                  | epsilon,sigma,nu,mu     | type pairs |
++------------------------------------------------------------------------------+-------------------------+------------+
 
 .. note::
 
diff --git a/doc/src/fix_halt.rst b/doc/src/fix_halt.rst
index 4231c77cc5..25331804aa 100644
--- a/doc/src/fix_halt.rst
+++ b/doc/src/fix_halt.rst
@@ -183,4 +183,4 @@ Related commands
 Default
 """""""
 
-The option defaults are error = hard, message = yes, and path = ".".
+The option defaults are error = soft, message = yes, and path = ".".
diff --git a/doc/src/fix_neb.rst b/doc/src/fix_neb.rst
index ccd0f9b83d..51066675b8 100644
--- a/doc/src/fix_neb.rst
+++ b/doc/src/fix_neb.rst
@@ -109,7 +109,7 @@ Note that in this case the specified *Kspring* is in
 force/distance units.
 
 With a value of *ideal*, the spring force is computed as suggested in
-ref`(WeinanE) <WeinanE>`
+:ref:`(WeinanE) <WeinanE>`
 
 .. math::
 
@@ -120,18 +120,18 @@ and :math:`RD_{ideal}` is the ideal *RD* for which all the images are
 equally spaced.  I.e. :math:`RD_{ideal} = (i-1) \cdot meanDist` when the
 climbing replica is off, where *i* is the replica number).  The
 *meanDist* is the average distance between replicas.  Note that in this
-case the specified *Kspring* is in force units. When the climbing replica
-is on, :math:`RD_{ideal}` and :math:`meanDist` are calculated separately
-each side of the climbing image. Note that the *ideal* form of nudging
-can often be more effective at keeping the replicas equally spaced before
-climbing, then equally spaced either side of the climbing image whilst
-climbing.
+case the specified *Kspring* is in force units. When the climbing
+replica is on, :math:`RD_{ideal}` and :math:`meanDist` are calculated
+separately each side of the climbing image. Note that the *ideal* form
+of nudging can often be more effective at keeping the replicas equally
+spaced before climbing, then equally spaced either side of the climbing
+image whilst climbing.
 
-With a value of *equal* the spring force is computed as for *ideal*
-when the climbing replica is off, promoting equidistance. When the climbing
+With a value of *equal* the spring force is computed as for *ideal* when
+the climbing replica is off, promoting equidistance. When the climbing
 replica is on, the spring force is computed to promote equidistant
-absolute differences in energy, rather than distance, each side of
-the climbing image:
+absolute differences in energy, rather than distance, each side of the
+climbing image:
 
 .. math::
 
@@ -143,23 +143,22 @@ where *ED* is the cumulative sum of absolute energy differences:
 
    ED = \sum_{i<N} \left|E(R_{i+1}) - E(R_i)\right|,
 
-*meanEdist* is the average absolute energy difference between
-replicas up to the climbing image or from the climbing image
-to the final image, for images before or after the climbing
-image respectively. :math:`ED_{ideal}` is the corresponding
-cumulative sum of average absolute energy differences in
-each case, in close analogy to *ideal*. This form of nudging
-is to aid schemes which integrate forces along, or near to,
-NEB pathways such as :doc:`fix_pafi <fix_pafi>`.
+*meanEdist* is the average absolute energy difference between replicas
+up to the climbing image or from the climbing image to the final image,
+for images before or after the climbing image
+respectively. :math:`ED_{ideal}` is the corresponding cumulative sum of
+average absolute energy differences in each case, in close analogy to
+*ideal*. This form of nudging is to aid schemes which integrate forces
+along, or near to, NEB pathways such as :doc:`fix_pafi <fix_pafi>`.
 
 ----------
 
-The keyword *perp* specifies if and how a perpendicular nudging force
-is computed.  It adds a spring force perpendicular to the path in
-order to prevent the path from becoming too strongly kinked.  It can
+The keyword *perp* specifies if and how a perpendicular nudging force is
+computed.  It adds a spring force perpendicular to the path in order to
+prevent the path from becoming too strongly kinked.  It can
 significantly improve the convergence of the NEB calculation when the
-resolution is poor.  I.e. when few replicas are used; see
-:ref:`(Maras) <Maras1>` for details.
+resolution is poor.  I.e. when few replicas are used; see :ref:`(Maras)
+<Maras1>` for details.
 
 The perpendicular spring force is given by
 
@@ -181,10 +180,10 @@ force is added.
 
 By default, no additional forces act on the first and last replicas
 during the NEB relaxation, so these replicas simply relax toward their
-respective local minima.  By using the key word *end*, additional
-forces can be applied to the first and/or last replicas, to enable
-them to relax toward a MEP while constraining their energy E to the
-target energy ETarget.
+respective local minima.  By using the key word *end*, additional forces
+can be applied to the first and/or last replicas, to enable them to
+relax toward a MEP while constraining their energy E to the target
+energy ETarget.
 
 If :math:`E_{Target} > E`, the interatomic force :math:`F_i` for the
 specified replica becomes:
@@ -197,33 +196,33 @@ specified replica becomes:
 The "spring" constant on the difference in energies is the specified
 *Kspring3* value.
 
-When *estyle* is specified as *first*, the force is applied to the
-first replica.  When *estyle* is specified as *last*, the force is
-applied to the last replica.  Note that the *end* keyword can be used
-twice to add forces to both the first and last replicas.
+When *estyle* is specified as *first*, the force is applied to the first
+replica.  When *estyle* is specified as *last*, the force is applied to
+the last replica.  Note that the *end* keyword can be used twice to add
+forces to both the first and last replicas.
 
 For both these *estyle* settings, the target energy *ETarget* is set
 to the initial energy of the replica (at the start of the NEB
 calculation).
 
 If the *estyle* is specified as *last/efirst* or *last/efirst/middle*,
-force is applied to the last replica, but the target energy *ETarget*
-is continuously set to the energy of the first replica, as it evolves
+force is applied to the last replica, but the target energy *ETarget* is
+continuously set to the energy of the first replica, as it evolves
 during the NEB relaxation.
 
 The difference between these two *estyle* options is as follows.  When
 *estyle* is specified as *last/efirst*, no change is made to the
-inter-replica force applied to the intermediate replicas (neither
-first or last).  If the initial path is too far from the MEP, an
-intermediate replica may relax "faster" and reach a lower energy than
-the last replica.  In this case the intermediate replica will be
-relaxing toward its own local minima.  This behavior can be prevented
-by specifying *estyle* as *last/efirst/middle* which will alter the
-inter-replica force applied to intermediate replicas by removing the
-contribution of the gradient to the inter-replica force.  This will
-only be done if a particular intermediate replica has a lower energy
-than the first replica.  This should effectively prevent the
-intermediate replicas from over-relaxing.
+inter-replica force applied to the intermediate replicas (neither first
+or last).  If the initial path is too far from the MEP, an intermediate
+replica may relax "faster" and reach a lower energy than the last
+replica.  In this case the intermediate replica will be relaxing toward
+its own local minima.  This behavior can be prevented by specifying
+*estyle* as *last/efirst/middle* which will alter the inter-replica
+force applied to intermediate replicas by removing the contribution of
+the gradient to the inter-replica force.  This will only be done if a
+particular intermediate replica has a lower energy than the first
+replica.  This should effectively prevent the intermediate replicas from
+over-relaxing.
 
 After converging a NEB calculation using an *estyle* of
 *last/efirst/middle*, you should check that all intermediate replicas
@@ -237,9 +236,10 @@ target energy.
 Restart, fix_modify, output, run start/stop, minimize info
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 
-No information about this fix is written to :doc:`binary restart files <restart>`.  None of the :doc:`fix_modify <fix_modify>` options
-are relevant to this fix.  No global or per-atom quantities are stored
-by this fix for access by various :doc:`output commands <Howto_output>`.
+No information about this fix is written to :doc:`binary restart files
+<restart>`.  None of the :doc:`fix_modify <fix_modify>` options are
+relevant to this fix.  No global or per-atom quantities are stored by
+this fix for access by various :doc:`output commands <Howto_output>`.
 No parameter of this fix can be used with the *start/stop* keywords of
 the :doc:`run <run>` command.
 
diff --git a/doc/src/fix_nh_uef.rst b/doc/src/fix_nh_uef.rst
index a515375746..60d47994f0 100644
--- a/doc/src/fix_nh_uef.rst
+++ b/doc/src/fix_nh_uef.rst
@@ -23,7 +23,7 @@ Syntax
   .. parsed-literal::
 
      keyword = *erate* or *ext* or *strain* or *temp* or *iso* or *x* or *y* or *z* or *tchain* or *pchain* or *tloop* or *ploop* or *mtk*
-       *erate* values = e_x e_y = engineering strain rates (required)
+       *erate* values = e_x e_y = true strain rates (required)
        *ext* value = *x* or *y* or *z* or *xy* or *yz* or *xz* = external dimensions
          sets the external dimensions used to calculate the scalar pressure
        *strain* values = e_x e_y = initial strain
@@ -62,7 +62,7 @@ performed using the :doc:`fix deform <fix_deform>`, :doc:`fix nvt/sllod
 <fix_nvt_sllod>`, and :doc:`compute temp/deform <compute_temp_deform>`
 commands.
 
-The applied flow field is set by the *eps* keyword. The values
+The applied flow field is set by the *erate* keyword. The values
 *edot_x* and *edot_y* correspond to the strain rates in the xx and yy
 directions.  It is implicitly assumed that the flow field is
 traceless, and therefore the strain rate in the zz direction is eqal
diff --git a/doc/src/fix_nonaffine_displacement.rst b/doc/src/fix_nonaffine_displacement.rst
new file mode 100644
index 0000000000..363b0a747a
--- /dev/null
+++ b/doc/src/fix_nonaffine_displacement.rst
@@ -0,0 +1,133 @@
+.. index:: fix nonaffine/displacement
+
+fix nonaffine/displacement command
+==================================
+
+Syntax
+""""""
+
+.. parsed-literal::
+
+   fix ID group nonaffine/displacement style args reference/style nstep
+
+* ID, group are documented in :doc:`fix <fix>` command
+* nonaffine/displacement = style name of this fix command
+* nevery = calculate nonaffine displacement every this many timesteps
+* style = *d2min* or *integrated*
+
+  .. parsed-literal::
+
+       *d2min* args = cutoff args
+         cutoff = *type* or *radius* or *custom*
+           *type* args = none, cutoffs determined by atom types
+           *radius* args = none, cutoffs determined based on atom diameters (atom style sphere)
+           *custom* args = *rmax*, cutoff set by a constant numeric value *rmax* (distance units)
+       *integrated* args = none
+
+* reference/style = *fixed* or *update* or *offset*
+
+  .. parsed-literal::
+
+       *fixed* = use a fixed reference frame at *nstep*
+       *update* = update the reference frame every *nstep* timesteps
+       *offset* = update the reference frame *nstep* timesteps before calculating the nonaffine displacement
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   fix 1 all nonaffine/displacement 100 integrated update 100
+   fix 1 all nonaffine/displacement 1000 d2min type fixed 0
+   fix 1 all nonaffine/displacement 1000 d2min custom 2.0 offset 100
+
+Description
+"""""""""""
+
+.. versionadded:: TBD
+
+This fix computes different metrics of the nonaffine displacement of
+particles. The first metric, *d2min* calculates the :math:`D^2_\mathrm{min}`
+nonaffine displacement by Falk and Langer in :ref:`(Falk) <d2min-Falk>`.
+For each atom, the fix computes the two tensors
+
+.. math::
+
+   X = \sum_{\mathrm{neighbors}} \vec{r} \left(\vec{r}_{0} \right)^T
+
+and
+
+.. math::
+
+   Y = \sum_{\mathrm{neighbors}} \vec{r}_0 \left(\vec{r}_{0} \right)^T
+
+where the neighbors include all other atoms within the distance criterion
+set by the cutoff option, discussed below, :math:`\vec{r}` is the current
+displacement between particles, and :math:`\vec{r}_0` is the reference
+displacement. A deformation gradient tensor is then calculated as
+:math:`F = X Y^{-1}` from which
+
+.. math::
+
+    D^2_\mathrm{min} = \sum_{\mathrm{neighbors}} \left| \vec{r} - F \vec{r}_0 \right|^2
+
+and a strain tensor is calculated :math:`E = F F^{T} - I` where :math:`I`
+is the identity tensor. This calculation is only performed on timesteps that
+are a multiple of *nevery* (including timestep zero). Data accessed before
+this occurs will simply be zeroed.
+
+The *integrated* style simply integrates the velocity of particles
+every timestep to calculate a displacement. This style only works if
+used in conjunction with another fix that deforms the box and displaces
+atom positions such as :doc:`fix deform <fix_deform>` with remap x,
+:doc:`fix press/berendsen <fix_press_berendsen>`, or :doc:`fix nh <fix_nh>`.
+
+Both of these methods require defining a reference state. With the *fixed* reference
+style, the user picks a specific timestep *nstep* at which particle positions are saved.
+If peratom data is accessed from this compute prior to this timestep, it will simply be
+zeroed. The *update* reference style implies the reference state will be updated every
+*nstep* timesteps. The *offset* reference only applies to the *d2min* metric and will
+update the reference state *nstep* timesteps before a multiple of *nevery* timesteps.
+
+
+----------
+
+Restart, fix_modify, output, run start/stop, minimize info
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+The reference state is saved to :doc:`binary restart files <restart>`.
+
+None of the :doc:`fix_modify <fix_modify>` options are relevant to this
+fix.
+
+This fix computes a peratom array with 3 columns, which can be accessed
+by indices 1-3 using any command that uses per-atom values from a fix
+as input.
+
+For the *integrated* style, the three columns are the nonaffine
+displacements in the x, y, and z directions. For the *d2min* style,
+the three columns are the calculated :math:`\sqrt{D^2_\mathrm{min}}`, the
+volumetric strain, and the deviatoric strain.
+
+Restrictions
+""""""""""""
+
+This compute is part of the EXTRA-FIX package.  It is only enabled if
+LAMMPS was built with that package.  See the
+:doc:`Build package <Build_package>` page for more info.
+
+Related commands
+""""""""""""""""
+
+none
+
+Default
+"""""""
+
+none
+
+----------
+
+.. _d2min-Falk:
+
+**(Falk)** Falk and Langer PRE, 57, 7192 (1998).
diff --git a/doc/src/fix_phonon.rst b/doc/src/fix_phonon.rst
index e99d2b6891..6c336dc091 100644
--- a/doc/src/fix_phonon.rst
+++ b/doc/src/fix_phonon.rst
@@ -181,10 +181,10 @@ This fix assumes a crystalline system with periodical lattice. The
 temperature of the system should not exceed the melting temperature to
 keep the system in its solid state.
 
-This fix is part of the PHONON package.  It is only enabled if
-LAMMPS was built with that package.  See the :doc:`Build package <Build_package>` page for more info.
-
-This fix requires LAMMPS be built with an FFT library.  See the :doc:`Build settings <Build_settings>` page for details.
+This fix is part of the PHONON package.  It is only enabled if LAMMPS
+was built with that package.  This fix also requires LAMMPS to be built
+with 3d-FFT support which is included in the KSPACE package.  See the
+:doc:`Build package <Build_package>` page for more info.
 
 Related commands
 """"""""""""""""
diff --git a/doc/src/fix_qeq.rst b/doc/src/fix_qeq.rst
index bace7af0ca..f353e9a998 100644
--- a/doc/src/fix_qeq.rst
+++ b/doc/src/fix_qeq.rst
@@ -232,8 +232,6 @@ These fixes are part of the QEQ package.  They are only enabled if
 LAMMPS was built with that package.  See the :doc:`Build package
 <Build_package>` page for more info.
 
-These qeq fixes are not compatible with the GPU and USER-INTEL packages.
-
 These qeq fixes will ignore electric field contributions from
 :doc:`fix efield <fix_efield>`.
 
diff --git a/doc/src/fix_rigid.rst b/doc/src/fix_rigid.rst
index a50e215681..3174a0929c 100644
--- a/doc/src/fix_rigid.rst
+++ b/doc/src/fix_rigid.rst
@@ -80,7 +80,7 @@ Syntax
          groupID1, groupID2, ... = list of N group IDs
 
 * zero or more keyword/value pairs may be appended
-* keyword = *langevin* or *reinit* or *temp* or *iso* or *aniso* or *x* or *y* or *z* or *couple* or *tparam* or *pchain* or *dilate* or *force* or *torque* or *infile* or *gravity*
+* keyword = *langevin* or *reinit* or *temp*  or *mol* or *iso* or *aniso* or *x* or *y* or *z* or *couple* or *tparam* or *pchain* or *dilate* or *force* or *torque* or *infile* or *gravity*
 
   .. parsed-literal::
 
@@ -92,6 +92,8 @@ Syntax
        *temp* values = Tstart Tstop Tdamp
          Tstart,Tstop = desired temperature at start/stop of run (temperature units)
          Tdamp = temperature damping parameter (time units)
+       *mol* value = template-ID
+         template-ID = ID of molecule template specified in a separate :doc:`molecule <molecule>` command
        *iso* or *aniso* values = Pstart Pstop Pdamp
          Pstart,Pstop = scalar external pressure at start/end of run (pressure units)
          Pdamp = pressure damping parameter (time units)
diff --git a/doc/src/fix_temp_berendsen.rst b/doc/src/fix_temp_berendsen.rst
index 67e496e6c5..541f3191d5 100644
--- a/doc/src/fix_temp_berendsen.rst
+++ b/doc/src/fix_temp_berendsen.rst
@@ -1,8 +1,11 @@
 .. index:: fix temp/berendsen
+.. index:: fix temp/berendsen/kk
 
 fix temp/berendsen command
 ==========================
 
+Accelerator Variants: *temp/berendsen/kk*
+
 Syntax
 """"""
 
@@ -118,6 +121,10 @@ remaining thermal degrees of freedom, and the bias is added back in.
 
 ----------
 
+.. include:: accel_styles.rst
+
+----------
+
 Restart, fix_modify, output, run start/stop, minimize info
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 
diff --git a/doc/src/fix_temp_rescale.rst b/doc/src/fix_temp_rescale.rst
index bfdcaa90f8..2dd2178346 100644
--- a/doc/src/fix_temp_rescale.rst
+++ b/doc/src/fix_temp_rescale.rst
@@ -1,8 +1,11 @@
 .. index:: fix temp/rescale
+.. index:: fix temp/rescale/kk
 
 fix temp/rescale command
 ========================
 
+Accelerator Variants: *temp/rescale/kk*
+
 Syntax
 """"""
 
@@ -125,6 +128,10 @@ remaining thermal degrees of freedom, and the bias is added back in.
 
 ----------
 
+.. include:: accel_styles.rst
+
+----------
+
 Restart, fix_modify, output, run start/stop, minimize info
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 
diff --git a/doc/src/img/saed_mesh.jpg b/doc/src/img/saed_mesh.jpg
deleted file mode 100644
index 7b0bf4117f..0000000000
Binary files a/doc/src/img/saed_mesh.jpg and /dev/null differ
diff --git a/doc/src/img/saed_mesh.png b/doc/src/img/saed_mesh.png
new file mode 100644
index 0000000000..bd558ce072
Binary files /dev/null and b/doc/src/img/saed_mesh.png differ
diff --git a/doc/src/molecule.rst b/doc/src/molecule.rst
index 480e175e7b..e1770ced2a 100644
--- a/doc/src/molecule.rst
+++ b/doc/src/molecule.rst
@@ -126,14 +126,50 @@ molecule (header keyword = inertia).
 Format of a molecule file
 """""""""""""""""""""""""
 
-The format of an individual molecule file is similar but
-(not identical) to the data file read by the :doc:`read_data <read_data>`
-commands, and is as follows.
+The format of an individual molecule file looks similar but is
+different than that of a data file read by the :doc:`read_data <read_data>`
+commands.  Here is a simple example for a TIP3P water molecule:
+
+.. code-block::
+
+   # Water molecule. TIP3P geometry
+   # header section:
+   3 atoms
+   2 bonds
+   1 angles
+
+   # body section:
+   Coords
+
+   1    0.00000  -0.06556   0.00000
+   2    0.75695   0.52032   0.00000
+   3   -0.75695   0.52032   0.00000
+
+   Types
+
+   1        1   # O
+   2        2   # H
+   3        2   # H
+
+   Charges
+
+   1       -0.834
+   2        0.417
+   3        0.417
+
+   Bonds
+
+   1   1      1      2
+   2   1      1      3
+
+   Angles
+
+   1   1      2      1      3
 
 A molecule file has a header and a body.  The header appears first.  The
-first line of the header and thus of the molecule file is *always* skipped;
-it typically contains a description of the file or a comment from the software
-that created the file.
+first line of the header and thus of the molecule file is *always*
+skipped; it typically contains a description of the file or a comment
+from the software that created the file.
 
 Then lines are read one line at a time.  Lines can have a trailing
 comment starting with '#' that is ignored.  There *must* be at least one
@@ -154,25 +190,66 @@ These are the recognized header keywords.  Header lines can come in
 any order.  The numeric value(s) are read from the beginning of the
 line.  The keyword should appear at the end of the line.  All these
 settings have default values, as explained below.  A line need only
-appear if the value(s) are different than the default.
+appear if the value(s) are different than the default, except when
+defining a *body* particle, which requires setting the number of
+*atoms* to 1, and setting the *inertia* in a specific section (see below).
 
-* N *atoms* = # of atoms N in molecule, default = 0
-* Nb *bonds* = # of bonds Nb in molecule, default = 0
-* Na *angles* = # of angles Na in molecule, default = 0
-* Nd *dihedrals* = # of dihedrals Nd in molecule, default = 0
-* Ni *impropers* = # of impropers Ni in molecule, default = 0
-* Nf *fragments* = # of fragments in molecule, default = 0
-* Mtotal *mass* = total mass of molecule
-* Xc Yc Zc *com* = coordinates of center-of-mass of molecule
-* Ixx Iyy Izz Ixy Ixz Iyz *inertia* = 6 components of inertia tensor of molecule
+   .. list-table::
+      :header-rows: 1
+      :widths: auto
 
-For *mass*, *com*, and *inertia*, the default is for LAMMPS to
-calculate this quantity itself if needed, assuming the molecules
-consists of a set of point particles or finite-size particles (with a
-non-zero diameter) that do not overlap.  If finite-size particles in
-the molecule do overlap, LAMMPS will not account for the overlap
-effects when calculating any of these 3 quantities, so you should
-pre-compute them yourself and list the values in the file.
+      * - Number(s)
+        - Keyword
+        - Meaning
+        - Default Value
+      * - N
+        - atoms
+        - # of atoms N in molecule
+        - 0
+      * - Nb
+        - bonds
+        - # of bonds Nb in molecule
+        - 0
+      * - Na
+        - angles
+        - # of angles Na in molecule
+        - 0
+      * - Nd
+        - dihedrals
+        - # of dihedrals Nd in molecule
+        - 0
+      * - Ni
+        - impropers
+        - # of impropers Ni in molecule
+        - 0
+      * - Nf
+        - fragments
+        - # of fragments Nf in molecule
+        - 0
+      * - Ninteger Ndouble
+        - body
+        - # of integer and floating-point values in body particle
+        - 0
+      * - Mtotal
+        - mass
+        - total mass of molecule
+        - computed
+      * - Xc Yc Zc
+        - com
+        - coordinates of center-of-mass of molecule
+        - computed
+      * - Ixx Iyy Izz Ixy Ixz Iyz
+        - inertia
+        - 6 components of inertia tensor of molecule
+        - computed
+
+For *mass*, *com*, and *inertia*, the default is for LAMMPS to calculate
+this quantity itself if needed, assuming the molecules consist of a set
+of point particles or finite-size particles (with a non-zero diameter)
+that do **not** overlap.  If finite-size particles in the molecule
+**do** overlap, LAMMPS will not account for the overlap effects when
+calculating any of these 3 quantities, so you should pre-compute them
+yourself and list the values in the file.
 
 The mass and center-of-mass coordinates (Xc,Yc,Zc) are
 self-explanatory.  The 6 moments of inertia (ixx,iyy,izz,ixy,ixz,iyz)
@@ -184,10 +261,11 @@ internally.
 
 These are the allowed section keywords for the body of the file.
 
-* *Coords, Types, Molecules, Fragments, Charges, Diameters, Masses* = atom-property sections
+* *Coords, Types, Molecules, Fragments, Charges, Diameters, Dipoles, Masses* = atom-property sections
 * *Bonds, Angles, Dihedrals, Impropers* = molecular topology sections
 * *Special Bond Counts, Special Bonds* = special neighbor info
 * *Shake Flags, Shake Atoms, Shake Bond Types* = SHAKE info
+* *Body Integers, Body Doubles* = body-property sections
 
 For the Types, Bonds, Angles, Dihedrals, and Impropers sections, each
 atom/bond/angle/etc type can be specified either as a number (numeric
@@ -298,6 +376,21 @@ not listed, the default diameter of each atom in the molecule is 1.0.
 
 ----------
 
+.. versionadded:: TBD
+
+*Dipoles* section:
+
+* one line per atom
+* line syntax: ID mux muy muz
+* mux,muy,muz = x-, y-, and z-component of point dipole vector of atom
+
+This section is only allowed for :doc:`atom styles <atom_style>` that
+support particles with point dipoles, e.g. atom_style dipole.  If not
+listed, the default dipole component of each atom in the molecule is set
+to 0.0.
+
+----------
+
 *Masses* section:
 
 * one line per atom
@@ -515,6 +608,67 @@ of SHAKE clusters.
 
 ----------
 
+*Body Integers* section:
+
+* one line
+* line syntax: N E F
+* N = number of sub-particles or number or vertices
+* E,F = number of edges and faces
+
+This section is only needed when the molecule is a body particle. the other
+Body section must also appear in the file.
+
+The total number of values that must appear is determined by the body style, and
+must be equal to the Ninteger value given in the *body* header.
+
+For *nparticle* and *rounded/polygon*, only the number of sub-particles or
+vertices N is required, and Ninteger should have a value of 1.
+
+For *rounded/polyhedron*, the number of edges E and faces F is required, and
+Ninteger should have a value of 3.
+
+See the :doc:`Howto body <Howto_body>` page for a further description of
+the file format.
+
+----------
+
+*Body Doubles* section:
+
+* first line
+* line syntax: Ixx Iyy Izz Ixy Ixz Iyz
+* Ixx Iyy Izz Ixy Ixz Iyz = 6 components of inertia tensor of body particle
+* one line per sub-particle or vertex
+* line syntax: x y z
+* x, y, z = coordinates of sub-particle or vertex
+* one line per edge
+* line syntax: N1 N2
+* N1, N2 = vertex indices
+* one line per face
+* line syntax: N1 N2 N3 N4
+* N1, N2, N3, N4 = vertex indices
+* last line
+* line syntax: diam
+* diam = rounded diameter that surrounds each vertex
+
+This section is only needed when the molecule is a body particle. the other
+Body section must also appear in the file.
+
+The total number of values that must appear is determined by the body style, and
+must be equal to the Ndouble value given in the *body* header. The 6 moments of
+inertia and the 3N coordinates of the sub-particles or vertices are required
+for all body styles.
+
+For *rounded/polygon*, the E = 6 + 3*N + 1 edges are automatically determined
+from the vertices.
+
+For *rounded/polyhedron*, the 2E vertex indices for the end points of the edges
+and 4F vertex indices defining the faces are required.
+
+See the :doc:`Howto body <Howto_body>` page for a further description of
+the file format.
+
+----------
+
 Restrictions
 """"""""""""
 
diff --git a/doc/src/neb.rst b/doc/src/neb.rst
index 0bc5de010b..b626796b6b 100644
--- a/doc/src/neb.rst
+++ b/doc/src/neb.rst
@@ -10,7 +10,7 @@ Syntax
 
    neb etol ftol N1 N2 Nevery file-style arg keyword values
 
-* etol = stopping tolerance for energy (energy units)
+* etol = stopping tolerance for energy (dimensionless)
 * ftol = stopping tolerance for force (force units)
 * N1 = max # of iterations (timesteps) to run initial NEB
 * N2 = max # of iterations (timesteps) to run barrier-climbing NEB
@@ -89,10 +89,11 @@ potentials, and the starting configuration when the neb command is
 issued should be the same for every replica.
 
 In a NEB calculation each replica is connected to other replicas by
-inter-replica nudging forces.  These forces are imposed by the :doc:`fix neb <fix_neb>` command, which must be used in conjunction with the
-neb command.  The group used to define the fix neb command defines the
-NEB atoms which are the only ones that inter-replica springs are
-applied to.  If the group does not include all atoms, then non-NEB
+inter-replica nudging forces.  These forces are imposed by the
+:doc:`fix neb <fix_neb>` command, which must be used in conjunction
+with the neb command.  The group used to define the fix neb command
+defines the NEB atoms which are the only ones that inter-replica springs
+are applied to.  If the group does not include all atoms, then non-NEB
 atoms have no inter-replica springs and the forces they feel and their
 motion is computed in the usual way due only to other atoms within
 their replica.  Conceptually, the non-NEB atoms provide a background
@@ -445,7 +446,7 @@ Related commands
 """"""""""""""""
 
 :doc:`prd <prd>`, :doc:`temper <temper>`, :doc:`fix langevin <fix_langevin>`,
-:doc:`fix viscous <fix_viscous>`
+:doc:`fix viscous <fix_viscous>`, :doc:`fix neb <fix_neb>`
 
 Default
 """""""
diff --git a/doc/src/package.rst b/doc/src/package.rst
index 63a7f095ad..212a06258c 100644
--- a/doc/src/package.rst
+++ b/doc/src/package.rst
@@ -344,12 +344,10 @@ specify additional flags for the runtime build.
 
 ----------
 
-The *intel* style invokes settings associated with the use of the
-INTEL package.  All of its settings, except the *omp* and *mode*
-keywords, are ignored if LAMMPS was not built with Xeon Phi
-co-processor support.  All of its settings, including the *omp* and
-*mode* keyword are applicable if LAMMPS was built with co-processor
-support.
+The *intel* style invokes settings associated with the use of the INTEL
+package.  The keywords *balance*, *ghost*, *tpc*, and *tptask* are
+**only** applicable if LAMMPS was built with Xeon Phi co-processor
+support and are otherwise ignored.
 
 The *Nphi* argument sets the number of co-processors per node.
 This can be set to any value, including 0, if LAMMPS was not
@@ -474,13 +472,13 @@ If the *neigh/thread* keyword is set to *off*, then the KOKKOS package
 threads only over atoms. However, for small systems, this may not expose
 enough parallelism to keep a GPU busy. When this keyword is set to *on*,
 the KOKKOS package threads over both atoms and neighbors of atoms. When
-using *neigh/thread* *on*, a full neighbor list must also be used. Using
-*neigh/thread* *on* may be slower for large systems, so this this option
-is turned on by default only when there are 16K atoms or less owned by
-an MPI rank and when using a full neighbor list. Not all KOKKOS-enabled
-potentials support this keyword yet, and only thread over atoms. Many
-simple pairwise potentials such as Lennard-Jones do support threading
-over both atoms and neighbors.
+using *neigh/thread* *on*, the :doc:`newton pair <newton>` setting must
+be "off". Using *neigh/thread* *on* may be slower for large systems, so
+this this option is turned on by default only when running on one or
+more GPUs and there are 16k atoms or less owned by an MPI rank. Not all
+KOKKOS-enabled potentials support this keyword yet, and only thread over
+atoms. Many simple pairwise potentials such as Lennard-Jones do support
+threading over both atoms and neighbors.
 
 If the *neigh/transpose* keyword is set to *off*, then the KOKKOS
 package will use the same memory layout for building the neighbor list on
@@ -732,7 +730,7 @@ comm = device, sort = device, neigh/transpose = off, gpu/aware = on. When
 LAMMPS can safely detect that GPU-aware MPI is not available, the default value
 of gpu/aware becomes "off". For CPUs or Xeon Phis, the option defaults are
 neigh = half, neigh/qeq = half, newton = on, binsize = 0.0, comm = no, and sort
-= no.  The option neigh/thread = on when there are 16K atoms or less on an MPI
+= no. For GPUs, option neigh/thread = on when there are 16k atoms or less on an MPI
 rank, otherwise it is "off". These settings are made automatically by the
 required "-k on" :doc:`command-line switch <Run_options>`. You can change them
 by using the package kokkos command in your input script or via the :doc:`-pk
diff --git a/doc/src/pair_aip_water_2dm.rst b/doc/src/pair_aip_water_2dm.rst
index 5cee40edda..b84202e69e 100644
--- a/doc/src/pair_aip_water_2dm.rst
+++ b/doc/src/pair_aip_water_2dm.rst
@@ -22,13 +22,24 @@ Examples
 .. code-block:: LAMMPS
 
    pair_style  hybrid/overlay aip/water/2dm 16.0 1
-   pair_coeff  * * aip/water/2dm  COH.aip.water.2dm C Ow Hw
+   pair_coeff  * * aip/water/2dm  CBNOH.aip.water.2dm C Ow Hw
 
    pair_style  hybrid/overlay aip/water/2dm 16.0 lj/cut/tip4p/long 2 3 1 1 0.1546 10 8.5
-   pair_coeff  2 2   lj/cut/tip4p/long    8.0313e-3  3.1589  # O-O
-   pair_coeff  2 3   lj/cut/tip4p/long    0.0        0.0     # O-H
-   pair_coeff  3 3   lj/cut/tip4p/long    0.0        0.0     # H-H
-   pair_coeff  * *   aip/water/2dm        COH.aip.water.2dm    C Ow Hw
+   pair_coeff  2 2   lj/cut/tip4p/long    8.0313e-3  3.1589      # O-O
+   pair_coeff  2 3   lj/cut/tip4p/long    0.0        0.0         # O-H
+   pair_coeff  3 3   lj/cut/tip4p/long    0.0        0.0         # H-H
+   pair_coeff  * *   aip/water/2dm        CBNOH.aip.water.2dm    C Ow Hw
+
+   pair_style  hybrid/overlay aip/water/2dm 16.0 lj/cut/tip4p/long 3 4 1 1 0.1546 10 8.5 coul/shield 16.0 1
+   pair_coeff  1*2 1*2   none
+   pair_coeff  3 3   lj/cut/tip4p/long    8.0313e-3  3.1589      # O-O
+   pair_coeff  3 4   lj/cut/tip4p/long    0.0        0.0         # O-H
+   pair_coeff  4 4   lj/cut/tip4p/long    0.0        0.0         # H-H
+   pair_coeff  * *   aip/water/2dm        CBNOH.aip.water.2dm  B N Ow Hw
+   pair_coeff  1 3   coul/shield          1.333
+   pair_coeff  1 4   coul/shield          1.333
+   pair_coeff  2 3   coul/shield          1.333
+   pair_coeff  2 4   coul/shield          1.333
 
 Description
 """""""""""
@@ -37,7 +48,7 @@ Description
 
 The *aip/water/2dm* style computes the anisotropic interfacial potential
 (AIP) potential for interfaces of water with two-dimensional (2D)
-materials as described in :ref:`(Feng) <Feng>`.
+materials as described in :ref:`(Feng1) <Feng1>` and :ref:`(Feng2) <Feng2>`.
 
 .. math::
 
@@ -62,12 +73,12 @@ larger than :math:`r_c` :doc:`pair_style ilp_graphene_hbn
 .. note::
 
    This pair style uses the atomic normal vector definition from
-   :ref:`(Feng) <Feng>`), where the atomic normal vectors of the
+   :ref:`(Feng1) <Feng1>`), where the atomic normal vectors of the
    hydrogen atoms are assumed to lie along the corresponding
    oxygen-hydrogen bonds and the normal vector of the central oxygen
    atom is defined as their average.
 
-The provided parameter file, ``COH.aip.water.2dm``, is intended for use
+The provided parameter file, ``CBNOH.aip.water.2dm``, is intended for use
 with *metal* :doc:`units <units>`, with energies in meV.  Two additional
 parameters, *S*, and *rcut* are included in the parameter file. *S* is
 designed to facilitate scaling of energies; *rcut* is the cutoff for an
@@ -77,7 +88,7 @@ the calculation of the normals for all atom pairs.
 .. note::
 
    The parameters presented in the provided parameter file,
-   ``COH.aip.water.2dm``, are fitted with the taper function enabled by
+   ``CBNOH.aip.water.2dm``, are fitted with the taper function enabled by
    setting the cutoff equal to 16.0 Angstrom.  Using a different cutoff
    or taper function setting should be carefully checked as they can
    lead to significant errors.  These parameters provide a good
@@ -134,7 +145,7 @@ if LAMMPS was built with that package.  See the :doc:`Build package
 This pair style requires the newton setting to be *on* for pair
 interactions.
 
-The ``COH.aip.water.2dm`` potential file provided with LAMMPS is
+The ``CBNOH.aip.water.2dm`` potential file provided with LAMMPS is
 parameterized for *metal* units.  You can use this pair style with any
 LAMMPS units, but you would need to create your own potential file with
 parameters in the appropriate units, if your simulation does not use
@@ -162,6 +173,10 @@ tap_flag = 1
 
 ----------
 
-.. _Feng:
+.. _Feng1:
 
-**(Feng)** Z. Feng and W. Ouyang et al., J. Phys. Chem. C. 127, 8704-8713 (2023).
+**(Feng1)** Z. Feng, ..., and W. Ouyang, J. Phys. Chem. C. 127(18), 8704-8713 (2023).
+
+.. _Feng2:
+
+**(Feng2)** Z. Feng, ..., and W. Ouyang, Langmuir 39(50), 18198-18207 (2023).
diff --git a/doc/src/pair_charmm.rst b/doc/src/pair_charmm.rst
index 8ff6508dea..30b03ad872 100644
--- a/doc/src/pair_charmm.rst
+++ b/doc/src/pair_charmm.rst
@@ -16,6 +16,7 @@
 .. index:: pair_style lj/charmm/coul/msm/omp
 .. index:: pair_style lj/charmmfsw/coul/charmmfsh
 .. index:: pair_style lj/charmmfsw/coul/long
+.. index:: pair_style lj/charmmfsw/coul/long/kk
 
 pair_style lj/charmm/coul/charmm command
 ========================================
@@ -43,6 +44,8 @@ pair_style lj/charmmfsw/coul/charmmfsh command
 pair_style lj/charmmfsw/coul/long command
 =========================================
 
+Accelerator Variants: *lj/charmmfsw/coul/long/kk*
+
 Syntax
 """"""
 
@@ -281,7 +284,9 @@ page for more info.
 Related commands
 """"""""""""""""
 
-:doc:`pair_coeff <pair_coeff>`
+:doc:`pair_coeff <pair_coeff>`, :doc:`angle_style charmm <angle_charmm>`,
+:doc:`dihedral_style charmm <dihedral_charmm>`,
+:doc:`dihedral_style charmmfsw <dihedral_charmm>`, :doc:`fix cmap <fix_cmap>`
 
 Default
 """""""
diff --git a/doc/src/pair_coul_slater.rst b/doc/src/pair_coul_slater.rst
index 443de4262b..bde14276db 100644
--- a/doc/src/pair_coul_slater.rst
+++ b/doc/src/pair_coul_slater.rst
@@ -1,6 +1,7 @@
 .. index:: pair_style coul/slater
 .. index:: pair_style coul/slater/cut
 .. index:: pair_style coul/slater/long
+.. index:: pair_style coul/slater/long/gpu
 
 pair_style coul/slater command
 ==============================
@@ -11,6 +12,8 @@ pair_style coul/slater/cut command
 pair_style coul/slater/long command
 ===================================
 
+Accelerator Variants: *coul/slater/long/gpu*
+
 Syntax
 """"""
 
diff --git a/doc/src/pair_fep_soft.rst b/doc/src/pair_fep_soft.rst
index 400ad0cc4a..20e17ce0b4 100644
--- a/doc/src/pair_fep_soft.rst
+++ b/doc/src/pair_fep_soft.rst
@@ -1,8 +1,10 @@
 .. index:: pair_style lj/cut/soft
 .. index:: pair_style lj/cut/soft/omp
 .. index:: pair_style lj/cut/coul/cut/soft
+.. index:: pair_style lj/cut/coul/cut/soft/gpu
 .. index:: pair_style lj/cut/coul/cut/soft/omp
 .. index:: pair_style lj/cut/coul/long/soft
+.. index:: pair_style lj/cut/coul/long/soft/gpu
 .. index:: pair_style lj/cut/coul/long/soft/omp
 .. index:: pair_style lj/cut/tip4p/long/soft
 .. index:: pair_style lj/cut/tip4p/long/soft/omp
@@ -27,12 +29,12 @@ Accelerator Variants: *lj/cut/soft/omp*
 pair_style lj/cut/coul/cut/soft command
 =======================================
 
-Accelerator Variants: *lj/cut/coul/cut/soft/omp*
+Accelerator Variants: *lj/cut/coul/cut/soft/gpu*, *lj/cut/coul/cut/soft/omp*
 
 pair_style lj/cut/coul/long/soft command
 ========================================
 
-Accelerator Variants: *lj/cut/coul/long/soft/omp*
+Accelerator Variants: *lj/cut/coul/long/soft/gpu*, *lj/cut/coul/long/soft/omp*
 
 pair_style lj/cut/tip4p/long/soft command
 =========================================
diff --git a/doc/src/pair_ilp_tmd.rst b/doc/src/pair_ilp_tmd.rst
index 70a4768389..575bafdc91 100644
--- a/doc/src/pair_ilp_tmd.rst
+++ b/doc/src/pair_ilp_tmd.rst
@@ -22,12 +22,12 @@ Examples
 .. code-block:: LAMMPS
 
    pair_style  hybrid/overlay ilp/tmd 16.0 1
-   pair_coeff  * * ilp/tmd  MoS2.ILP Mo S S
+   pair_coeff  * * ilp/tmd  TMD.ILP Mo S S
 
    pair_style  hybrid/overlay sw/mod sw/mod ilp/tmd 16.0
    pair_coeff  * * sw/mod 1  tmd.sw.mod Mo S S NULL NULL NULL
-   pair_coeff  * * sw/mod 2  tmd.sw.mod NULL NULL NULL Mo S S
-   pair_coeff  * * ilp/tmd   MoS2.ILP   Mo S S Mo S S
+   pair_coeff  * * sw/mod 2  tmd.sw.mod NULL NULL NULL W Se Se
+   pair_coeff  * * ilp/tmd   TMD.ILP   Mo S S W Se Se
 
 Description
 """""""""""
@@ -36,7 +36,7 @@ Description
 
 The *ilp/tmd* style computes the registry-dependent interlayer
 potential (ILP) potential for transition metal dichalcogenides (TMD)
-as described in :ref:`(Ouyang7) <Ouyang7>`.
+as described in :ref:`(Ouyang7) <Ouyang7>` and :ref:`(Jiang) <Jiang>`.
 
 .. math::
 
@@ -69,7 +69,7 @@ calculating the normals.
    each atom `i`, its six nearest neighboring atoms belonging to the same
    sub-layer are chosen to define the normal vector `{\bf n}_i`.
 
-The parameter file (e.g. MoS2.ILP), is intended for use with *metal*
+The parameter file (e.g. TMD.ILP), is intended for use with *metal*
 :doc:`units <units>`, with energies in meV. Two additional parameters,
 *S*, and *rcut* are included in the parameter file. *S* is designed to
 facilitate scaling of energies. *rcut* is designed to build the neighbor
@@ -77,7 +77,7 @@ list for calculating the normals for each atom pair.
 
 .. note::
 
-   The parameters presented in the parameter file (e.g. MoS2.ILP),
+   The parameters presented in the parameter file (e.g. TMD.ILP),
    are fitted with taper function by setting the cutoff equal to 16.0
    Angstrom.  Using different cutoff or taper function should be careful.
    These parameters provide a good description in both short- and long-range
@@ -133,10 +133,10 @@ if LAMMPS was built with that package.  See the :doc:`Build package
 This pair style requires the newton setting to be *on* for pair
 interactions.
 
-The MoS2.ILP potential file provided with LAMMPS (see the potentials
+The TMD.ILP potential file provided with LAMMPS (see the potentials
 directory) are parameterized for *metal* units.  You can use this
 potential with any LAMMPS units, but you would need to create your own
-custom MoS2.ILP potential file with coefficients listed in the appropriate
+custom TMD.ILP potential file with coefficients listed in the appropriate
 units, if your simulation does not use *metal* units.
 
 Related commands
@@ -164,3 +164,7 @@ tap_flag = 1
 .. _Ouyang7:
 
 **(Ouyang7)** W. Ouyang, et al., J. Chem. Theory Comput. 17, 7237 (2021).
+
+.. _Jiang:
+
+**(Jiang)** W. Jiang, et al., J. Phys. Chem. A, 127, 46, 9820-9830 (2023).
diff --git a/doc/src/pair_lepton.rst b/doc/src/pair_lepton.rst
index 21e619a3d9..5b5dc698e7 100644
--- a/doc/src/pair_lepton.rst
+++ b/doc/src/pair_lepton.rst
@@ -72,7 +72,7 @@ interactions between particles which depend on the distance and have a
 cutoff.  The potential function must be provided as an expression string
 using "r" as the distance variable.  With pair style *lepton/coul* one
 may additionally reference the charges of the two atoms of the pair with
-"qi" and "qj", respectively.  With pair style *lepton/coul* one may
+"qi" and "qj", respectively.  With pair style *lepton/sphere* one may
 instead reference the radii of the two atoms of the pair with "radi" and
 "radj", respectively; this is half of the diameter that can be set in
 :doc:`data files <read_data>` or the :doc:`set command <set>`.
@@ -166,8 +166,8 @@ mixing.  Thus, expressions for *all* I,J pairs must be specified
 explicitly.
 
 Only pair style *lepton* supports the :doc:`pair_modify shift <pair_modify>`
-option for shifting the energy of the pair interaction so that it is
-0 at the cutoff, pair styles *lepton/coul* and *lepton/sphere* do *not*.
+option for shifting the potential energy of the pair interaction so that
+it is 0 at the cutoff, pair styles *lepton/coul* and *lepton/sphere* do *not*.
 
 The :doc:`pair_modify table <pair_modify>` options are not relevant for
 the these pair styles.
diff --git a/doc/src/pair_mesodpd.rst b/doc/src/pair_mesodpd.rst
index 5d244f3b1d..28a398754f 100644
--- a/doc/src/pair_mesodpd.rst
+++ b/doc/src/pair_mesodpd.rst
@@ -1,14 +1,20 @@
 .. index:: pair_style edpd
+.. index:: pair_style edpd/gpu
 .. index:: pair_style mdpd
+.. index:: pair_style mdpd/gpu
 .. index:: pair_style mdpd/rhosum
 .. index:: pair_style tdpd
 
 pair_style edpd command
 =======================
 
+Accelerator Variants: *edpd/gpu*
+
 pair_style mdpd command
 =======================
 
+Accelerator Variants: *mdpd/gpu*
+
 pair_style mdpd/rhosum command
 ==============================
 
diff --git a/doc/src/pair_pace.rst b/doc/src/pair_pace.rst
index d815f663fe..001214370c 100644
--- a/doc/src/pair_pace.rst
+++ b/doc/src/pair_pace.rst
@@ -40,6 +40,9 @@ Examples
    pair_style pace product chunksize 2048
    pair_coeff * * Cu-PBE-core-rep.ace Cu
 
+   pair_style pace
+   pair_coeff * * Cu.yaml Cu
+
    pair_style pace/extrapolation
    pair_coeff * * Cu.yaml Cu.asi Cu
 
@@ -64,7 +67,7 @@ specifies an ACE coefficient file followed by N additional arguments
 specifying the mapping of ACE elements to LAMMPS atom types, where N is
 the number of LAMMPS atom types:
 
-* ACE coefficient file
+* ACE coefficient file (.yaml or .yace/.ace format)
 * N element names = mapping of ACE elements to atom types
 
 Only a single pair_coeff command is used with the *pace* style which
@@ -136,6 +139,22 @@ product B-basis evaluator is always used and only *linear* ASI is supported.
 See the :doc:`pair_coeff <pair_coeff>` page for alternate ways
 to specify the path for the ACE coefficient file.
 
+Core repulsion
+"""""""""""""""""""
+The ACE potential can be configured to initiate core-repulsion from an inner cutoff,
+seamlessly transitioning from ACE to ZBL. The core repulsion factor can be accessed
+as a per-atom quantity, as demonstrated in the example below:
+
+.. code-block:: LAMMPS
+
+    pair_style  pace
+    pair_coeff  * * CuNi.yaml Cu Ni
+
+    fix pace_corerep all pair 1 pace corerep 1
+
+In this case, per-atom `f_pace_corerep` quantities represent the fraction of ZBL
+core-repulsion for each atom.
+
 Mixing, shift, table, tail correction, restart, rRESPA info
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 
diff --git a/doc/src/pair_reaxff.rst b/doc/src/pair_reaxff.rst
index d28e15b0a2..03d53d1ff4 100644
--- a/doc/src/pair_reaxff.rst
+++ b/doc/src/pair_reaxff.rst
@@ -373,7 +373,8 @@ Related commands
 
 :doc:`pair_coeff <pair_coeff>`, :doc:`fix qeq/reaxff <fix_qeq_reaxff>`,
 :doc:`fix acks2/reaxff <fix_acks2_reaxff>`, :doc:`fix reaxff/bonds <fix_reaxff_bonds>`,
-:doc:`fix reaxff/species <fix_reaxff_species>`
+:doc:`fix reaxff/species <fix_reaxff_species>`,
+:doc:`compute reaxff/atom <compute_reaxff_atom>`
 
 Default
 """""""
diff --git a/doc/src/pair_sph_heatconduction.rst b/doc/src/pair_sph_heatconduction.rst
index 4716ed54fb..e9004cb5a4 100644
--- a/doc/src/pair_sph_heatconduction.rst
+++ b/doc/src/pair_sph_heatconduction.rst
@@ -1,8 +1,11 @@
 .. index:: pair_style sph/heatconduction
+.. index:: pair_style sph/heatconduction/gpu
 
 pair_style sph/heatconduction command
 =====================================
 
+Accelerator Variants: *sph/heatconduction/gpu*
+
 Syntax
 """"""
 
diff --git a/doc/src/pair_sph_lj.rst b/doc/src/pair_sph_lj.rst
index b5c02c41ff..5ac7ab9c6b 100644
--- a/doc/src/pair_sph_lj.rst
+++ b/doc/src/pair_sph_lj.rst
@@ -1,8 +1,11 @@
 .. index:: pair_style sph/lj
+.. index:: pair_style sph/lj/gpu
 
 pair_style sph/lj command
 =========================
 
+Accelerator Variants: *sph/lj/gpu*
+
 Syntax
 """"""
 
diff --git a/doc/src/pair_sph_taitwater.rst b/doc/src/pair_sph_taitwater.rst
index 34eb65f005..79972660c4 100644
--- a/doc/src/pair_sph_taitwater.rst
+++ b/doc/src/pair_sph_taitwater.rst
@@ -1,8 +1,11 @@
 .. index:: pair_style sph/taitwater
+.. index:: pair_style sph/taitwater/gpu
 
 pair_style sph/taitwater command
 ================================
 
+Accelerator Variants: *sph/taitwater/gpu*
+
 Syntax
 """"""
 
diff --git a/doc/src/run_style.rst b/doc/src/run_style.rst
index 0804ce5c82..d2e47c0884 100644
--- a/doc/src/run_style.rst
+++ b/doc/src/run_style.rst
@@ -329,7 +329,8 @@ Restrictions
 The *verlet/split* style can only be used if LAMMPS was built with the
 REPLICA package. Correspondingly the *respa/omp* style is available
 only if the OPENMP package was included. See the :doc:`Build package
-<Build_package>` page for more info.
+<Build_package>` page for more info.  It is not compatible with
+kspace styles from the INTEL package.
 
 Whenever using rRESPA, the user should experiment with trade-offs in
 speed and accuracy for their system, and verify that they are
diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt
index 10ef06f89f..c74e2a79f3 100644
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@@ -79,6 +79,7 @@ Alessandro
 Alexey
 ali
 aliceblue
+Allera
 Allinger
 allocatable
 allocator
@@ -124,6 +125,7 @@ antisymmetry
 anton
 Antonelli
 api
+apolar
 Apoorva
 Appl
 Appshaw
@@ -150,6 +152,7 @@ asphericity
 Asq
 assignee
 assively
+associativity
 Asta
 Astart
 Astop
@@ -719,6 +722,7 @@ dem
 Dendrimer
 dendritic
 Denniston
+Denoual
 dephase
 dephasing
 dequidt
@@ -790,11 +794,13 @@ dispersionflag
 dissipative
 Dissipative
 distharm
+distutils
 dl
 dlabel
 dlambda
 DLAMMPS
 dll
+dlm
 dlopen
 dm
 dmax
@@ -860,6 +866,7 @@ Dunweg
 Dupend
 Dupont
 dUs
+Duval
 dV
 dvector
 dVx
@@ -1011,6 +1018,7 @@ Ercolessi
 Erdmann
 erf
 erfc
+erforce
 Erhart
 erorate
 erose
@@ -1074,6 +1082,7 @@ facesets
 factorizable
 factorizations
 Fahrenberger
+Falk
 Faken
 Farago
 Fasolino
@@ -1298,6 +1307,7 @@ Gonzalez-Melchor
 googlemail
 googletest
 Gordan
+Goryaeva
 Goudeau
 GPa
 GPL
@@ -1384,12 +1394,14 @@ hcp
 hdnnp
 HDNNP
 Hearn
+Heaviside
 heatconduction
 heatflow
 Hebbeker
 Hebenstreit
 Hecht
 Heenen
+heFFTe
 Hendrik
 Henin
 Henkelman
@@ -1822,6 +1834,7 @@ Lanczos
 Lande
 Landron
 Landsgesell
+Langer
 langevin
 Langevin
 Langston
@@ -1847,6 +1860,7 @@ lbl
 LBtype
 lcbop
 ld
+lda
 ldfftw
 ldg
 lebedeva
@@ -1968,6 +1982,7 @@ lossy
 Lozovik
 lps
 lpsapi
+lr
 lrt
 lsfftw
 lt
@@ -2010,7 +2025,10 @@ magelec
 Maginn
 magneton
 magnetons
+maha
+Mahalanobis
 Mahoney
+Maillet
 mainboard
 mainboards
 makefile
@@ -2189,6 +2207,7 @@ mintcream
 Mintmire
 Miron
 mis
+misclassification
 Mises
 Mishin
 Mishra
@@ -2220,8 +2239,10 @@ Mohd
 Mohles
 mol
 Mol
+molatom
 molfile
 Molfile
+molindex
 MolPairStyle
 moltemplate
 momb
@@ -2297,6 +2318,7 @@ multicomponent
 multicore
 multielectron
 multinode
+multinomial
 multiphysics
 Multipole
 multiscale
@@ -2388,6 +2410,7 @@ Nbtypes
 Nbytes
 nc
 Nc
+nclasses
 nchunk
 Nchunk
 ncoeff
@@ -2499,6 +2522,7 @@ noforce
 noguess
 Noid
 nolib
+nonaffine
 nonequilibrium
 nongauss
 nonGaussian
@@ -2551,6 +2575,7 @@ ns
 Ns
 Nsample
 Nskip
+nspecial
 Nspecies
 nsq
 Nstart
@@ -2571,6 +2596,7 @@ nthreads
 ntimestep
 Ntptask
 Ntriples
+ntries
 ntris
 Ntype
 ntypes
@@ -2796,6 +2822,7 @@ pIm
 pimd
 Piola
 pIp
+pipelining
 Pisarev
 Pishevar
 Pitera
@@ -3348,6 +3375,7 @@ Skylake
 slateblue
 slategray
 slater
+slcsa
 Slepoy
 Sliozberg
 sLL
@@ -3856,6 +3884,7 @@ versa
 Verstraelen
 ves
 vflag
+vfrac
 vhi
 vibrational
 Vij
@@ -4028,6 +4057,7 @@ xy
 xyz
 xz
 xzhou
+yace
 Yade
 yade
 yaff
diff --git a/examples/COUPLE/plugin/liblammpsplugin.c b/examples/COUPLE/plugin/liblammpsplugin.c
index 5228e07e9c..011c320254 100644
--- a/examples/COUPLE/plugin/liblammpsplugin.c
+++ b/examples/COUPLE/plugin/liblammpsplugin.c
@@ -110,6 +110,8 @@ liblammpsplugin_t *liblammpsplugin_load(const char *lib)
   ADDSYM(extract_variable);
   ADDSYM(extract_variable_datatype);
   ADDSYM(set_variable);
+  ADDSYM(set_string_variable);
+  ADDSYM(set_internal_variable);
   ADDSYM(variable_info);
 
   ADDSYM(gather_atoms);
diff --git a/examples/COUPLE/plugin/liblammpsplugin.h b/examples/COUPLE/plugin/liblammpsplugin.h
index 92398dfb27..1d647e8e93 100644
--- a/examples/COUPLE/plugin/liblammpsplugin.h
+++ b/examples/COUPLE/plugin/liblammpsplugin.h
@@ -152,9 +152,11 @@ struct _liblammpsplugin {
 
   void *(*extract_compute)(void *, const char *, int, int);
   void *(*extract_fix)(void *, const char *, int, int, int, int);
-  void *(*extract_variable)(void *, const char *, char *);
+  void *(*extract_variable)(void *, const char *, const char *);
   int (*extract_variable_datatype)(void *, const char *);
-  int (*set_variable)(void *, char *, char *);
+  int (*set_variable)(void *, const char *, const char *);
+  int (*set_string_variable)(void *, const char *, const char *);
+  int (*set_internal_variable)(void *, const char *, double);
   int (*variable_info)(void *, int, char *, int);
 
   void (*gather_atoms)(void *, const char *, int, int, void *);
diff --git a/examples/COUPLE/simple/simple.cpp b/examples/COUPLE/simple/simple.cpp
index c8727cc81f..f6365ac3de 100644
--- a/examples/COUPLE/simple/simple.cpp
+++ b/examples/COUPLE/simple/simple.cpp
@@ -67,7 +67,7 @@ int main(int narg, char **arg)
   FILE *fp;
   if (me == 0) {
     fp = fopen(arg[2],"r");
-    if (fp == NULL) {
+    if (fp == nullptr) {
       printf("ERROR: Could not open LAMMPS input script\n");
       MPI_Abort(MPI_COMM_WORLD,1);
     }
@@ -78,14 +78,14 @@ int main(int narg, char **arg)
   // (could just send it to proc 0 of comm_lammps and let it Bcast)
   // all LAMMPS procs call input->one() on the line
   
-  LAMMPS *lmp = NULL;
-  if (lammps == 1) lmp = new LAMMPS(0,NULL,comm_lammps);
+  LAMMPS *lmp = nullptr;
+  if (lammps == 1) lmp = new LAMMPS(0,nullptr,comm_lammps);
 
   int n;
   char line[1024];
-  while (1) {
+  while (true) {
     if (me == 0) {
-      if (fgets(line,1024,fp) == NULL) n = 0;
+      if (fgets(line,1024,fp) == nullptr) n = 0;
       else n = strlen(line) + 1;
       if (n == 0) fclose(fp);
     }
@@ -101,8 +101,8 @@ int main(int narg, char **arg)
   // put coords back into LAMMPS
   // run a single step with changed coords
 
-  double *x = NULL;
-  double *v = NULL;
+  double *x = nullptr;
+  double *v = nullptr;
 
   if (lammps == 1) {
     lmp->input->one("run 10");
@@ -147,7 +147,7 @@ int main(int narg, char **arg)
   // create_atoms() to create new ones with old coords, vels
   // initial thermo should be same as step 20
 
-  int *type = NULL;
+  int *type = nullptr;
 
   if (lammps == 1) {
     int natoms = static_cast<int> (lmp->atom->natoms);
@@ -155,7 +155,7 @@ int main(int narg, char **arg)
     for (int i = 0; i < natoms; i++) type[i] = 1;
 
     lmp->input->one("delete_atoms group all");
-    lammps_create_atoms(lmp,natoms,NULL,type,x,v,NULL,0);
+    lammps_create_atoms(lmp,natoms,nullptr,type,x,v,nullptr,0);
     lmp->input->one("run 10");
   }
 
diff --git a/examples/PACKAGES/cgdna/util/generate.py b/examples/PACKAGES/cgdna/util/generate.py
index cd7465acdb..e85661abb1 100644
--- a/examples/PACKAGES/cgdna/util/generate.py
+++ b/examples/PACKAGES/cgdna/util/generate.py
@@ -22,22 +22,26 @@
 """
 Import basic modules
 """
+
+# for python2/3 compatibility
+from __future__ import print_function
+
 import sys, os, timeit
 
 from timeit import default_timer as timer
 start_time = timer()
 """
-Try to import numpy; if failed, import a local version mynumpy 
+Try to import numpy; if failed, import a local version mynumpy
 which needs to be provided
 """
 try:
     import numpy as np
 except:
-    print >> sys.stderr, "numpy not found. Exiting."
+    print("numpy not found. Exiting.", file=sys.stderr)
     sys.exit(1)
 
 """
-Check that the required arguments (box offset and size in simulation units 
+Check that the required arguments (box offset and size in simulation units
 and the sequence file were provided
 """
 try:
@@ -45,8 +49,8 @@ try:
     box_length = float(sys.argv[2])
     infile = sys.argv[3]
 except:
-    print >> sys.stderr, "Usage: %s <%s> <%s> <%s>" % (sys.argv[0], \
-	"box offset", "box length", "file with sequences")
+    print( "Usage: %s <%s> <%s> <%s>" % (sys.argv[0], \
+	"box offset", "box length", "file with sequences"), file=sys.stderr)
     sys.exit(1)
 box = np.array ([box_length, box_length, box_length])
 
@@ -57,8 +61,7 @@ try:
     inp = open (infile, 'r')
     inp.close()
 except:
-    print >> sys.stderr, "Could not open file '%s' for reading. \
-					      Aborting." % infile
+    print( "Could not open file '%s' for reading. Aborting." % infile, file=sys.stderr)
     sys.exit(2)
 
 # return parts of a string
@@ -86,7 +89,7 @@ Define auxiliary variables for the construction of a helix
 # center of the double strand
 CM_CENTER_DS = POS_BASE + 0.2
 
-# ideal distance between base sites of two nucleotides 
+# ideal distance between base sites of two nucleotides
 # which are to be base paired in a duplex
 BASE_BASE = 0.3897628551303122
 
@@ -118,7 +121,7 @@ strandnum = []
 
 bonds = []
 
-""" 
+"""
 Convert local body frame to quaternion DOF
 """
 def exyz_to_quat (mya1, mya3):
@@ -135,25 +138,25 @@ def exyz_to_quat (mya1, mya3):
     # compute other components from it
 
     if q0sq >= 0.25:
-	myquat[0] = np.sqrt(q0sq)
-	myquat[1] = (mya2[2] - mya3[1]) / (4.0*myquat[0])
-	myquat[2] = (mya3[0] - mya1[2]) / (4.0*myquat[0])
-	myquat[3] = (mya1[1] - mya2[0]) / (4.0*myquat[0])
+        myquat[0] = np.sqrt(q0sq)
+        myquat[1] = (mya2[2] - mya3[1]) / (4.0*myquat[0])
+        myquat[2] = (mya3[0] - mya1[2]) / (4.0*myquat[0])
+        myquat[3] = (mya1[1] - mya2[0]) / (4.0*myquat[0])
     elif q1sq >= 0.25:
-	myquat[1] = np.sqrt(q1sq)
-	myquat[0] = (mya2[2] - mya3[1]) / (4.0*myquat[1])
-	myquat[2] = (mya2[0] + mya1[1]) / (4.0*myquat[1])
-	myquat[3] = (mya1[2] + mya3[0]) / (4.0*myquat[1])
+        myquat[1] = np.sqrt(q1sq)
+        myquat[0] = (mya2[2] - mya3[1]) / (4.0*myquat[1])
+        myquat[2] = (mya2[0] + mya1[1]) / (4.0*myquat[1])
+        myquat[3] = (mya1[2] + mya3[0]) / (4.0*myquat[1])
     elif q2sq >= 0.25:
-	myquat[2] = np.sqrt(q2sq)
-	myquat[0] = (mya3[0] - mya1[2]) / (4.0*myquat[2])
-	myquat[1] = (mya2[0] + mya1[1]) / (4.0*myquat[2])
-	myquat[3] = (mya3[1] + mya2[2]) / (4.0*myquat[2])
+        myquat[2] = np.sqrt(q2sq)
+        myquat[0] = (mya3[0] - mya1[2]) / (4.0*myquat[2])
+        myquat[1] = (mya2[0] + mya1[1]) / (4.0*myquat[2])
+        myquat[3] = (mya3[1] + mya2[2]) / (4.0*myquat[2])
     elif q3sq >= 0.25:
-	myquat[3] = np.sqrt(q3sq)
-	myquat[0] = (mya1[1] - mya2[0]) / (4.0*myquat[3])
-	myquat[1] = (mya3[0] + mya1[2]) / (4.0*myquat[3])
-	myquat[2] = (mya3[1] + mya2[2]) / (4.0*myquat[3])
+        myquat[3] = np.sqrt(q3sq)
+        myquat[0] = (mya1[1] - mya2[0]) / (4.0*myquat[3])
+        myquat[1] = (mya3[0] + mya1[2]) / (4.0*myquat[3])
+        myquat[2] = (mya3[1] + mya2[2]) / (4.0*myquat[3])
 
     norm = 1.0/np.sqrt(myquat[0]*myquat[0] + myquat[1]*myquat[1] + \
 			  myquat[2]*myquat[2] + myquat[3]*myquat[3])
@@ -169,62 +172,62 @@ Adds a strand to the system by appending it to the array of previous strands
 """
 def add_strands (mynewpositions, mynewa1s, mynewa3s):
     overlap = False
-	
-    # This is a simple check for each of the particles where for previously 
-    # placed particles i we check whether it overlaps with any of the 
+
+    # This is a simple check for each of the particles where for previously
+    # placed particles i we check whether it overlaps with any of the
     # newly created particles j
 
-    print >> sys.stdout, "## Checking for overlaps"
+    print( "## Checking for overlaps", file=sys.stdout)
 
-    for i in xrange(len(positions)):
+    for i in range(len(positions)):
 
-	p = positions[i]
-	pa1 = a1s[i]
+        p = positions[i]
+        pa1 = a1s[i]
 
-	for j in xrange (len(mynewpositions)):
+        for j in range (len(mynewpositions)):
 
-	    q = mynewpositions[j]
-	    qa1 = mynewa1s[j]
+            q = mynewpositions[j]
+            qa1 = mynewa1s[j]
 
-	    # skip particles that are anyway too far away
-	    dr = p - q
-	    dr -= box * np.rint (dr / box)
-	    if np.dot(dr, dr) > RC2:
-		continue
+            # skip particles that are anyway too far away
+            dr = p - q
+            dr -= box * np.rint(dr / box)
+            if np.dot(dr, dr) > RC2:
+                continue
 
-	    # base site and backbone site of the two particles
+            # base site and backbone site of the two particles
             p_pos_back = p + pa1 * POS_BACK
             p_pos_base = p + pa1 * POS_BASE
             q_pos_back = q + qa1 * POS_BACK
             q_pos_base = q + qa1 * POS_BASE
 
-	    # check for no overlap between the two backbone sites
+            # check for no overlap between the two backbone sites
             dr = p_pos_back - q_pos_back
-            dr -= box * np.rint (dr / box)
+            dr -= box * np.rint(dr / box)
             if np.dot(dr, dr) < RC2_BACK:
                 overlap = True
 
-	    # check for no overlap between the two base sites
+            # check for no overlap between the two base sites
             dr = p_pos_base -  q_pos_base
-            dr -= box * np.rint (dr / box)
+            dr -= box * np.rint(dr / box)
             if np.dot(dr, dr) < RC2_BASE:
                 overlap = True
 
-	    # check for no overlap between backbone site of particle p 
-	    # with base site of particle q
+            # check for no overlap between backbone site of particle p
+            # with base site of particle q
             dr = p_pos_back - q_pos_base
             dr -= box * np.rint (dr / box)
             if np.dot(dr, dr) < RC2_BACK_BASE:
                 overlap = True
 
-	    # check for no overlap between base site of particle p and 
-	    # backbone site of particle q
+            # check for no overlap between base site of particle p and
+            # backbone site of particle q
             dr = p_pos_base - q_pos_back
             dr -= box * np.rint (dr / box)
             if np.dot(dr, dr) < RC2_BACK_BASE:
                 overlap = True
 
-	    # exit if there is an overlap
+            # exit if there is an overlap
             if overlap:
                 return False
 
@@ -237,10 +240,10 @@ def add_strands (mynewpositions, mynewa1s, mynewa3s):
             a1s.append (p)
         for p in mynewa3s:
             a3s.append (p)
-	# calculate quaternion from local body frame and append
-	for ia in xrange(len(mynewpositions)):
-	    mynewquaternions = exyz_to_quat(mynewa1s[ia],mynewa3s[ia])
-	    quaternions.append(mynewquaternions)
+        # calculate quaternion from local body frame and append
+        for ia in range(len(mynewpositions)):
+            mynewquaternions = exyz_to_quat(mynewa1s[ia],mynewa3s[ia])
+            quaternions.append(mynewquaternions)
 
     return True
 
@@ -281,7 +284,7 @@ def get_rotation_matrix(axis, anglest):
                     [olc*x*z-st*y, olc*y*z+st*x, olc*z*z+ct]])
 
 """
-Generates the position and orientation vectors of a 
+Generates the position and orientation vectors of a
 (single or double) strand from a sequence string
 """
 def generate_strand(bp, sequence=None, start_pos=np.array([0, 0, 0]), \
@@ -295,76 +298,75 @@ def generate_strand(bp, sequence=None, start_pos=np.array([0, 0, 0]), \
     # overall direction of the helix
     dir = np.array(dir, dtype=float)
     if sequence == None:
-	sequence = np.random.randint(1, 5, bp)
+        sequence = np.random.randint(1, 5, bp)
 
-    # the elseif here is most likely redundant 
+    # the elseif here is most likely redundant
     elif len(sequence) != bp:
-	n = bp - len(sequence)
-	sequence += np.random.randint(1, 5, n)
-	print >> sys.stderr, "sequence is too short, adding %d random bases" % n
+        n = bp - len(sequence)
+        sequence += np.random.randint(1, 5, n)
+        print( "sequence is too short, adding %d random bases" % n, file=sys.stderr)
 
     # normalize direction
     dir_norm = np.sqrt(np.dot(dir,dir))
     if dir_norm < 1e-10:
-	print >> sys.stderr, "direction must be a valid vector, \
-			      defaulting to (0, 0, 1)"
-	dir = np.array([0, 0, 1])
+        print( "direction must be a valid vector, defaulting to (0, 0, 1)", file=sys.stderr)
+        dir = np.array([0, 0, 1])
     else: dir /= dir_norm
 
     # find a vector orthogonal to dir to act as helix direction,
     # if not provided switch off random orientation
     if perp is None or perp is False:
-	v1 = np.random.random_sample(3)
-	v1 -= dir * (np.dot(dir, v1))
-	v1 /= np.sqrt(sum(v1*v1))
+        v1 = np.random.random_sample(3)
+        v1 -= dir * (np.dot(dir, v1))
+        v1 /= np.sqrt(sum(v1*v1))
     else:
-	v1 = perp;
+        v1 = perp;
 
     # generate rotational matrix representing the overall rotation of the helix
     R0 = get_rotation_matrix(dir, rot)
-	    
+
     # rotation matrix corresponding to one step along the helix
     R = get_rotation_matrix(dir, [1, "bp"])
 
-    # set the vector a1 (backbone to base) to v1 
+    # set the vector a1 (backbone to base) to v1
     a1 = v1
-    
-    # apply the global rotation to a1 
+
+    # apply the global rotation to a1
     a1 = np.dot(R0, a1)
-    
+
     # set the position of the fist backbone site to start_pos
     rb = np.array(start_pos)
-	    
+
     # set a3 to the direction of the helix
     a3 = dir
     for i in range(bp):
     # work out the position of the centre of mass of the nucleotide
-	rcdm = rb - CM_CENTER_DS * a1
-	
-	# append to newpositions
-	mynewpositions.append(rcdm)
-	mynewa1s.append(a1)
-	mynewa3s.append(a3)
-	
-	# if we are not at the end of the helix, we work out a1 and rb for the 
-	# next nucleotide along the helix
-	if i != bp - 1:
-	    a1 = np.dot(R, a1)
-	    rb += a3 * BASE_BASE
+        rcdm = rb - CM_CENTER_DS * a1
 
-    # if we are working on a double strand, we do a cycle similar 
+        # append to newpositions
+        mynewpositions.append(rcdm)
+        mynewa1s.append(a1)
+        mynewa3s.append(a3)
+
+        # if we are not at the end of the helix, we work out a1 and rb for the
+        # next nucleotide along the helix
+        if i != bp - 1:
+            a1 = np.dot(R, a1)
+            rb += a3 * BASE_BASE
+
+    # if we are working on a double strand, we do a cycle similar
     # to the previous one but backwards
     if double == True:
-	a1 = -a1
-	a3 = -dir
-	R = R.transpose()
-	for i in range(bp):
-	    rcdm = rb - CM_CENTER_DS * a1
-	    mynewpositions.append (rcdm)
-	    mynewa1s.append (a1)
-	    mynewa3s.append (a3)
-	    a1 = np.dot(R, a1)
-	    rb += a3 * BASE_BASE
+        a1 = -a1
+        a3 = -dir
+        R = R.transpose()
+        for i in range(bp):
+            rcdm = rb - CM_CENTER_DS * a1
+            mynewpositions.append (rcdm)
+            mynewa1s.append (a1)
+            mynewa3s.append (a3)
+            a1 = np.dot(R, a1)
+            rb += a3 * BASE_BASE
 
     assert (len (mynewpositions) > 0)
 
@@ -391,10 +393,10 @@ def read_strands(filename):
     try:
         infile = open (filename)
     except:
-        print >> sys.stderr, "Could not open file '%s'. Aborting." % filename
+        print( "Could not open file '%s'. Aborting." % filename, file=sys.stderr )
         sys.exit(2)
 
-    # This block works out the number of nucleotides and strands by reading 
+    # This block works out the number of nucleotides and strands by reading
     # the number of non-empty lines in the input file and the number of letters,
     # taking the possible DOUBLE keyword into account.
     nstrands, nnucl, nbonds = 0, 0, 0
@@ -406,30 +408,29 @@ def read_strands(filename):
         if line[:6] == 'DOUBLE':
             line = line.split()[1]
             length = len(line)
-            print >> sys.stdout, "## Found duplex of %i base pairs" % length
+            print( "## Found duplex of %i base pairs" % length, file=sys.stdout)
             nnucl += 2*length
             nstrands += 2
-	    nbonds += (2*length-2)
+            nbonds += (2*length-2)
         else:
             line = line.split()[0]
             length = len(line)
-            print >> sys.stdout, \
-		    "## Found single strand of %i bases" % length
+            print( "## Found single strand of %i bases" % length, file=sys.stdout)
             nnucl += length
             nstrands += 1
-	    nbonds += length-1
+            nbonds += length-1
     # rewind the sequence input file
     infile.seek(0)
 
-    print >> sys.stdout, "## nstrands, nnucl = ", nstrands, nnucl
+    print( "## nstrands, nnucl = ", nstrands, nnucl, file=sys.stdout)
 
     # generate the data file in LAMMPS format
     try:
         out = open ("data.oxdna", "w")
     except:
-        print >> sys.stderr, "Could not open data file for writing. Aborting."
+        print( "Could not open data file for writing. Aborting.", file=sys.stderr)
         sys.exit(2)
-	
+
     lines = infile.readlines()
     nlines = len(lines)
     i = 1
@@ -440,115 +441,114 @@ def read_strands(filename):
         line = line.upper().strip()
 
         # skip empty lines
-        if len(line) == 0: 
-	    i += 1
-	    continue
+        if len(line) == 0:
+            i += 1
+            continue
 
-	# block for duplexes: last argument of the generate function 
-	# is set to 'True'
+        # block for duplexes: last argument of the generate function
+        # is set to 'True'
         if line[:6] == 'DOUBLE':
             line = line.split()[1]
             length = len(line)
             seq = [(base_to_number[x]) for x in line]
 
-	    myns += 1
-	    for b in xrange(length):
-		basetype.append(seq[b])
-		strandnum.append(myns)
+            myns += 1
+            for b in range(length):
+                basetype.append(seq[b])
+                strandnum.append(myns)
 
-	    for b in xrange(length-1):
-		bondpair = [noffset + b, noffset + b + 1]
-		bonds.append(bondpair)
-	    noffset += length
+            for b in range(length-1):
+                bondpair = [noffset + b, noffset + b + 1]
+                bonds.append(bondpair)
+            noffset += length
 
-	    # create the sequence of the second strand as made of 
-	    # complementary bases
-	    seq2 = [5-s for s in seq]
-	    seq2.reverse()
+            # create the sequence of the second strand as made of
+            # complementary bases
+            seq2 = [5-s for s in seq]
+            seq2.reverse()
 
-	    myns += 1
-	    for b in xrange(length):
-		basetype.append(seq2[b])
-		strandnum.append(myns)
+            myns += 1
+            for b in range(length):
+                basetype.append(seq2[b])
+                strandnum.append(myns)
 
-	    for b in xrange(length-1):
-		bondpair = [noffset + b, noffset + b + 1]
-		bonds.append(bondpair)
-	    noffset += length
- 
-            print >> sys.stdout, "## Created duplex of %i bases" % (2*length)
+            for b in range(length-1):
+                bondpair = [noffset + b, noffset + b + 1]
+                bonds.append(bondpair)
+            noffset += length
 
-	    # generate random position of the first nucleotide
+            print( "## Created duplex of %i bases" % (2*length), file=sys.stdout)
+
+            # generate random position of the first nucleotide
             cdm = box_offset + np.random.random_sample(3) * box
 
-            # generate the random direction of the helix 
+            # generate the random direction of the helix
             axis = np.random.random_sample(3)
             axis /= np.sqrt(np.dot(axis, axis))
 
-            # use the generate function defined above to create 
-	    # the position and orientation vector of the strand 
+            # use the generate function defined above to create
+            # the position and orientation vector of the strand
             newpositions, newa1s, newa3s = generate_strand(len(line), \
-		    sequence=seq, dir=axis, start_pos=cdm, double=True)
+                sequence=seq, dir=axis, start_pos=cdm, double=True)
 
             # generate a new position for the strand until it does not overlap
-	    # with anything already present
-	    start = timer()
+            # with anything already present
+            start = timer()
             while not add_strands(newpositions, newa1s, newa3s):
                 cdm = box_offset + np.random.random_sample(3) * box
                 axis = np.random.random_sample(3)
                 axis /= np.sqrt(np.dot(axis, axis))
                 newpositions, newa1s, newa3s = generate_strand(len(line), \
-		      sequence=seq, dir=axis, start_pos=cdm, double=True)
-                print >> sys.stdout, "## Trying %i" % i
-	    end = timer()
-            print >> sys.stdout, "## Added duplex of %i bases (line %i/%i) in %.2fs, now at %i/%i" % \
-				      (2*length, i, nlines, end-start, len(positions), nnucl)
+                    sequence=seq, dir=axis, start_pos=cdm, double=True)
+                print( "## Trying %i" % i, file=sys.stdout)
+            end = timer()
+            print( "## Added duplex of %i bases (line %i/%i) in %.2fs, now at %i/%i" % \
+				      (2*length, i, nlines, end-start, len(positions), nnucl), file=sys.stdout)
 
-	# block for single strands: last argument of the generate function 
-	# is set to 'False'
+        # block for single strands: last argument of the generate function
+        # is set to 'False'
         else:
             length = len(line)
             seq = [(base_to_number[x]) for x in line]
 
-	    myns += 1
-	    for b in xrange(length):
-		basetype.append(seq[b])
-		strandnum.append(myns)
+            myns += 1
+            for b in range(length):
+                basetype.append(seq[b])
+                strandnum.append(myns)
 
-	    for b in xrange(length-1):
-		bondpair = [noffset + b, noffset + b + 1]
-		bonds.append(bondpair)
-	    noffset += length
+            for b in range(length-1):
+                bondpair = [noffset + b, noffset + b + 1]
+                bonds.append(bondpair)
+            noffset += length
 
-	    # generate random position of the first nucleotide
+            # generate random position of the first nucleotide
             cdm = box_offset + np.random.random_sample(3) * box
 
-            # generate the random direction of the helix 
+            # generate the random direction of the helix
             axis = np.random.random_sample(3)
             axis /= np.sqrt(np.dot(axis, axis))
 
-            print >> sys.stdout, \
-		      "## Created single strand of %i bases" % length
+            print("## Created single strand of %i bases" % length, file=sys.stdout)
 
             newpositions, newa1s, newa3s = generate_strand(length, \
 		      sequence=seq, dir=axis, start_pos=cdm, double=False)
-	    start = timer()
+            start = timer()
             while not add_strands(newpositions, newa1s, newa3s):
                 cdm = box_offset + np.random.random_sample(3) * box
                 axis = np.random.random_sample(3)
-		axis /= np.sqrt(np.dot(axis, axis))
+                axis /= np.sqrt(np.dot(axis, axis))
                 newpositions, newa1s, newa3s = generate_strand(length, \
-			  sequence=seq, dir=axis, start_pos=cdm, double=False)
+                    sequence=seq, dir=axis, start_pos=cdm, double=False)
                 print >> sys.stdout, "## Trying  %i" % (i)
-	    end = timer()
-            print >> sys.stdout, "## Added single strand of %i bases (line %i/%i) in %.2fs, now at %i/%i" % \
-				      (length, i, nlines, end-start,len(positions), nnucl)
+            end = timer()
+            print( "## Added single strand of %i bases (line %i/%i) in %.2fs, now at %i/%i" % \
+				      (length, i, nlines, end-start,len(positions), nnucl), file=sys.stdout)
 
         i += 1
 
     # sanity check
     if not len(positions) == nnucl:
-        print len(positions), nnucl
+        print( len(positions), nnucl )
         raise AssertionError
 
     out.write('# LAMMPS data file\n')
@@ -580,44 +580,41 @@ def read_strands(filename):
     out.write('Atoms\n')
     out.write('\n')
 
-    for i in xrange(nnucl):
-	out.write('%d %d %22.15le %22.15le %22.15le %d 1 1\n' \
-		  % (i+1, basetype[i], \
-		     positions[i][0], positions[i][1], positions[i][2], \
-		     strandnum[i]))
+    for i in range(nnucl):
+        out.write('%d %d %22.15le %22.15le %22.15le %d 1 1\n' \
+            % (i+1, basetype[i], positions[i][0], positions[i][1], positions[i][2], strandnum[i]))
 
     out.write('\n')
     out.write('# Atom-ID, translational, rotational velocity\n')
     out.write('Velocities\n')
     out.write('\n')
 
-    for i in xrange(nnucl):
-	out.write("%d %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le\n" \
-		  % (i+1,0.0,0.0,0.0,0.0,0.0,0.0))
+    for i in range(nnucl):
+        out.write("%d %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le\n" \
+            % (i+1,0.0,0.0,0.0,0.0,0.0,0.0))
 
     out.write('\n')
     out.write('# Atom-ID, shape, quaternion\n')
     out.write('Ellipsoids\n')
     out.write('\n')
 
-    for i in xrange(nnucl):
-	out.write(\
-    "%d %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le\n"  \
-      % (i+1,1.1739845031423408,1.1739845031423408,1.1739845031423408, \
-	quaternions[i][0],quaternions[i][1], quaternions[i][2],quaternions[i][3]))
- 
+    for i in range(nnucl):
+        out.write("%d %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le %22.15le\n"  \
+            % (i+1,1.1739845031423408,1.1739845031423408,1.1739845031423408, \
+            quaternions[i][0],quaternions[i][1], quaternions[i][2],quaternions[i][3]))
+
     out.write('\n')
     out.write('# Bond topology\n')
     out.write('Bonds\n')
     out.write('\n')
 
-    for i in xrange(nbonds):
-	out.write("%d  %d  %d  %d\n" % (i+1,1,bonds[i][0],bonds[i][1]))
+    for i in range(nbonds):
+        out.write("%d  %d  %d  %d\n" % (i+1,1,bonds[i][0],bonds[i][1]))
 
     out.close()
 
-    print >> sys.stdout, "## Wrote data to 'data.oxdna'"
-    print >> sys.stdout, "## DONE"
+    print("## Wrote data to 'data.oxdna'", file=sys.stdout)
+    print("## DONE", file=sys.stdout)
 
 # call the above main() function, which executes the program
 read_strands (infile)
@@ -627,4 +624,6 @@ runtime = end_time-start_time
 hours = runtime/3600
 minutes = (runtime-np.rint(hours)*3600)/60
 seconds = (runtime-np.rint(hours)*3600-np.rint(minutes)*60)%60
-print >> sys.stdout, "## Total runtime %ih:%im:%.2fs" % (hours,minutes,seconds)
+print( "## Total runtime %ih:%im:%.2fs" % (hours,minutes,seconds), file=sys.stdout)
+
+
diff --git a/examples/PACKAGES/cgdna/util/generate_simple.py b/examples/PACKAGES/cgdna/util/generate_simple.py
index 33cf1ee7f5..7702bfc7f5 100644
--- a/examples/PACKAGES/cgdna/util/generate_simple.py
+++ b/examples/PACKAGES/cgdna/util/generate_simple.py
@@ -1,5 +1,8 @@
 # Setup tool for oxDNA input in LAMMPS format.
 
+# for python2/3 compatibility
+from __future__ import print_function
+
 import math,numpy as np,sys,os
 
 # system size
@@ -250,59 +253,59 @@ def duplex_array():
       qrot3=math.sin(0.5*twist)
 
       for letter in strand[2]:
-	temp1=[]
-	temp2=[]
+        temp1=[]
+        temp2=[]
 
-	temp1.append(nt2num[letter])
-	temp2.append(compnt2num[letter])
+        temp1.append(nt2num[letter])
+        temp2.append(compnt2num[letter])
 
-	temp1.append([posx1,posy1,posz1])
-	temp2.append([posx2,posy2,posz2])
+        temp1.append([posx1,posy1,posz1])
+        temp2.append([posx2,posy2,posz2])
 
-	vel=[0,0,0,0,0,0]
-	temp1.append(vel)
-	temp2.append(vel)
+        vel=[0,0,0,0,0,0]
+        temp1.append(vel)
+        temp2.append(vel)
 
-	temp1.append(shape)
-	temp2.append(shape)
+        temp1.append(shape)
+        temp2.append(shape)
 
-	temp1.append(quat1)
-	temp2.append(quat2)
+        temp1.append(quat1)
+        temp2.append(quat2)
 
-	quat1_0 = quat1[0]*qrot0 - quat1[1]*qrot1 - quat1[2]*qrot2 - quat1[3]*qrot3 
-	quat1_1 = quat1[0]*qrot1 + quat1[1]*qrot0 + quat1[2]*qrot3 - quat1[3]*qrot2 
-	quat1_2 = quat1[0]*qrot2 + quat1[2]*qrot0 + quat1[3]*qrot1 - quat1[1]*qrot3 
-	quat1_3 = quat1[0]*qrot3 + quat1[3]*qrot0 + quat1[1]*qrot2 + quat1[2]*qrot1 
+        quat1_0 = quat1[0]*qrot0 - quat1[1]*qrot1 - quat1[2]*qrot2 - quat1[3]*qrot3
+        quat1_1 = quat1[0]*qrot1 + quat1[1]*qrot0 + quat1[2]*qrot3 - quat1[3]*qrot2
+        quat1_2 = quat1[0]*qrot2 + quat1[2]*qrot0 + quat1[3]*qrot1 - quat1[1]*qrot3
+        quat1_3 = quat1[0]*qrot3 + quat1[3]*qrot0 + quat1[1]*qrot2 + quat1[2]*qrot1
 
-	quat1 = [quat1_0,quat1_1,quat1_2,quat1_3]
+        quat1 = [quat1_0,quat1_1,quat1_2,quat1_3]
 
-	posx1=axisx - dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2)
-	posy1=axisy - dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3]))
-	posz1=posz1+risez
+        posx1=axisx - dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2)
+        posy1=axisy - dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3]))
+        posz1=posz1+risez
 
-	quat2_0 = quat2[0]*qrot0 - quat2[1]*qrot1 - quat2[2]*qrot2 + quat2[3]*qrot3 
-	quat2_1 = quat2[0]*qrot1 + quat2[1]*qrot0 - quat2[2]*qrot3 - quat2[3]*qrot2 
-	quat2_2 = quat2[0]*qrot2 + quat2[2]*qrot0 + quat2[3]*qrot1 + quat2[1]*qrot3 
-	quat2_3 =-quat2[0]*qrot3 + quat2[3]*qrot0 + quat2[1]*qrot2 + quat2[2]*qrot1 
+        quat2_0 = quat2[0]*qrot0 - quat2[1]*qrot1 - quat2[2]*qrot2 + quat2[3]*qrot3
+        quat2_1 = quat2[0]*qrot1 + quat2[1]*qrot0 - quat2[2]*qrot3 - quat2[3]*qrot2
+        quat2_2 = quat2[0]*qrot2 + quat2[2]*qrot0 + quat2[3]*qrot1 + quat2[1]*qrot3
+        quat2_3 =-quat2[0]*qrot3 + quat2[3]*qrot0 + quat2[1]*qrot2 + quat2[2]*qrot1
 
-	quat2 = [quat2_0,quat2_1,quat2_2,quat2_3]
+        quat2 = [quat2_0,quat2_1,quat2_2,quat2_3]
 
-	posx2=axisx + dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2)
-	posy2=axisy + dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3]))
-	posz2=posz1
+        posx2=axisx + dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2)
+        posy2=axisy + dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3]))
+        posz2=posz1
 
-	if (len(nucleotide)+1 > strandstart):
-	  topology.append([1,len(nucleotide),len(nucleotide)+1])
-	  comptopo.append([1,len(nucleotide)+len(strand[2]),len(nucleotide)+len(strand[2])+1])
+        if (len(nucleotide)+1 > strandstart):
+          topology.append([1,len(nucleotide),len(nucleotide)+1])
+          comptopo.append([1,len(nucleotide)+len(strand[2]),len(nucleotide)+len(strand[2])+1])
 
-	nucleotide.append(temp1)
-	compstrand.append(temp2)
+        nucleotide.append(temp1)
+        compstrand.append(temp2)
 
       for ib in range(len(compstrand)):
-	nucleotide.append(compstrand[len(compstrand)-1-ib])
+        nucleotide.append(compstrand[len(compstrand)-1-ib])
 
       for ib in range(len(comptopo)):
-	topology.append(comptopo[ib])
+        topology.append(comptopo[ib])
 
   return
 
diff --git a/examples/PACKAGES/dpd-meso/mdpd/in.mdpd b/examples/PACKAGES/dpd-meso/mdpd/in.mdpd
index b0740c8227..2c740f4127 100644
--- a/examples/PACKAGES/dpd-meso/mdpd/in.mdpd
+++ b/examples/PACKAGES/dpd-meso/mdpd/in.mdpd
@@ -16,6 +16,7 @@ neighbor	        0.3 bin
 neigh_modify        every 1 delay 0 check yes
 
 atom_style	        mdpd
+comm_modify vel yes
 
 region     mdpd     block -25 25 -10 10 -10 10 units box
 create_box          1 mdpd
diff --git a/examples/PACKAGES/interlayer/aip_water_2dm/CBNOH.aip.water.2dm b/examples/PACKAGES/interlayer/aip_water_2dm/CBNOH.aip.water.2dm
new file mode 120000
index 0000000000..60c9c3a8f4
--- /dev/null
+++ b/examples/PACKAGES/interlayer/aip_water_2dm/CBNOH.aip.water.2dm
@@ -0,0 +1 @@
+../../../../potentials/CBNOH.aip.water.2dm
\ No newline at end of file
diff --git a/examples/PACKAGES/interlayer/aip_water_2dm/COH.aip.water.2dm b/examples/PACKAGES/interlayer/aip_water_2dm/COH.aip.water.2dm
deleted file mode 120000
index fe5cccfcd2..0000000000
--- a/examples/PACKAGES/interlayer/aip_water_2dm/COH.aip.water.2dm
+++ /dev/null
@@ -1 +0,0 @@
-../../../../potentials/COH.aip.water.2dm
\ No newline at end of file
diff --git a/examples/PACKAGES/interlayer/ilp_tmds/MoS2.ILP b/examples/PACKAGES/interlayer/ilp_tmds/MoS2.ILP
deleted file mode 120000
index 75dd894eef..0000000000
--- a/examples/PACKAGES/interlayer/ilp_tmds/MoS2.ILP
+++ /dev/null
@@ -1 +0,0 @@
-../../../../potentials/MoS2.ILP
\ No newline at end of file
diff --git a/examples/PACKAGES/interlayer/ilp_tmds/TMD.ILP b/examples/PACKAGES/interlayer/ilp_tmds/TMD.ILP
new file mode 120000
index 0000000000..70f7ea18df
--- /dev/null
+++ b/examples/PACKAGES/interlayer/ilp_tmds/TMD.ILP
@@ -0,0 +1 @@
+../../../../potentials/TMD.ILP
\ No newline at end of file
diff --git a/examples/PACKAGES/interlayer/ilp_tmds/in.mos2 b/examples/PACKAGES/interlayer/ilp_tmds/in.mos2
index b77f2fe719..0db4ec12d5 100644
--- a/examples/PACKAGES/interlayer/ilp_tmds/in.mos2
+++ b/examples/PACKAGES/interlayer/ilp_tmds/in.mos2
@@ -12,7 +12,7 @@ mass            4 95.94
 pair_style     	hybrid/overlay sw/mod sw/mod ilp/tmd 16.0
 pair_coeff     	* * sw/mod 1 tmd.sw.mod Mo S S NULL NULL NULL
 pair_coeff     	* * sw/mod 2 tmd.sw.mod NULL NULL NULL Mo S S
-pair_coeff     	* * ilp/tmd  MoS2.ILP Mo S S Mo S S
+pair_coeff     	* * ilp/tmd  TMD.ILP Mo S S Mo S S
 
 # Calculate the pair potential
 compute   	0 all pair ilp/tmd
diff --git a/examples/PACKAGES/pace/README.md b/examples/PACKAGES/pace/README.md
new file mode 100644
index 0000000000..66254b5241
--- /dev/null
+++ b/examples/PACKAGES/pace/README.md
@@ -0,0 +1,9 @@
+# This folder contains examples for pace in LAMMPS
+
+
+## Compute pace usage
+compute/latte_cell_0.data            # lammps data file with C-H-O structure
+compute/latte_cell_0.xyz             # xyz file with C-H-O structure 
+compute/coupling_coefficients.yace   # .yace file containing coupling coefficients (or ACE potential parameters)
+compute/in.compute                   # input file for calling `compute pace`
+
diff --git a/examples/PACKAGES/pace/compute/coupling_coefficients.yace b/examples/PACKAGES/pace/compute/coupling_coefficients.yace
new file mode 100644
index 0000000000..2953222216
--- /dev/null
+++ b/examples/PACKAGES/pace/compute/coupling_coefficients.yace
@@ -0,0 +1,294 @@
+elements: [H, N, O] 
+E0: [0.000000, 0.000000, 0.000000] 
+deltaSplineBins: 0.001000 
+embeddings:
+  0: {ndensity: 1, FS_parameters: [1.0, 1.0], npoti: FinnisSinclair, rho_core_cutoff: 100000, drho_core_cutoff: 250}
+  1: {ndensity: 1, FS_parameters: [1.0, 1.0], npoti: FinnisSinclair, rho_core_cutoff: 100000, drho_core_cutoff: 250}
+  2: {ndensity: 1, FS_parameters: [1.0, 1.0], npoti: FinnisSinclair, rho_core_cutoff: 100000, drho_core_cutoff: 250}
+bonds:
+  [0, 0]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.0, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance}
+  [0, 1]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.5, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance}
+  [0, 2]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.7, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance}
+  [1, 0]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.5, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance}
+  [1, 1]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 4.4, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance}
+  [1, 2]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.7, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance}
+  [2, 0]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.7, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance}
+  [2, 1]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.7, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance}
+  [2, 2]: {nradmax: 2, lmax: 2, nradbasemax: 2, radbasename: ChebExpCos, radparameters: [3.3], radcoefficients: [[[1, 0], [1, 0], [1, 0]], [[0, 1], [0, 1], [0, 1]]], prehc: 0, lambdahc: 3.3, rcut: 5.5, dcut: 0.01, rcut_in: 0.1, dcut_in: 0.01, inner_cutoff_type: distance}
+functions:
+  0:
+    - {mu0: 0, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [0], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 0, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [0], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 0, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [1], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 0, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [1], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 0, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [2], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 0, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [2], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 0, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 2, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [2, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 0], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 1], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 1], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 2, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [2, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 0, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 0], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 0, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [2, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+  1:
+    - {mu0: 1, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [1], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 1, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [1], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 1, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [2], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 1, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [2], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 1, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [0], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 1, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [0], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 2, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 1], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 0], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 1], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [2, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 2, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [2, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 1, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 0], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 1, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [2, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+  2:
+    - {mu0: 2, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [0], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 2, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [1], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 2, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [0], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 2, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [1], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 2, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [2], ns: [1], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 2, rank: 1, ndensity: 1, num_ms_combs: 1, mus: [2], ns: [2], ls: [0], ms_combs: [0], ctildes: [1.0]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 1], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [2, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 1], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 0], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 1], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [0, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [1, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [1, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 1], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [2, 2], ns: [2, 2], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 0], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [1, 1], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [1, 2], ns: [2, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 3, mus: [1, 2], ns: [2, 1], ls: [1, 1], ms_combs: [-1, 1, 0, 0, 1, -1], ctildes: [0.5773502691896257, -0.5773502691896257, 0.5773502691896257]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [0, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 2, ndensity: 1, num_ms_combs: 5, mus: [2, 2], ns: [1, 2], ls: [2, 2], ms_combs: [-2, 2, -1, 1, 0, 0, 1, -1, 2, -2], ctildes: [0.4472135954999579, -0.4472135954999579, 0.447213595499958, -0.4472135954999579, 0.4472135954999579]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 0], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 2, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 1], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 0, 1], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [2, 2, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [2, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 2], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [0, 1, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 2], ns: [1, 1, 1], ls: [1, 2, 1], ms_combs: [-1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -2, 1, 1, -1, 0, 1, 0, -1], ctildes: [0.10540925533894599, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.21081851067789198, -0.18257418583505536, 0.25819888974716115, -0.18257418583505536, 0.10540925533894599]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 1, 1], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 2, 2], ns: [1, 1, 1], ls: [2, 1, 1], ms_combs: [-2, 1, 1, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 2, -1, -1], ctildes: [0.25819888974716115, -0.18257418583505536, -0.18257418583505536, 0.10540925533894599, 0.21081851067789198, 0.10540925533894599, -0.18257418583505536, -0.18257418583505536, 0.25819888974716115]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 9, mus: [1, 2, 2], ns: [1, 1, 1], ls: [1, 1, 2], ms_combs: [-1, -1, 2, -1, 0, 1, -1, 1, 0, 0, -1, 1, 0, 0, 0, 0, 1, -1, 1, -1, 0, 1, 0, -1, 1, 1, -2], ctildes: [0.19999999999999998, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.16329931618554522, -0.1414213562373095, 0.08164965809277261, -0.1414213562373095, 0.19999999999999998]}
+    - {mu0: 2, rank: 3, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1], ns: [1, 1, 1], ls: [2, 2, 2], ms_combs: [-2, 0, 2, -2, 1, 1, -2, 2, 0, -1, -1, 2, -1, 0, 1, -1, 1, 0, -1, 2, -1, 0, -2, 2, 0, -1, 1, 0, 0, 0, 0, 1, -1, 0, 2, -2, 1, -2, 1, 1, -1, 0, 1, 0, -1, 1, 1, -2, 2, -2, 0, 2, -1, -1, 2, 0, -2], ctildes: [0.10690449676496976, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, 0.05345224838248488, -0.10690449676496976, 0.05345224838248488, 0.10690449676496976, -0.1309307341415954, 0.05345224838248488, 0.05345224838248488, -0.1309307341415954, 0.10690449676496976, -0.1309307341415954, 0.10690449676496976]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 0], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 0, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 0, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 1, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [0, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1, 1], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 1, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 1, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [1, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
+    - {mu0: 2, rank: 4, ndensity: 1, num_ms_combs: 19, mus: [2, 2, 2, 2], ns: [1, 1, 1, 1], ls: [1, 1, 1, 1], ms_combs: [-1, -1, 1, 1, -1, 0, 0, 1, -1, 0, 1, 0, -1, 1, -1, 1, -1, 1, 0, 0, -1, 1, 1, -1, 0, -1, 0, 1, 0, -1, 1, 0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 1, -1, 0, 1, -1, 0, 0, 1, 0, -1, 1, -1, -1, 1, 1, -1, 0, 0, 1, -1, 1, -1, 1, 0, -1, 0, 1, 0, 0, -1, 1, 1, -1, -1], ctildes: [0.0, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, -0.3333333333333333, 0.3333333333333333, -0.3333333333333333, 0.0, 0.0, 0.3333333333333333, -0.3333333333333333, 0.3333333333333333, 0.0, 0.0, 0.0]}
diff --git a/examples/PACKAGES/pace/compute/in.compute b/examples/PACKAGES/pace/compute/in.compute
new file mode 100644
index 0000000000..a0ef25f606
--- /dev/null
+++ b/examples/PACKAGES/pace/compute/in.compute
@@ -0,0 +1,22 @@
+#info all out log
+units  metal
+atom_style  atomic
+boundary    p p p
+atom_modify    map hash
+boundary  p p p
+read_data  latte_cell_0.data
+mass  1 1.00
+mass  2 14.00
+mass  3 15.999
+
+        # potential settings
+
+pair_style     zero 5.7
+pair_coeff     * *
+
+compute     pace all pace coupling_coefficients.yace 1 0
+
+thermo 1 
+thermo_style  custom step temp c_pace[1][183]
+
+run 0
diff --git a/examples/PACKAGES/pace/compute/latte_cell_0.data b/examples/PACKAGES/pace/compute/latte_cell_0.data
new file mode 100644
index 0000000000..b18ec13a4d
--- /dev/null
+++ b/examples/PACKAGES/pace/compute/latte_cell_0.data
@@ -0,0 +1,172 @@
+latte_cell_0.data (written by ASE) 
+
+161 	 atoms 
+3  atom types
+0.0                      12  xlo xhi
+0.0                      12  ylo yhi
+0.0                      12  zlo zhi
+
+
+Atoms 
+
+     1   3      1.2688096799999999      2.0079938400000001      2.7446829899999998
+     2   1      1.5343068200000001      2.0638766500000001      3.7105626900000002
+     3   1      1.7848279600000001      2.6755003400000001      2.2268847200000002
+     4   1              1.56251195      1.1089126899999999      2.3978115199999999
+     5   1             11.61728216              5.71881094      2.4732045999999999
+     6   2      6.5501865600000002      4.7439566800000001      3.6526025500000001
+     7   1      6.4564895299999998      4.1571673000000002      2.6975267999999999
+     8   2               2.0835561              1.59406078      8.5498047600000007
+     9   1      1.1041162499999999      1.4971771599999999      8.1507879200000009
+    10   1              2.60115534      2.2945960400000001              7.95374187
+    11   1      1.9817723300000001      2.0194066400000001      9.5128239400000005
+    12   1     0.99333338000000004      3.6983907299999998      8.1903947899999991
+    13   3      4.9484070999999998      5.3645501400000004              9.16152503
+    14   1      9.0716170599999995      9.3748453999999999      4.2276462400000003
+    15   2     0.30864418999999998      7.7136657499999997      2.9274995599999998
+    16   1              0.47661671              10.1807211              3.71160091
+    17   1              1.07465334      7.8226921999999997      3.5771466900000002
+    18   1     0.38402249999999999      8.3770493300000002      2.1748437100000002
+    19   1      11.435413410000001      7.7903735999999997      3.4040245499999999
+    20   3      6.1570384599999999             10.25988474              3.50899568
+    21   1      5.5932224399999999      9.5632944700000007      3.1446559000000001
+    22   2      1.7785569000000001      7.6312579300000003      9.1488452299999992
+    23   1      2.5594048599999999              6.96832838      9.3069700199999996
+    24   1              2.12441551      8.4547986999999996      8.6428622900000001
+    25   1              1.04552782      7.1697722800000001      8.5894244999999998
+    26   1     0.34824445999999998             10.17844028      9.1629463799999993
+    27   3      5.9638830399999998      10.723709400000001      9.4568803900000002
+    28   1      6.5890835699999997      10.926486110000001      8.7981925800000003
+    29   2      7.1065890400000002              1.83029753      3.3452543600000002
+    30   1      6.9229304999999997      1.8465022099999999      4.3089037100000001
+    31   1      8.0780433600000006      1.9303052199999999      3.2089521400000001
+    32   1      5.6795373600000003      10.471831630000001      4.3244390499999996
+    33   1              6.82999417     0.95850113000000003      2.9815288199999999
+    34   2      11.383805349999999      4.6301225199999996      2.5393688399999998
+    35   1     0.37927047000000003      4.1943216300000001              2.59073807
+    36   3      5.2376410099999999              1.91523463      9.7240636400000007
+    37   1      4.7887202499999999      2.7036936499999999      9.5698142300000004
+    38   1      9.8129906699999996      9.2075140700000002              4.08265499
+    39   1      4.7980879500000002      1.1403494700000001      9.6739962800000008
+    40   1      5.4455845600000004      2.0102099999999998      10.620773509999999
+    41   3     0.90954338999999995      4.6240093199999999      8.3108110600000007
+    42   1      11.909735319999999      4.7483814000000004      8.2500624600000005
+    43   2      7.3223424499999998               7.5866457      3.0245226500000002
+    44   1      7.4470362200000002      8.3169646700000008      3.7148003300000001
+    45   1      6.9073805300000002               7.9385021      2.1723768699999999
+    46   1      5.5542868500000004      5.1176065800000003      3.7655251999999999
+    47   1      6.8124309500000004      6.7778811599999997      3.3973232499999999
+    48   2     0.29575823000000001             11.04303794      3.1016142499999999
+    49   1     0.86490721999999998             11.83879228      3.6389974500000002
+    50   3              6.85201686      8.0846369300000003      8.8762878799999996
+    51   1      7.3351430100000004      7.4263498700000001      9.3821674799999997
+    52   1      6.7919613300000004      7.7595477199999996      7.9716174799999999
+    53   1      3.8990487699999998      6.4283490399999996      8.8832409600000002
+    54   1              5.95997296      9.9329723199999993      9.4746654699999997
+    55   3      11.403658979999999      10.371960359999999      9.2766092199999992
+    56   1      10.983666360000001      9.5157199800000001      9.1478757300000009
+    57   3      1.5223279700000001      5.3327331100000004     0.57537605999999997
+    58   1      2.3815113999999999      5.7251991200000001     0.77945295999999997
+    59   1     0.92079957000000001      6.0931282299999996     0.62203253000000003
+    60   3             11.23490924      2.9153355200000002      6.7585064099999999
+    61   1      10.792340190000001      2.9755225099999998      5.9000018399999998
+    62   1      10.751242059999999      2.1896156000000002      7.1807401500000001
+    63   3             11.39027944      7.3462855600000001      6.7258299499999996
+    64   1             10.92025679              6.69831954      7.2776696599999999
+    65   1             11.12238028      7.0632020999999998      5.8394107799999997
+    66   3      8.4684319499999994             10.71736286             10.60018556
+    67   1      8.5672201599999998      11.420466080000001             11.25794033
+    68   1      9.0803109800000001             10.04804949              10.9406517
+    69   3      6.5851757299999996      9.9940623399999993      6.5574614899999997
+    70   1      7.0276325799999997             10.76096604      6.1723333699999996
+    71   1      5.7419327400000002      10.384583920000001      6.8228822999999998
+    72   3      1.7600546399999999              1.01771919      5.4926787700000004
+    73   1      1.9704209100000001              1.01748419      6.4429703700000003
+    74   1              1.42973007              0.11076352      5.3470644900000002
+    75   1              2.61130613      9.7034123700000006      10.450306830000001
+    76   3              3.05086908             10.48131334      10.085189310000001
+    77   1      3.0032693199999998             10.93357295      9.3652121000000008
+    78   3      6.4631532199999997      8.7652058299999993      11.967847969999999
+    79   1      6.4506808400000004      9.6596595300000008      11.588956019999999
+    80   1      5.6611629700000003      8.3535737700000006      11.638443329999999
+    81   3      1.1745999300000001      5.2420690800000003      5.1001449699999997
+    82   1              1.31932881      5.5236392399999996      6.0128966300000002
+    83   1     0.58053834999999998      4.4898134599999997      5.2325565000000003
+    84   3      6.7275549699999999     0.78840874999999999      7.3817280900000002
+    85   1      6.3887965600000003              1.54670982      6.8634520400000003
+    86   1      7.6791783999999996     0.94039024000000004      7.2649461000000004
+    87   3      8.5476657199999995   0.0064750299999999997      5.0450514100000001
+    88   1      8.8736290899999997             11.10484108      4.8601807900000003
+    89   1      8.0477597599999999     0.20198361000000001      4.2357399400000002
+    90   3      1.2895030000000001      8.4280097900000008             11.82038504
+    91   1      1.4766666399999999      8.1087866399999999             10.87290333
+    92   1              2.10220669      8.1947620200000006              0.29510553
+    93   3      9.6797907599999995      6.4207335499999996      4.3469150599999997
+    94   1      8.9271530099999996              6.72940235      3.7974122399999999
+    95   1             10.20024126      5.9167739199999998              3.66976111
+    96   3              3.57411616      6.7041021699999996      3.8825478499999999
+    97   1      2.8894899500000002      6.1560529800000001      4.2980848099999998
+    98   1      4.3613707699999997      6.4304732400000004      4.3804965400000002
+    99   3               4.7506556      11.441853350000001              1.12537088
+   100   1      4.0861192800000001      10.748523670000001      1.1923347099999999
+   101   1      5.5035301600000004      10.965688249999999     0.73651277000000004
+   102   3      9.5254526399999992      4.8994443900000002      8.3732284099999994
+   103   1      8.7885959800000002      4.3508043900000004      8.6632831400000008
+   104   1      9.6149067499999994      4.6084911499999999      7.4540068699999997
+   105   3      4.1970746700000001              1.34592128              3.67401439
+   106   1      4.9437011999999996     0.74406280999999996                3.514068
+   107   1      4.1905534900000001      1.7730376000000001      2.7963049400000002
+   108   3              1.88232618             11.95451227     0.60024434000000004
+   109   1      2.0464587299999999             11.02454723              0.38329541
+   110   1      1.1518493700000001     0.17494340999999999             11.99928285
+   111   3      3.7593842199999998             11.01685511      6.4562050800000002
+   112   1      3.2125414299999999              10.4553747      5.8894917099999997
+   113   1      3.4166026899999999      10.821557670000001      7.3296563900000002
+   114   3      9.7039841399999993              3.95001545      11.894743249999999
+   115   1      10.461666060000001      3.9163117999999999      11.285435229999999
+   116   1             10.09834695      4.4026997400000001     0.68193007999999999
+   117   3      8.5639596400000002      3.5169507499999999      5.6224104199999996
+   118   1      8.3966650299999994      2.6262214699999999      5.2638164300000003
+   119   1      7.9695371399999999      4.0825059799999996      5.0049407400000003
+   120   3      9.6736245000000007     0.48030482000000002      7.9257577799999996
+   121   1      9.6131980400000003      11.883419180000001      7.1680923999999999
+   122   1      9.9784050299999993             11.90238635              8.63894187
+   123   3      3.9424153099999999      6.9650296699999998             11.60258943
+   124   1      4.2767152700000004      6.8460048999999996      10.670225220000001
+   125   1      4.6570638500000001      6.5129461500000003    0.091159879999999999
+   126   3      3.0570173199999999      9.6631958499999993      3.6611250599999998
+   127   1      2.5400490100000002      9.5743355000000001      2.8444047600000002
+   128   1      2.9314874400000002      8.7809807200000005      4.0425234200000002
+   129   3      7.4549612700000001      5.8430850799999998      11.011384720000001
+   130   1      8.1675884100000005      5.4639182799999997             10.47644287
+   131   1      6.7135573700000002      5.8393818399999997      10.361099749999999
+   132   3      9.8029139300000008      7.9578901699999998             10.21404942
+   133   1             10.38910242      8.3400641400000008             10.87949429
+   134   1      9.0637612000000001      7.6392374099999998      10.756928869999999
+   135   3      4.4963435599999997      4.1067935799999997             11.73387805
+   136   1      4.5473727899999998      4.9577970899999997             11.19223377
+   137   1      5.3588818399999996      4.1756111699999998     0.20355936999999999
+   138   3      9.5923448100000002      7.3418014600000001              1.34856172
+   139   1      8.8715593300000002      7.4776837199999999              2.05040471
+   140   1      9.0443221699999992      7.2732200799999998              0.54011714
+   141   3      7.0350963100000001              3.22348773               0.7070824
+   142   1      7.1784470499999999      4.1340314300000003      1.0184109699999999
+   143   1      7.7787854400000001      2.7888888399999998              1.15838887
+   144   3      9.2124107800000008     0.48085899999999998              1.21751966
+   145   1      9.6620436499999993      11.657271079999999              1.45318397
+   146   1      9.9404883900000005              1.11619136              1.18684594
+   147   3              1.19704207      9.5859959200000002      6.6190888899999996
+   148   1              0.25606413      9.6737366500000004      6.8319340899999998
+   149   1      1.2690051899999999      8.6249354900000004      6.5480112500000001
+   150   3     0.78256133999999999      2.6040609300000002      11.453408359999999
+   151   1              0.61502181      3.5607405999999999             11.40300991
+   152   1              1.55655312      2.5457368800000002      10.866733030000001
+   153   3      5.8627936099999998      7.1217054800000001              5.89173203
+   154   1      6.3432410700000004      7.9400136699999999      6.0855840299999997
+   155   1      5.5077296699999998      6.8468306800000001      6.7436875799999996
+   156   3      10.887828150000001      9.9637482500000001     0.51092815999999996
+   157   1             11.78841776      10.322043069999999     0.44704989000000001
+   158   1             11.02688182      9.2051906700000004      1.0976661299999999
+   159   3              3.93073389      4.1645674499999998      5.7137877000000001
+   160   1      4.6884062999999996      3.5788913299999998      5.5644605800000004
+   161   1      4.2956948500000003      4.7644888099999996      6.3801669700000003
diff --git a/examples/PACKAGES/pace/compute/latte_cell_0.xyz b/examples/PACKAGES/pace/compute/latte_cell_0.xyz
new file mode 100644
index 0000000000..afe0a27f35
--- /dev/null
+++ b/examples/PACKAGES/pace/compute/latte_cell_0.xyz
@@ -0,0 +1,163 @@
+161
+Lattice="12.0 0.0 0.0 0.0 12.0 0.0 0.0 0.0 12.0" Properties=species:S:1:pos:R:3 pbc="T T T"
+O        1.26880968       2.00799384       2.74468299
+H        1.53430682       2.06387665       3.71056269
+H        1.78482796       2.67550034       2.22688472
+H        1.56251195       1.10891269       2.39781152
+H       11.61728216       5.71881094       2.47320460
+N        6.55018656       4.74395668       3.65260255
+H        6.45648953       4.15716730       2.69752680
+N        2.08355610       1.59406078       8.54980476
+H        1.10411625       1.49717716       8.15078792
+H        2.60115534       2.29459604       7.95374187
+H        1.98177233       2.01940664       9.51282394
+H        0.99333338       3.69839073       8.19039479
+O        4.94840710       5.36455014       9.16152503
+H        9.07161706       9.37484540       4.22764624
+N        0.30864419       7.71366575       2.92749956
+H        0.47661671      10.18072110       3.71160091
+H        1.07465334       7.82269220       3.57714669
+H        0.38402250       8.37704933       2.17484371
+H       11.43541341       7.79037360       3.40402455
+O        6.15703846      10.25988474       3.50899568
+H        5.59322244       9.56329447       3.14465590
+N        1.77855690       7.63125793       9.14884523
+H        2.55940486       6.96832838       9.30697002
+H        2.12441551       8.45479870       8.64286229
+H        1.04552782       7.16977228       8.58942450
+H        0.34824446      10.17844028       9.16294638
+O        5.96388304      10.72370940       9.45688039
+H        6.58908357      10.92648611       8.79819258
+N        7.10658904       1.83029753       3.34525436
+H        6.92293050       1.84650221       4.30890371
+H        8.07804336       1.93030522       3.20895214
+H        5.67953736      10.47183163       4.32443905
+H        6.82999417       0.95850113       2.98152882
+N       11.38380535       4.63012252       2.53936884
+H        0.37927047       4.19432163       2.59073807
+O        5.23764101       1.91523463       9.72406364
+H        4.78872025       2.70369365       9.56981423
+H        9.81299067       9.20751407       4.08265499
+H        4.79808795       1.14034947       9.67399628
+H        5.44558456       2.01021000      10.62077351
+O        0.90954339       4.62400932       8.31081106
+H       11.90973532       4.74838140       8.25006246
+N        7.32234245       7.58664570       3.02452265
+H        7.44703622       8.31696467       3.71480033
+H        6.90738053       7.93850210       2.17237687
+H        5.55428685       5.11760658       3.76552520
+H        6.81243095       6.77788116       3.39732325
+N        0.29575823      11.04303794       3.10161425
+H        0.86490722      11.83879228       3.63899745
+O        6.85201686       8.08463693       8.87628788
+H        7.33514301       7.42634987       9.38216748
+H        6.79196133       7.75954772       7.97161748
+H        3.89904877       6.42834904       8.88324096
+H        5.95997296       9.93297232       9.47466547
+O       11.40365898      10.37196036       9.27660922
+H       10.98366636       9.51571998       9.14787573
+O        1.52232797       5.33273311       0.57537606
+H        2.38151140       5.72519912       0.77945296
+H        0.92079957       6.09312823       0.62203253
+O       11.23490924       2.91533552       6.75850641
+H       10.79234019       2.97552251       5.90000184
+H       10.75124206       2.18961560       7.18074015
+O       11.39027944       7.34628556       6.72582995
+H       10.92025679       6.69831954       7.27766966
+H       11.12238028       7.06320210       5.83941078
+O        8.46843195      10.71736286      10.60018556
+H        8.56722016      11.42046608      11.25794033
+H        9.08031098      10.04804949      10.94065170
+O        6.58517573       9.99406234       6.55746149
+H        7.02763258      10.76096604       6.17233337
+H        5.74193274      10.38458392       6.82288230
+O        1.76005464       1.01771919       5.49267877
+H        1.97042091       1.01748419       6.44297037
+H        1.42973007       0.11076352       5.34706449
+H        2.61130613       9.70341237      10.45030683
+O        3.05086908      10.48131334      10.08518931
+H        3.00326932      10.93357295       9.36521210
+O        6.46315322       8.76520583      11.96784797
+H        6.45068084       9.65965953      11.58895602
+H        5.66116297       8.35357377      11.63844333
+O        1.17459993       5.24206908       5.10014497
+H        1.31932881       5.52363924       6.01289663
+H        0.58053835       4.48981346       5.23255650
+O        6.72755497       0.78840875       7.38172809
+H        6.38879656       1.54670982       6.86345204
+H        7.67917840       0.94039024       7.26494610
+O        8.54766572       0.00647503       5.04505141
+H        8.87362909      11.10484108       4.86018079
+H        8.04775976       0.20198361       4.23573994
+O        1.28950300       8.42800979      11.82038504
+H        1.47666664       8.10878664      10.87290333
+H        2.10220669       8.19476202       0.29510553
+O        9.67979076       6.42073355       4.34691506
+H        8.92715301       6.72940235       3.79741224
+H       10.20024126       5.91677392       3.66976111
+O        3.57411616       6.70410217       3.88254785
+H        2.88948995       6.15605298       4.29808481
+H        4.36137077       6.43047324       4.38049654
+O        4.75065560      11.44185335       1.12537088
+H        4.08611928      10.74852367       1.19233471
+H        5.50353016      10.96568825       0.73651277
+O        9.52545264       4.89944439       8.37322841
+H        8.78859598       4.35080439       8.66328314
+H        9.61490675       4.60849115       7.45400687
+O        4.19707467       1.34592128       3.67401439
+H        4.94370120       0.74406281       3.51406800
+H        4.19055349       1.77303760       2.79630494
+O        1.88232618      11.95451227       0.60024434
+H        2.04645873      11.02454723       0.38329541
+H        1.15184937       0.17494341      11.99928285
+O        3.75938422      11.01685511       6.45620508
+H        3.21254143      10.45537470       5.88949171
+H        3.41660269      10.82155767       7.32965639
+O        9.70398414       3.95001545      11.89474325
+H       10.46166606       3.91631180      11.28543523
+H       10.09834695       4.40269974       0.68193008
+O        8.56395964       3.51695075       5.62241042
+H        8.39666503       2.62622147       5.26381643
+H        7.96953714       4.08250598       5.00494074
+O        9.67362450       0.48030482       7.92575778
+H        9.61319804      11.88341918       7.16809240
+H        9.97840503      11.90238635       8.63894187
+O        3.94241531       6.96502967      11.60258943
+H        4.27671527       6.84600490      10.67022522
+H        4.65706385       6.51294615       0.09115988
+O        3.05701732       9.66319585       3.66112506
+H        2.54004901       9.57433550       2.84440476
+H        2.93148744       8.78098072       4.04252342
+O        7.45496127       5.84308508      11.01138472
+H        8.16758841       5.46391828      10.47644287
+H        6.71355737       5.83938184      10.36109975
+O        9.80291393       7.95789017      10.21404942
+H       10.38910242       8.34006414      10.87949429
+H        9.06376120       7.63923741      10.75692887
+O        4.49634356       4.10679358      11.73387805
+H        4.54737279       4.95779709      11.19223377
+H        5.35888184       4.17561117       0.20355937
+O        9.59234481       7.34180146       1.34856172
+H        8.87155933       7.47768372       2.05040471
+H        9.04432217       7.27322008       0.54011714
+O        7.03509631       3.22348773       0.70708240
+H        7.17844705       4.13403143       1.01841097
+H        7.77878544       2.78888884       1.15838887
+O        9.21241078       0.48085900       1.21751966
+H        9.66204365      11.65727108       1.45318397
+H        9.94048839       1.11619136       1.18684594
+O        1.19704207       9.58599592       6.61908889
+H        0.25606413       9.67373665       6.83193409
+H        1.26900519       8.62493549       6.54801125
+O        0.78256134       2.60406093      11.45340836
+H        0.61502181       3.56074060      11.40300991
+H        1.55655312       2.54573688      10.86673303
+O        5.86279361       7.12170548       5.89173203
+H        6.34324107       7.94001367       6.08558403
+H        5.50772967       6.84683068       6.74368758
+O       10.88782815       9.96374825       0.51092816
+H       11.78841776      10.32204307       0.44704989
+H       11.02688182       9.20519067       1.09766613
+O        3.93073389       4.16456745       5.71378770
+H        4.68840630       3.57889133       5.56446058
+H        4.29569485       4.76448881       6.38016697
diff --git a/examples/PACKAGES/pace/compute/log.5Dec23.compute.g++.1 b/examples/PACKAGES/pace/compute/log.5Dec23.compute.g++.1
new file mode 100644
index 0000000000..e5036cfe5b
--- /dev/null
+++ b/examples/PACKAGES/pace/compute/log.5Dec23.compute.g++.1
@@ -0,0 +1,81 @@
+LAMMPS (21 Nov 2023)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
+  using 1 OpenMP thread(s) per MPI task
+#info all out log
+units  metal
+atom_style  atomic
+boundary    p p p
+atom_modify    map hash
+boundary  p p p
+read_data  latte_cell_0.data
+Reading data file ...
+  orthogonal box = (0 0 0) to (12 12 12)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  161 atoms
+  read_data CPU = 0.001 seconds
+mass  1 1.00
+mass  2 14.00
+mass  3 15.999
+
+        # potential settings
+
+pair_style     zero 5.7
+pair_coeff     * *
+
+compute     pace all pace coupling_coefficients.yace 1 0
+
+thermo 1
+thermo_style  custom step temp c_pace[1][183]
+
+run 0
+WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
+Generated 0 of 3 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 7.7
+  ghost atom cutoff = 7.7
+  binsize = 3.85, bins = 4 4 4
+  2 neighbor lists, perpetual/occasional/extra = 1 1 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d
+      bin: standard
+  (2) compute pace, occasional
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 6.993 | 6.993 | 6.993 Mbytes
+   Step          Temp      c_pace[1][183]
+         0   0              8.6885642    
+Loop time of 1.217e-06 on 1 procs for 0 steps with 161 atoms
+
+164.3% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0          | 0          | 0          |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0          | 0          | 0          |   0.0 |  0.00
+Output  | 0          | 0          | 0          |   0.0 |  0.00
+Modify  | 0          | 0          | 0          |   0.0 |  0.00
+Other   |            | 1.217e-06  |            |       |100.00
+
+Nlocal:            161 ave         161 max         161 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:           1754 ave        1754 max        1754 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:          14230 ave       14230 max       14230 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+FullNghs:        28460 ave       28460 max       28460 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 28460
+Ave neighs/atom = 176.77019
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/PACKAGES/pace/compute/log.5Dec23.compute.g++.4 b/examples/PACKAGES/pace/compute/log.5Dec23.compute.g++.4
new file mode 100644
index 0000000000..49ca6129b6
--- /dev/null
+++ b/examples/PACKAGES/pace/compute/log.5Dec23.compute.g++.4
@@ -0,0 +1,81 @@
+LAMMPS (21 Nov 2023)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
+  using 1 OpenMP thread(s) per MPI task
+#info all out log
+units  metal
+atom_style  atomic
+boundary    p p p
+atom_modify    map hash
+boundary  p p p
+read_data  latte_cell_0.data
+Reading data file ...
+  orthogonal box = (0 0 0) to (12 12 12)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  161 atoms
+  read_data CPU = 0.001 seconds
+mass  1 1.00
+mass  2 14.00
+mass  3 15.999
+
+        # potential settings
+
+pair_style     zero 5.7
+pair_coeff     * *
+
+compute     pace all pace coupling_coefficients.yace 1 0
+
+thermo 1
+thermo_style  custom step temp c_pace[1][183]
+
+run 0
+WARNING: No fixes with time integration, atoms won't move (src/verlet.cpp:60)
+Generated 0 of 3 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 7.7
+  ghost atom cutoff = 7.7
+  binsize = 3.85, bins = 4 4 4
+  2 neighbor lists, perpetual/occasional/extra = 1 1 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d
+      bin: standard
+  (2) compute pace, occasional
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 6.97 | 6.97 | 6.971 Mbytes
+   Step          Temp      c_pace[1][183]
+         0   0              8.6885642    
+Loop time of 1.979e-06 on 4 procs for 0 steps with 161 atoms
+
+164.2% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0          | 0          | 0          |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0          | 0          | 0          |   0.0 |  0.00
+Output  | 0          | 0          | 0          |   0.0 |  0.00
+Modify  | 0          | 0          | 0          |   0.0 |  0.00
+Other   |            | 1.979e-06  |            |       |100.00
+
+Nlocal:          40.25 ave          44 max          35 min
+Histogram: 1 0 0 0 1 0 0 0 1 1
+Nghost:         1134.5 ave        1159 max        1117 min
+Histogram: 1 1 0 0 1 0 0 0 0 1
+Neighs:         3557.5 ave        4115 max        3189 min
+Histogram: 2 0 0 0 0 1 0 0 0 1
+FullNghs:         7115 ave        7755 max        6158 min
+Histogram: 1 0 0 0 1 0 0 0 0 2
+
+Total # of neighbors = 28460
+Ave neighs/atom = 176.77019
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:00
diff --git a/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene b/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene
index 7860db4e55..dcca29c026 100644
--- a/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene
+++ b/examples/PACKAGES/reaction/create_atoms_polystyrene/in.grow_styrene
@@ -40,7 +40,7 @@ fix 1 statted_grp_REACT nvt temp $T $T 100
 
 fix 4 bond_react_MASTER_group temp/rescale 1 $T $T 1 1
 
-thermo_style custom step temp press density f_myrxns[1]
+thermo_style custom step temp press density f_myrxns[*]
 
 thermo 100
 
diff --git a/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt b/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt
index 9678a714d6..635b2c9750 100644
--- a/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt
+++ b/examples/PACKAGES/reaction/nylon,6-6_melt/in.large_nylon_melt
@@ -26,7 +26,7 @@ read_data large_nylon_melt.data.gz &
   extra/angle/per/atom 15 &
   extra/dihedral/per/atom 15 &
   extra/improper/per/atom 25 &
-  extra/special/per/atom 25 
+  extra/special/per/atom 25
 
 velocity all create 800.0 4928459 dist gaussian
 
@@ -50,7 +50,7 @@ fix 1 statted_grp_REACT nvt temp 800 800 100
 # you can use the internally created 'bond_react_MASTER_group', like so:
 # fix 2 bond_react_MASTER_group temp/rescale 1 800 800 10 1
 
-thermo_style custom step temp press density f_myrxns[1] f_myrxns[2] # cumulative reaction counts
+thermo_style custom step temp press density f_myrxns[*] # cumulative reaction counts
 
 # restart 100 restart1 restart2
 
diff --git a/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized b/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized
index 57b03b630f..7e0350cdb0 100644
--- a/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized
+++ b/examples/PACKAGES/reaction/tiny_epoxy/in.tiny_epoxy.stabilized
@@ -20,7 +20,8 @@ improper_style class2
 special_bonds lj/coul 0 0 1
 pair_modify tail yes mix sixthpower
 
-read_data tiny_epoxy.data
+read_data tiny_epoxy.data &
+  extra/special/per/atom 25
 
 velocity all create 300.0 4928459 dist gaussian
 
@@ -44,7 +45,7 @@ fix rxns all bond/react stabilization yes statted_grp .03 &
 
 fix 1 statted_grp_REACT nvt temp 300 300 100
 
-thermo_style custom step temp f_rxns[1] f_rxns[2] f_rxns[3] f_rxns[4]
+thermo_style custom step temp f_rxns[*]
 
 run 2000
 
diff --git a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized
index 95b39033db..853bc45f1e 100644
--- a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized
+++ b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized
@@ -50,7 +50,7 @@ fix 1 statted_grp_REACT nvt temp 300 300 100
 # by using the internally-created 'bond_react_MASTER_group', like so:
 fix 4 bond_react_MASTER_group temp/rescale 1 300 300 10 1
 
-thermo_style custom step temp press density f_myrxns[1] f_myrxns[2]
+thermo_style custom step temp press density f_myrxns[*]
 
 # restart 100 restart1 restart2
 
diff --git a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability
index 88b5a95a41..f3c32f3cbd 100644
--- a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability
+++ b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.stabilized_variable_probability
@@ -54,7 +54,7 @@ fix 1 statted_grp_REACT nvt temp 300 300 100
 # by using the internally-created 'bond_react_MASTER_group', like so:
 fix 4 bond_react_MASTER_group temp/rescale 1 300 300 10 1
 
-thermo_style custom step temp press density v_prob1 v_prob2 f_myrxns[1] f_myrxns[2]
+thermo_style custom step temp press density v_prob1 v_prob2 f_myrxns[*]
 
 # restart 100 restart1 restart2
 
diff --git a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized
index a569e28d43..e5cbaaaf86 100644
--- a/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized
+++ b/examples/PACKAGES/reaction/tiny_nylon/in.tiny_nylon.unstabilized
@@ -47,7 +47,7 @@ fix myrxns all bond/react stabilization no &
 
 fix 1 all nve/limit .03
 
-thermo_style custom step temp press density f_myrxns[1] f_myrxns[2]
+thermo_style custom step temp press density f_myrxns[*]
 
 # restart 100 restart1 restart2
 
diff --git a/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized b/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized
index 4ecc481719..230998fcd3 100644
--- a/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized
+++ b/examples/PACKAGES/reaction/tiny_polystyrene/in.tiny_polystyrene.stabilized
@@ -51,7 +51,7 @@ fix 1 statted_grp_REACT nvt temp $T $T 100
 
 fix 4 bond_react_MASTER_group temp/rescale 1 $T $T 1 1
 
-thermo_style custom step temp press density f_rxn1[1] f_rxn1[2] f_rxn1[3]
+thermo_style custom step temp press density f_rxn1[*]
 
 run 10000
 
diff --git a/examples/PACKAGES/sna_nnn_slcsa/Zr_mm.eam.fs b/examples/PACKAGES/sna_nnn_slcsa/Zr_mm.eam.fs
new file mode 120000
index 0000000000..f8b779307d
--- /dev/null
+++ b/examples/PACKAGES/sna_nnn_slcsa/Zr_mm.eam.fs
@@ -0,0 +1 @@
+../../../potentials/Zr_mm.eam.fs
\ No newline at end of file
diff --git a/examples/PACKAGES/sna_nnn_slcsa/data.zr_cell b/examples/PACKAGES/sna_nnn_slcsa/data.zr_cell
new file mode 100644
index 0000000000..d7c83fc716
--- /dev/null
+++ b/examples/PACKAGES/sna_nnn_slcsa/data.zr_cell
@@ -0,0 +1,879 @@
+ # Hcp Zr with box vectors H1=[2-1-10], H2=[-12-10], H3=[0001].
+  
+         864  atoms
+           1  atom types
+ 
+      0.000000000000      19.374000000000  xlo xhi
+      0.000000000000      33.556752345839  ylo yhi
+      0.000000000000      30.846000000000  zlo zhi
+ 
+Masses
+ 
+            1   91.22400000             # Zr
+ 
+Atoms # atomic
+ 
+         1    1        0.000000000000       1.864264019213       2.570500000000
+         2    1        0.000000000000       0.000000000000       0.000000000000
+         3    1        1.614500000000       4.660660048033       2.570500000000
+         4    1        1.614500000000       2.796396028820       0.000000000000
+         5    1        3.229000000000       1.864264019213       2.570500000000
+         6    1        3.229000000000       0.000000000000       0.000000000000
+         7    1        4.843500000000       4.660660048033       2.570500000000
+         8    1        4.843500000000       2.796396028820       0.000000000000
+         9    1        6.458000000000       1.864264019213       2.570500000000
+        10    1        6.458000000000       0.000000000000       0.000000000000
+        11    1        8.072500000000       4.660660048033       2.570500000000
+        12    1        8.072500000000       2.796396028820       0.000000000000
+        13    1        9.687000000000       1.864264019213       2.570500000000
+        14    1        9.687000000000       0.000000000000       0.000000000000
+        15    1       11.301500000000       4.660660048033       2.570500000000
+        16    1       11.301500000000       2.796396028820       0.000000000000
+        17    1       12.916000000000       1.864264019213       2.570500000000
+        18    1       12.916000000000       0.000000000000       0.000000000000
+        19    1       14.530500000000       4.660660048033       2.570500000000
+        20    1       14.530500000000       2.796396028820       0.000000000000
+        21    1       16.145000000000       1.864264019213       2.570500000000
+        22    1       16.145000000000       0.000000000000       0.000000000000
+        23    1       17.759500000000       4.660660048033       2.570500000000
+        24    1       17.759500000000       2.796396028820       0.000000000000
+        25    1        0.000000000000       7.457056076853       2.570500000000
+        26    1        0.000000000000       5.592792057640       0.000000000000
+        27    1        1.614500000000      10.253452105673       2.570500000000
+        28    1        1.614500000000       8.389188086460       0.000000000000
+        29    1        3.229000000000       7.457056076853       2.570500000000
+        30    1        3.229000000000       5.592792057640       0.000000000000
+        31    1        4.843500000000      10.253452105673       2.570500000000
+        32    1        4.843500000000       8.389188086460       0.000000000000
+        33    1        6.458000000000       7.457056076853       2.570500000000
+        34    1        6.458000000000       5.592792057640       0.000000000000
+        35    1        8.072500000000      10.253452105673       2.570500000000
+        36    1        8.072500000000       8.389188086460       0.000000000000
+        37    1        9.687000000000       7.457056076853       2.570500000000
+        38    1        9.687000000000       5.592792057640       0.000000000000
+        39    1       11.301500000000      10.253452105673       2.570500000000
+        40    1       11.301500000000       8.389188086460       0.000000000000
+        41    1       12.916000000000       7.457056076853       2.570500000000
+        42    1       12.916000000000       5.592792057640       0.000000000000
+        43    1       14.530500000000      10.253452105673       2.570500000000
+        44    1       14.530500000000       8.389188086460       0.000000000000
+        45    1       16.145000000000       7.457056076853       2.570500000000
+        46    1       16.145000000000       5.592792057640       0.000000000000
+        47    1       17.759500000000      10.253452105673       2.570500000000
+        48    1       17.759500000000       8.389188086460       0.000000000000
+        49    1        0.000000000000      13.049848134493       2.570500000000
+        50    1        0.000000000000      11.185584115280       0.000000000000
+        51    1        1.614500000000      15.846244163313       2.570500000000
+        52    1        1.614500000000      13.981980144100       0.000000000000
+        53    1        3.229000000000      13.049848134493       2.570500000000
+        54    1        3.229000000000      11.185584115280       0.000000000000
+        55    1        4.843500000000      15.846244163313       2.570500000000
+        56    1        4.843500000000      13.981980144100       0.000000000000
+        57    1        6.458000000000      13.049848134493       2.570500000000
+        58    1        6.458000000000      11.185584115280       0.000000000000
+        59    1        8.072500000000      15.846244163313       2.570500000000
+        60    1        8.072500000000      13.981980144100       0.000000000000
+        61    1        9.687000000000      13.049848134493       2.570500000000
+        62    1        9.687000000000      11.185584115280       0.000000000000
+        63    1       11.301500000000      15.846244163313       2.570500000000
+        64    1       11.301500000000      13.981980144100       0.000000000000
+        65    1       12.916000000000      13.049848134493       2.570500000000
+        66    1       12.916000000000      11.185584115280       0.000000000000
+        67    1       14.530500000000      15.846244163313       2.570500000000
+        68    1       14.530500000000      13.981980144100       0.000000000000
+        69    1       16.145000000000      13.049848134493       2.570500000000
+        70    1       16.145000000000      11.185584115280       0.000000000000
+        71    1       17.759500000000      15.846244163313       2.570500000000
+        72    1       17.759500000000      13.981980144100       0.000000000000
+        73    1        0.000000000000      18.642640192133       2.570500000000
+        74    1        0.000000000000      16.778376172920       0.000000000000
+        75    1        1.614500000000      21.439036220953       2.570500000000
+        76    1        1.614500000000      19.574772201740       0.000000000000
+        77    1        3.229000000000      18.642640192133       2.570500000000
+        78    1        3.229000000000      16.778376172920       0.000000000000
+        79    1        4.843500000000      21.439036220953       2.570500000000
+        80    1        4.843500000000      19.574772201740       0.000000000000
+        81    1        6.458000000000      18.642640192133       2.570500000000
+        82    1        6.458000000000      16.778376172920       0.000000000000
+        83    1        8.072500000000      21.439036220953       2.570500000000
+        84    1        8.072500000000      19.574772201740       0.000000000000
+        85    1        9.687000000000      18.642640192133       2.570500000000
+        86    1        9.687000000000      16.778376172920       0.000000000000
+        87    1       11.301500000000      21.439036220953       2.570500000000
+        88    1       11.301500000000      19.574772201740       0.000000000000
+        89    1       12.916000000000      18.642640192133       2.570500000000
+        90    1       12.916000000000      16.778376172920       0.000000000000
+        91    1       14.530500000000      21.439036220953       2.570500000000
+        92    1       14.530500000000      19.574772201740       0.000000000000
+        93    1       16.145000000000      18.642640192133       2.570500000000
+        94    1       16.145000000000      16.778376172920       0.000000000000
+        95    1       17.759500000000      21.439036220953       2.570500000000
+        96    1       17.759500000000      19.574772201740       0.000000000000
+        97    1        0.000000000000      24.235432249773       2.570500000000
+        98    1        0.000000000000      22.371168230560       0.000000000000
+        99    1        1.614500000000      27.031828278593       2.570500000000
+       100    1        1.614500000000      25.167564259380       0.000000000000
+       101    1        3.229000000000      24.235432249773       2.570500000000
+       102    1        3.229000000000      22.371168230560       0.000000000000
+       103    1        4.843500000000      27.031828278593       2.570500000000
+       104    1        4.843500000000      25.167564259380       0.000000000000
+       105    1        6.458000000000      24.235432249773       2.570500000000
+       106    1        6.458000000000      22.371168230560       0.000000000000
+       107    1        8.072500000000      27.031828278593       2.570500000000
+       108    1        8.072500000000      25.167564259380       0.000000000000
+       109    1        9.687000000000      24.235432249773       2.570500000000
+       110    1        9.687000000000      22.371168230560       0.000000000000
+       111    1       11.301500000000      27.031828278593       2.570500000000
+       112    1       11.301500000000      25.167564259380       0.000000000000
+       113    1       12.916000000000      24.235432249773       2.570500000000
+       114    1       12.916000000000      22.371168230560       0.000000000000
+       115    1       14.530500000000      27.031828278593       2.570500000000
+       116    1       14.530500000000      25.167564259380       0.000000000000
+       117    1       16.145000000000      24.235432249773       2.570500000000
+       118    1       16.145000000000      22.371168230560       0.000000000000
+       119    1       17.759500000000      27.031828278593       2.570500000000
+       120    1       17.759500000000      25.167564259380       0.000000000000
+       121    1        0.000000000000      29.828224307413       2.570500000000
+       122    1        0.000000000000      27.963960288200       0.000000000000
+       123    1        1.614500000000      32.624620336233       2.570500000000
+       124    1        1.614500000000      30.760356317019       0.000000000000
+       125    1        3.229000000000      29.828224307413       2.570500000000
+       126    1        3.229000000000      27.963960288200       0.000000000000
+       127    1        4.843500000000      32.624620336233       2.570500000000
+       128    1        4.843500000000      30.760356317019       0.000000000000
+       129    1        6.458000000000      29.828224307413       2.570500000000
+       130    1        6.458000000000      27.963960288200       0.000000000000
+       131    1        8.072500000000      32.624620336233       2.570500000000
+       132    1        8.072500000000      30.760356317019       0.000000000000
+       133    1        9.687000000000      29.828224307413       2.570500000000
+       134    1        9.687000000000      27.963960288200       0.000000000000
+       135    1       11.301500000000      32.624620336233       2.570500000000
+       136    1       11.301500000000      30.760356317019       0.000000000000
+       137    1       12.916000000000      29.828224307413       2.570500000000
+       138    1       12.916000000000      27.963960288200       0.000000000000
+       139    1       14.530500000000      32.624620336233       2.570500000000
+       140    1       14.530500000000      30.760356317019       0.000000000000
+       141    1       16.145000000000      29.828224307413       2.570500000000
+       142    1       16.145000000000      27.963960288200       0.000000000000
+       143    1       17.759500000000      32.624620336233       2.570500000000
+       144    1       17.759500000000      30.760356317019       0.000000000000
+       145    1        0.000000000000       1.864264019213       7.711500000000
+       146    1        0.000000000000       0.000000000000       5.141000000000
+       147    1        1.614500000000       4.660660048033       7.711500000000
+       148    1        1.614500000000       2.796396028820       5.141000000000
+       149    1        3.229000000000       1.864264019213       7.711500000000
+       150    1        3.229000000000       0.000000000000       5.141000000000
+       151    1        4.843500000000       4.660660048033       7.711500000000
+       152    1        4.843500000000       2.796396028820       5.141000000000
+       153    1        6.458000000000       1.864264019213       7.711500000000
+       154    1        6.458000000000       0.000000000000       5.141000000000
+       155    1        8.072500000000       4.660660048033       7.711500000000
+       156    1        8.072500000000       2.796396028820       5.141000000000
+       157    1        9.687000000000       1.864264019213       7.711500000000
+       158    1        9.687000000000       0.000000000000       5.141000000000
+       159    1       11.301500000000       4.660660048033       7.711500000000
+       160    1       11.301500000000       2.796396028820       5.141000000000
+       161    1       12.916000000000       1.864264019213       7.711500000000
+       162    1       12.916000000000       0.000000000000       5.141000000000
+       163    1       14.530500000000       4.660660048033       7.711500000000
+       164    1       14.530500000000       2.796396028820       5.141000000000
+       165    1       16.145000000000       1.864264019213       7.711500000000
+       166    1       16.145000000000       0.000000000000       5.141000000000
+       167    1       17.759500000000       4.660660048033       7.711500000000
+       168    1       17.759500000000       2.796396028820       5.141000000000
+       169    1        0.000000000000       7.457056076853       7.711500000000
+       170    1        0.000000000000       5.592792057640       5.141000000000
+       171    1        1.614500000000      10.253452105673       7.711500000000
+       172    1        1.614500000000       8.389188086460       5.141000000000
+       173    1        3.229000000000       7.457056076853       7.711500000000
+       174    1        3.229000000000       5.592792057640       5.141000000000
+       175    1        4.843500000000      10.253452105673       7.711500000000
+       176    1        4.843500000000       8.389188086460       5.141000000000
+       177    1        6.458000000000       7.457056076853       7.711500000000
+       178    1        6.458000000000       5.592792057640       5.141000000000
+       179    1        8.072500000000      10.253452105673       7.711500000000
+       180    1        8.072500000000       8.389188086460       5.141000000000
+       181    1        9.687000000000       7.457056076853       7.711500000000
+       182    1        9.687000000000       5.592792057640       5.141000000000
+       183    1       11.301500000000      10.253452105673       7.711500000000
+       184    1       11.301500000000       8.389188086460       5.141000000000
+       185    1       12.916000000000       7.457056076853       7.711500000000
+       186    1       12.916000000000       5.592792057640       5.141000000000
+       187    1       14.530500000000      10.253452105673       7.711500000000
+       188    1       14.530500000000       8.389188086460       5.141000000000
+       189    1       16.145000000000       7.457056076853       7.711500000000
+       190    1       16.145000000000       5.592792057640       5.141000000000
+       191    1       17.759500000000      10.253452105673       7.711500000000
+       192    1       17.759500000000       8.389188086460       5.141000000000
+       193    1        0.000000000000      13.049848134493       7.711500000000
+       194    1        0.000000000000      11.185584115280       5.141000000000
+       195    1        1.614500000000      15.846244163313       7.711500000000
+       196    1        1.614500000000      13.981980144100       5.141000000000
+       197    1        3.229000000000      13.049848134493       7.711500000000
+       198    1        3.229000000000      11.185584115280       5.141000000000
+       199    1        4.843500000000      15.846244163313       7.711500000000
+       200    1        4.843500000000      13.981980144100       5.141000000000
+       201    1        6.458000000000      13.049848134493       7.711500000000
+       202    1        6.458000000000      11.185584115280       5.141000000000
+       203    1        8.072500000000      15.846244163313       7.711500000000
+       204    1        8.072500000000      13.981980144100       5.141000000000
+       205    1        9.687000000000      13.049848134493       7.711500000000
+       206    1        9.687000000000      11.185584115280       5.141000000000
+       207    1       11.301500000000      15.846244163313       7.711500000000
+       208    1       11.301500000000      13.981980144100       5.141000000000
+       209    1       12.916000000000      13.049848134493       7.711500000000
+       210    1       12.916000000000      11.185584115280       5.141000000000
+       211    1       14.530500000000      15.846244163313       7.711500000000
+       212    1       14.530500000000      13.981980144100       5.141000000000
+       213    1       16.145000000000      13.049848134493       7.711500000000
+       214    1       16.145000000000      11.185584115280       5.141000000000
+       215    1       17.759500000000      15.846244163313       7.711500000000
+       216    1       17.759500000000      13.981980144100       5.141000000000
+       217    1        0.000000000000      18.642640192133       7.711500000000
+       218    1        0.000000000000      16.778376172920       5.141000000000
+       219    1        1.614500000000      21.439036220953       7.711500000000
+       220    1        1.614500000000      19.574772201740       5.141000000000
+       221    1        3.229000000000      18.642640192133       7.711500000000
+       222    1        3.229000000000      16.778376172920       5.141000000000
+       223    1        4.843500000000      21.439036220953       7.711500000000
+       224    1        4.843500000000      19.574772201740       5.141000000000
+       225    1        6.458000000000      18.642640192133       7.711500000000
+       226    1        6.458000000000      16.778376172920       5.141000000000
+       227    1        8.072500000000      21.439036220953       7.711500000000
+       228    1        8.072500000000      19.574772201740       5.141000000000
+       229    1        9.687000000000      18.642640192133       7.711500000000
+       230    1        9.687000000000      16.778376172920       5.141000000000
+       231    1       11.301500000000      21.439036220953       7.711500000000
+       232    1       11.301500000000      19.574772201740       5.141000000000
+       233    1       12.916000000000      18.642640192133       7.711500000000
+       234    1       12.916000000000      16.778376172920       5.141000000000
+       235    1       14.530500000000      21.439036220953       7.711500000000
+       236    1       14.530500000000      19.574772201740       5.141000000000
+       237    1       16.145000000000      18.642640192133       7.711500000000
+       238    1       16.145000000000      16.778376172920       5.141000000000
+       239    1       17.759500000000      21.439036220953       7.711500000000
+       240    1       17.759500000000      19.574772201740       5.141000000000
+       241    1        0.000000000000      24.235432249773       7.711500000000
+       242    1        0.000000000000      22.371168230560       5.141000000000
+       243    1        1.614500000000      27.031828278593       7.711500000000
+       244    1        1.614500000000      25.167564259380       5.141000000000
+       245    1        3.229000000000      24.235432249773       7.711500000000
+       246    1        3.229000000000      22.371168230560       5.141000000000
+       247    1        4.843500000000      27.031828278593       7.711500000000
+       248    1        4.843500000000      25.167564259380       5.141000000000
+       249    1        6.458000000000      24.235432249773       7.711500000000
+       250    1        6.458000000000      22.371168230560       5.141000000000
+       251    1        8.072500000000      27.031828278593       7.711500000000
+       252    1        8.072500000000      25.167564259380       5.141000000000
+       253    1        9.687000000000      24.235432249773       7.711500000000
+       254    1        9.687000000000      22.371168230560       5.141000000000
+       255    1       11.301500000000      27.031828278593       7.711500000000
+       256    1       11.301500000000      25.167564259380       5.141000000000
+       257    1       12.916000000000      24.235432249773       7.711500000000
+       258    1       12.916000000000      22.371168230560       5.141000000000
+       259    1       14.530500000000      27.031828278593       7.711500000000
+       260    1       14.530500000000      25.167564259380       5.141000000000
+       261    1       16.145000000000      24.235432249773       7.711500000000
+       262    1       16.145000000000      22.371168230560       5.141000000000
+       263    1       17.759500000000      27.031828278593       7.711500000000
+       264    1       17.759500000000      25.167564259380       5.141000000000
+       265    1        0.000000000000      29.828224307413       7.711500000000
+       266    1        0.000000000000      27.963960288200       5.141000000000
+       267    1        1.614500000000      32.624620336233       7.711500000000
+       268    1        1.614500000000      30.760356317019       5.141000000000
+       269    1        3.229000000000      29.828224307413       7.711500000000
+       270    1        3.229000000000      27.963960288200       5.141000000000
+       271    1        4.843500000000      32.624620336233       7.711500000000
+       272    1        4.843500000000      30.760356317019       5.141000000000
+       273    1        6.458000000000      29.828224307413       7.711500000000
+       274    1        6.458000000000      27.963960288200       5.141000000000
+       275    1        8.072500000000      32.624620336233       7.711500000000
+       276    1        8.072500000000      30.760356317019       5.141000000000
+       277    1        9.687000000000      29.828224307413       7.711500000000
+       278    1        9.687000000000      27.963960288200       5.141000000000
+       279    1       11.301500000000      32.624620336233       7.711500000000
+       280    1       11.301500000000      30.760356317019       5.141000000000
+       281    1       12.916000000000      29.828224307413       7.711500000000
+       282    1       12.916000000000      27.963960288200       5.141000000000
+       283    1       14.530500000000      32.624620336233       7.711500000000
+       284    1       14.530500000000      30.760356317019       5.141000000000
+       285    1       16.145000000000      29.828224307413       7.711500000000
+       286    1       16.145000000000      27.963960288200       5.141000000000
+       287    1       17.759500000000      32.624620336233       7.711500000000
+       288    1       17.759500000000      30.760356317019       5.141000000000
+       289    1        0.000000000000       1.864264019213      12.852500000000
+       290    1        0.000000000000       0.000000000000      10.282000000000
+       291    1        1.614500000000       4.660660048033      12.852500000000
+       292    1        1.614500000000       2.796396028820      10.282000000000
+       293    1        3.229000000000       1.864264019213      12.852500000000
+       294    1        3.229000000000       0.000000000000      10.282000000000
+       295    1        4.843500000000       4.660660048033      12.852500000000
+       296    1        4.843500000000       2.796396028820      10.282000000000
+       297    1        6.458000000000       1.864264019213      12.852500000000
+       298    1        6.458000000000       0.000000000000      10.282000000000
+       299    1        8.072500000000       4.660660048033      12.852500000000
+       300    1        8.072500000000       2.796396028820      10.282000000000
+       301    1        9.687000000000       1.864264019213      12.852500000000
+       302    1        9.687000000000       0.000000000000      10.282000000000
+       303    1       11.301500000000       4.660660048033      12.852500000000
+       304    1       11.301500000000       2.796396028820      10.282000000000
+       305    1       12.916000000000       1.864264019213      12.852500000000
+       306    1       12.916000000000       0.000000000000      10.282000000000
+       307    1       14.530500000000       4.660660048033      12.852500000000
+       308    1       14.530500000000       2.796396028820      10.282000000000
+       309    1       16.145000000000       1.864264019213      12.852500000000
+       310    1       16.145000000000       0.000000000000      10.282000000000
+       311    1       17.759500000000       4.660660048033      12.852500000000
+       312    1       17.759500000000       2.796396028820      10.282000000000
+       313    1        0.000000000000       7.457056076853      12.852500000000
+       314    1        0.000000000000       5.592792057640      10.282000000000
+       315    1        1.614500000000      10.253452105673      12.852500000000
+       316    1        1.614500000000       8.389188086460      10.282000000000
+       317    1        3.229000000000       7.457056076853      12.852500000000
+       318    1        3.229000000000       5.592792057640      10.282000000000
+       319    1        4.843500000000      10.253452105673      12.852500000000
+       320    1        4.843500000000       8.389188086460      10.282000000000
+       321    1        6.458000000000       7.457056076853      12.852500000000
+       322    1        6.458000000000       5.592792057640      10.282000000000
+       323    1        8.072500000000      10.253452105673      12.852500000000
+       324    1        8.072500000000       8.389188086460      10.282000000000
+       325    1        9.687000000000       7.457056076853      12.852500000000
+       326    1        9.687000000000       5.592792057640      10.282000000000
+       327    1       11.301500000000      10.253452105673      12.852500000000
+       328    1       11.301500000000       8.389188086460      10.282000000000
+       329    1       12.916000000000       7.457056076853      12.852500000000
+       330    1       12.916000000000       5.592792057640      10.282000000000
+       331    1       14.530500000000      10.253452105673      12.852500000000
+       332    1       14.530500000000       8.389188086460      10.282000000000
+       333    1       16.145000000000       7.457056076853      12.852500000000
+       334    1       16.145000000000       5.592792057640      10.282000000000
+       335    1       17.759500000000      10.253452105673      12.852500000000
+       336    1       17.759500000000       8.389188086460      10.282000000000
+       337    1        0.000000000000      13.049848134493      12.852500000000
+       338    1        0.000000000000      11.185584115280      10.282000000000
+       339    1        1.614500000000      15.846244163313      12.852500000000
+       340    1        1.614500000000      13.981980144100      10.282000000000
+       341    1        3.229000000000      13.049848134493      12.852500000000
+       342    1        3.229000000000      11.185584115280      10.282000000000
+       343    1        4.843500000000      15.846244163313      12.852500000000
+       344    1        4.843500000000      13.981980144100      10.282000000000
+       345    1        6.458000000000      13.049848134493      12.852500000000
+       346    1        6.458000000000      11.185584115280      10.282000000000
+       347    1        8.072500000000      15.846244163313      12.852500000000
+       348    1        8.072500000000      13.981980144100      10.282000000000
+       349    1        9.687000000000      13.049848134493      12.852500000000
+       350    1        9.687000000000      11.185584115280      10.282000000000
+       351    1       11.301500000000      15.846244163313      12.852500000000
+       352    1       11.301500000000      13.981980144100      10.282000000000
+       353    1       12.916000000000      13.049848134493      12.852500000000
+       354    1       12.916000000000      11.185584115280      10.282000000000
+       355    1       14.530500000000      15.846244163313      12.852500000000
+       356    1       14.530500000000      13.981980144100      10.282000000000
+       357    1       16.145000000000      13.049848134493      12.852500000000
+       358    1       16.145000000000      11.185584115280      10.282000000000
+       359    1       17.759500000000      15.846244163313      12.852500000000
+       360    1       17.759500000000      13.981980144100      10.282000000000
+       361    1        0.000000000000      18.642640192133      12.852500000000
+       362    1        0.000000000000      16.778376172920      10.282000000000
+       363    1        1.614500000000      21.439036220953      12.852500000000
+       364    1        1.614500000000      19.574772201740      10.282000000000
+       365    1        3.229000000000      18.642640192133      12.852500000000
+       366    1        3.229000000000      16.778376172920      10.282000000000
+       367    1        4.843500000000      21.439036220953      12.852500000000
+       368    1        4.843500000000      19.574772201740      10.282000000000
+       369    1        6.458000000000      18.642640192133      12.852500000000
+       370    1        6.458000000000      16.778376172920      10.282000000000
+       371    1        8.072500000000      21.439036220953      12.852500000000
+       372    1        8.072500000000      19.574772201740      10.282000000000
+       373    1        9.687000000000      18.642640192133      12.852500000000
+       374    1        9.687000000000      16.778376172920      10.282000000000
+       375    1       11.301500000000      21.439036220953      12.852500000000
+       376    1       11.301500000000      19.574772201740      10.282000000000
+       377    1       12.916000000000      18.642640192133      12.852500000000
+       378    1       12.916000000000      16.778376172920      10.282000000000
+       379    1       14.530500000000      21.439036220953      12.852500000000
+       380    1       14.530500000000      19.574772201740      10.282000000000
+       381    1       16.145000000000      18.642640192133      12.852500000000
+       382    1       16.145000000000      16.778376172920      10.282000000000
+       383    1       17.759500000000      21.439036220953      12.852500000000
+       384    1       17.759500000000      19.574772201740      10.282000000000
+       385    1        0.000000000000      24.235432249773      12.852500000000
+       386    1        0.000000000000      22.371168230560      10.282000000000
+       387    1        1.614500000000      27.031828278593      12.852500000000
+       388    1        1.614500000000      25.167564259380      10.282000000000
+       389    1        3.229000000000      24.235432249773      12.852500000000
+       390    1        3.229000000000      22.371168230560      10.282000000000
+       391    1        4.843500000000      27.031828278593      12.852500000000
+       392    1        4.843500000000      25.167564259380      10.282000000000
+       393    1        6.458000000000      24.235432249773      12.852500000000
+       394    1        6.458000000000      22.371168230560      10.282000000000
+       395    1        8.072500000000      27.031828278593      12.852500000000
+       396    1        8.072500000000      25.167564259380      10.282000000000
+       397    1        9.687000000000      24.235432249773      12.852500000000
+       398    1        9.687000000000      22.371168230560      10.282000000000
+       399    1       11.301500000000      27.031828278593      12.852500000000
+       400    1       11.301500000000      25.167564259380      10.282000000000
+       401    1       12.916000000000      24.235432249773      12.852500000000
+       402    1       12.916000000000      22.371168230560      10.282000000000
+       403    1       14.530500000000      27.031828278593      12.852500000000
+       404    1       14.530500000000      25.167564259380      10.282000000000
+       405    1       16.145000000000      24.235432249773      12.852500000000
+       406    1       16.145000000000      22.371168230560      10.282000000000
+       407    1       17.759500000000      27.031828278593      12.852500000000
+       408    1       17.759500000000      25.167564259380      10.282000000000
+       409    1        0.000000000000      29.828224307413      12.852500000000
+       410    1        0.000000000000      27.963960288200      10.282000000000
+       411    1        1.614500000000      32.624620336233      12.852500000000
+       412    1        1.614500000000      30.760356317019      10.282000000000
+       413    1        3.229000000000      29.828224307413      12.852500000000
+       414    1        3.229000000000      27.963960288200      10.282000000000
+       415    1        4.843500000000      32.624620336233      12.852500000000
+       416    1        4.843500000000      30.760356317019      10.282000000000
+       417    1        6.458000000000      29.828224307413      12.852500000000
+       418    1        6.458000000000      27.963960288200      10.282000000000
+       419    1        8.072500000000      32.624620336233      12.852500000000
+       420    1        8.072500000000      30.760356317019      10.282000000000
+       421    1        9.687000000000      29.828224307413      12.852500000000
+       422    1        9.687000000000      27.963960288200      10.282000000000
+       423    1       11.301500000000      32.624620336233      12.852500000000
+       424    1       11.301500000000      30.760356317019      10.282000000000
+       425    1       12.916000000000      29.828224307413      12.852500000000
+       426    1       12.916000000000      27.963960288200      10.282000000000
+       427    1       14.530500000000      32.624620336233      12.852500000000
+       428    1       14.530500000000      30.760356317019      10.282000000000
+       429    1       16.145000000000      29.828224307413      12.852500000000
+       430    1       16.145000000000      27.963960288200      10.282000000000
+       431    1       17.759500000000      32.624620336233      12.852500000000
+       432    1       17.759500000000      30.760356317019      10.282000000000
+       433    1        0.000000000000       1.864264019213      17.993500000000
+       434    1        0.000000000000       0.000000000000      15.423000000000
+       435    1        1.614500000000       4.660660048033      17.993500000000
+       436    1        1.614500000000       2.796396028820      15.423000000000
+       437    1        3.229000000000       1.864264019213      17.993500000000
+       438    1        3.229000000000       0.000000000000      15.423000000000
+       439    1        4.843500000000       4.660660048033      17.993500000000
+       440    1        4.843500000000       2.796396028820      15.423000000000
+       441    1        6.458000000000       1.864264019213      17.993500000000
+       442    1        6.458000000000       0.000000000000      15.423000000000
+       443    1        8.072500000000       4.660660048033      17.993500000000
+       444    1        8.072500000000       2.796396028820      15.423000000000
+       445    1        9.687000000000       1.864264019213      17.993500000000
+       446    1        9.687000000000       0.000000000000      15.423000000000
+       447    1       11.301500000000       4.660660048033      17.993500000000
+       448    1       11.301500000000       2.796396028820      15.423000000000
+       449    1       12.916000000000       1.864264019213      17.993500000000
+       450    1       12.916000000000       0.000000000000      15.423000000000
+       451    1       14.530500000000       4.660660048033      17.993500000000
+       452    1       14.530500000000       2.796396028820      15.423000000000
+       453    1       16.145000000000       1.864264019213      17.993500000000
+       454    1       16.145000000000       0.000000000000      15.423000000000
+       455    1       17.759500000000       4.660660048033      17.993500000000
+       456    1       17.759500000000       2.796396028820      15.423000000000
+       457    1        0.000000000000       7.457056076853      17.993500000000
+       458    1        0.000000000000       5.592792057640      15.423000000000
+       459    1        1.614500000000      10.253452105673      17.993500000000
+       460    1        1.614500000000       8.389188086460      15.423000000000
+       461    1        3.229000000000       7.457056076853      17.993500000000
+       462    1        3.229000000000       5.592792057640      15.423000000000
+       463    1        4.843500000000      10.253452105673      17.993500000000
+       464    1        4.843500000000       8.389188086460      15.423000000000
+       465    1        6.458000000000       7.457056076853      17.993500000000
+       466    1        6.458000000000       5.592792057640      15.423000000000
+       467    1        8.072500000000      10.253452105673      17.993500000000
+       468    1        8.072500000000       8.389188086460      15.423000000000
+       469    1        9.687000000000       7.457056076853      17.993500000000
+       470    1        9.687000000000       5.592792057640      15.423000000000
+       471    1       11.301500000000      10.253452105673      17.993500000000
+       472    1       11.301500000000       8.389188086460      15.423000000000
+       473    1       12.916000000000       7.457056076853      17.993500000000
+       474    1       12.916000000000       5.592792057640      15.423000000000
+       475    1       14.530500000000      10.253452105673      17.993500000000
+       476    1       14.530500000000       8.389188086460      15.423000000000
+       477    1       16.145000000000       7.457056076853      17.993500000000
+       478    1       16.145000000000       5.592792057640      15.423000000000
+       479    1       17.759500000000      10.253452105673      17.993500000000
+       480    1       17.759500000000       8.389188086460      15.423000000000
+       481    1        0.000000000000      13.049848134493      17.993500000000
+       482    1        0.000000000000      11.185584115280      15.423000000000
+       483    1        1.614500000000      15.846244163313      17.993500000000
+       484    1        1.614500000000      13.981980144100      15.423000000000
+       485    1        3.229000000000      13.049848134493      17.993500000000
+       486    1        3.229000000000      11.185584115280      15.423000000000
+       487    1        4.843500000000      15.846244163313      17.993500000000
+       488    1        4.843500000000      13.981980144100      15.423000000000
+       489    1        6.458000000000      13.049848134493      17.993500000000
+       490    1        6.458000000000      11.185584115280      15.423000000000
+       491    1        8.072500000000      15.846244163313      17.993500000000
+       492    1        8.072500000000      13.981980144100      15.423000000000
+       493    1        9.687000000000      13.049848134493      17.993500000000
+       494    1        9.687000000000      11.185584115280      15.423000000000
+       495    1       11.301500000000      15.846244163313      17.993500000000
+       496    1       11.301500000000      13.981980144100      15.423000000000
+       497    1       12.916000000000      13.049848134493      17.993500000000
+       498    1       12.916000000000      11.185584115280      15.423000000000
+       499    1       14.530500000000      15.846244163313      17.993500000000
+       500    1       14.530500000000      13.981980144100      15.423000000000
+       501    1       16.145000000000      13.049848134493      17.993500000000
+       502    1       16.145000000000      11.185584115280      15.423000000000
+       503    1       17.759500000000      15.846244163313      17.993500000000
+       504    1       17.759500000000      13.981980144100      15.423000000000
+       505    1        0.000000000000      18.642640192133      17.993500000000
+       506    1        0.000000000000      16.778376172920      15.423000000000
+       507    1        1.614500000000      21.439036220953      17.993500000000
+       508    1        1.614500000000      19.574772201740      15.423000000000
+       509    1        3.229000000000      18.642640192133      17.993500000000
+       510    1        3.229000000000      16.778376172920      15.423000000000
+       511    1        4.843500000000      21.439036220953      17.993500000000
+       512    1        4.843500000000      19.574772201740      15.423000000000
+       513    1        6.458000000000      18.642640192133      17.993500000000
+       514    1        6.458000000000      16.778376172920      15.423000000000
+       515    1        8.072500000000      21.439036220953      17.993500000000
+       516    1        8.072500000000      19.574772201740      15.423000000000
+       517    1        9.687000000000      18.642640192133      17.993500000000
+       518    1        9.687000000000      16.778376172920      15.423000000000
+       519    1       11.301500000000      21.439036220953      17.993500000000
+       520    1       11.301500000000      19.574772201740      15.423000000000
+       521    1       12.916000000000      18.642640192133      17.993500000000
+       522    1       12.916000000000      16.778376172920      15.423000000000
+       523    1       14.530500000000      21.439036220953      17.993500000000
+       524    1       14.530500000000      19.574772201740      15.423000000000
+       525    1       16.145000000000      18.642640192133      17.993500000000
+       526    1       16.145000000000      16.778376172920      15.423000000000
+       527    1       17.759500000000      21.439036220953      17.993500000000
+       528    1       17.759500000000      19.574772201740      15.423000000000
+       529    1        0.000000000000      24.235432249773      17.993500000000
+       530    1        0.000000000000      22.371168230560      15.423000000000
+       531    1        1.614500000000      27.031828278593      17.993500000000
+       532    1        1.614500000000      25.167564259380      15.423000000000
+       533    1        3.229000000000      24.235432249773      17.993500000000
+       534    1        3.229000000000      22.371168230560      15.423000000000
+       535    1        4.843500000000      27.031828278593      17.993500000000
+       536    1        4.843500000000      25.167564259380      15.423000000000
+       537    1        6.458000000000      24.235432249773      17.993500000000
+       538    1        6.458000000000      22.371168230560      15.423000000000
+       539    1        8.072500000000      27.031828278593      17.993500000000
+       540    1        8.072500000000      25.167564259380      15.423000000000
+       541    1        9.687000000000      24.235432249773      17.993500000000
+       542    1        9.687000000000      22.371168230560      15.423000000000
+       543    1       11.301500000000      27.031828278593      17.993500000000
+       544    1       11.301500000000      25.167564259380      15.423000000000
+       545    1       12.916000000000      24.235432249773      17.993500000000
+       546    1       12.916000000000      22.371168230560      15.423000000000
+       547    1       14.530500000000      27.031828278593      17.993500000000
+       548    1       14.530500000000      25.167564259380      15.423000000000
+       549    1       16.145000000000      24.235432249773      17.993500000000
+       550    1       16.145000000000      22.371168230560      15.423000000000
+       551    1       17.759500000000      27.031828278593      17.993500000000
+       552    1       17.759500000000      25.167564259380      15.423000000000
+       553    1        0.000000000000      29.828224307413      17.993500000000
+       554    1        0.000000000000      27.963960288200      15.423000000000
+       555    1        1.614500000000      32.624620336233      17.993500000000
+       556    1        1.614500000000      30.760356317019      15.423000000000
+       557    1        3.229000000000      29.828224307413      17.993500000000
+       558    1        3.229000000000      27.963960288200      15.423000000000
+       559    1        4.843500000000      32.624620336233      17.993500000000
+       560    1        4.843500000000      30.760356317019      15.423000000000
+       561    1        6.458000000000      29.828224307413      17.993500000000
+       562    1        6.458000000000      27.963960288200      15.423000000000
+       563    1        8.072500000000      32.624620336233      17.993500000000
+       564    1        8.072500000000      30.760356317019      15.423000000000
+       565    1        9.687000000000      29.828224307413      17.993500000000
+       566    1        9.687000000000      27.963960288200      15.423000000000
+       567    1       11.301500000000      32.624620336233      17.993500000000
+       568    1       11.301500000000      30.760356317019      15.423000000000
+       569    1       12.916000000000      29.828224307413      17.993500000000
+       570    1       12.916000000000      27.963960288200      15.423000000000
+       571    1       14.530500000000      32.624620336233      17.993500000000
+       572    1       14.530500000000      30.760356317019      15.423000000000
+       573    1       16.145000000000      29.828224307413      17.993500000000
+       574    1       16.145000000000      27.963960288200      15.423000000000
+       575    1       17.759500000000      32.624620336233      17.993500000000
+       576    1       17.759500000000      30.760356317019      15.423000000000
+       577    1        0.000000000000       1.864264019213      23.134500000000
+       578    1        0.000000000000       0.000000000000      20.564000000000
+       579    1        1.614500000000       4.660660048033      23.134500000000
+       580    1        1.614500000000       2.796396028820      20.564000000000
+       581    1        3.229000000000       1.864264019213      23.134500000000
+       582    1        3.229000000000       0.000000000000      20.564000000000
+       583    1        4.843500000000       4.660660048033      23.134500000000
+       584    1        4.843500000000       2.796396028820      20.564000000000
+       585    1        6.458000000000       1.864264019213      23.134500000000
+       586    1        6.458000000000       0.000000000000      20.564000000000
+       587    1        8.072500000000       4.660660048033      23.134500000000
+       588    1        8.072500000000       2.796396028820      20.564000000000
+       589    1        9.687000000000       1.864264019213      23.134500000000
+       590    1        9.687000000000       0.000000000000      20.564000000000
+       591    1       11.301500000000       4.660660048033      23.134500000000
+       592    1       11.301500000000       2.796396028820      20.564000000000
+       593    1       12.916000000000       1.864264019213      23.134500000000
+       594    1       12.916000000000       0.000000000000      20.564000000000
+       595    1       14.530500000000       4.660660048033      23.134500000000
+       596    1       14.530500000000       2.796396028820      20.564000000000
+       597    1       16.145000000000       1.864264019213      23.134500000000
+       598    1       16.145000000000       0.000000000000      20.564000000000
+       599    1       17.759500000000       4.660660048033      23.134500000000
+       600    1       17.759500000000       2.796396028820      20.564000000000
+       601    1        0.000000000000       7.457056076853      23.134500000000
+       602    1        0.000000000000       5.592792057640      20.564000000000
+       603    1        1.614500000000      10.253452105673      23.134500000000
+       604    1        1.614500000000       8.389188086460      20.564000000000
+       605    1        3.229000000000       7.457056076853      23.134500000000
+       606    1        3.229000000000       5.592792057640      20.564000000000
+       607    1        4.843500000000      10.253452105673      23.134500000000
+       608    1        4.843500000000       8.389188086460      20.564000000000
+       609    1        6.458000000000       7.457056076853      23.134500000000
+       610    1        6.458000000000       5.592792057640      20.564000000000
+       611    1        8.072500000000      10.253452105673      23.134500000000
+       612    1        8.072500000000       8.389188086460      20.564000000000
+       613    1        9.687000000000       7.457056076853      23.134500000000
+       614    1        9.687000000000       5.592792057640      20.564000000000
+       615    1       11.301500000000      10.253452105673      23.134500000000
+       616    1       11.301500000000       8.389188086460      20.564000000000
+       617    1       12.916000000000       7.457056076853      23.134500000000
+       618    1       12.916000000000       5.592792057640      20.564000000000
+       619    1       14.530500000000      10.253452105673      23.134500000000
+       620    1       14.530500000000       8.389188086460      20.564000000000
+       621    1       16.145000000000       7.457056076853      23.134500000000
+       622    1       16.145000000000       5.592792057640      20.564000000000
+       623    1       17.759500000000      10.253452105673      23.134500000000
+       624    1       17.759500000000       8.389188086460      20.564000000000
+       625    1        0.000000000000      13.049848134493      23.134500000000
+       626    1        0.000000000000      11.185584115280      20.564000000000
+       627    1        1.614500000000      15.846244163313      23.134500000000
+       628    1        1.614500000000      13.981980144100      20.564000000000
+       629    1        3.229000000000      13.049848134493      23.134500000000
+       630    1        3.229000000000      11.185584115280      20.564000000000
+       631    1        4.843500000000      15.846244163313      23.134500000000
+       632    1        4.843500000000      13.981980144100      20.564000000000
+       633    1        6.458000000000      13.049848134493      23.134500000000
+       634    1        6.458000000000      11.185584115280      20.564000000000
+       635    1        8.072500000000      15.846244163313      23.134500000000
+       636    1        8.072500000000      13.981980144100      20.564000000000
+       637    1        9.687000000000      13.049848134493      23.134500000000
+       638    1        9.687000000000      11.185584115280      20.564000000000
+       639    1       11.301500000000      15.846244163313      23.134500000000
+       640    1       11.301500000000      13.981980144100      20.564000000000
+       641    1       12.916000000000      13.049848134493      23.134500000000
+       642    1       12.916000000000      11.185584115280      20.564000000000
+       643    1       14.530500000000      15.846244163313      23.134500000000
+       644    1       14.530500000000      13.981980144100      20.564000000000
+       645    1       16.145000000000      13.049848134493      23.134500000000
+       646    1       16.145000000000      11.185584115280      20.564000000000
+       647    1       17.759500000000      15.846244163313      23.134500000000
+       648    1       17.759500000000      13.981980144100      20.564000000000
+       649    1        0.000000000000      18.642640192133      23.134500000000
+       650    1        0.000000000000      16.778376172920      20.564000000000
+       651    1        1.614500000000      21.439036220953      23.134500000000
+       652    1        1.614500000000      19.574772201740      20.564000000000
+       653    1        3.229000000000      18.642640192133      23.134500000000
+       654    1        3.229000000000      16.778376172920      20.564000000000
+       655    1        4.843500000000      21.439036220953      23.134500000000
+       656    1        4.843500000000      19.574772201740      20.564000000000
+       657    1        6.458000000000      18.642640192133      23.134500000000
+       658    1        6.458000000000      16.778376172920      20.564000000000
+       659    1        8.072500000000      21.439036220953      23.134500000000
+       660    1        8.072500000000      19.574772201740      20.564000000000
+       661    1        9.687000000000      18.642640192133      23.134500000000
+       662    1        9.687000000000      16.778376172920      20.564000000000
+       663    1       11.301500000000      21.439036220953      23.134500000000
+       664    1       11.301500000000      19.574772201740      20.564000000000
+       665    1       12.916000000000      18.642640192133      23.134500000000
+       666    1       12.916000000000      16.778376172920      20.564000000000
+       667    1       14.530500000000      21.439036220953      23.134500000000
+       668    1       14.530500000000      19.574772201740      20.564000000000
+       669    1       16.145000000000      18.642640192133      23.134500000000
+       670    1       16.145000000000      16.778376172920      20.564000000000
+       671    1       17.759500000000      21.439036220953      23.134500000000
+       672    1       17.759500000000      19.574772201740      20.564000000000
+       673    1        0.000000000000      24.235432249773      23.134500000000
+       674    1        0.000000000000      22.371168230560      20.564000000000
+       675    1        1.614500000000      27.031828278593      23.134500000000
+       676    1        1.614500000000      25.167564259380      20.564000000000
+       677    1        3.229000000000      24.235432249773      23.134500000000
+       678    1        3.229000000000      22.371168230560      20.564000000000
+       679    1        4.843500000000      27.031828278593      23.134500000000
+       680    1        4.843500000000      25.167564259380      20.564000000000
+       681    1        6.458000000000      24.235432249773      23.134500000000
+       682    1        6.458000000000      22.371168230560      20.564000000000
+       683    1        8.072500000000      27.031828278593      23.134500000000
+       684    1        8.072500000000      25.167564259380      20.564000000000
+       685    1        9.687000000000      24.235432249773      23.134500000000
+       686    1        9.687000000000      22.371168230560      20.564000000000
+       687    1       11.301500000000      27.031828278593      23.134500000000
+       688    1       11.301500000000      25.167564259380      20.564000000000
+       689    1       12.916000000000      24.235432249773      23.134500000000
+       690    1       12.916000000000      22.371168230560      20.564000000000
+       691    1       14.530500000000      27.031828278593      23.134500000000
+       692    1       14.530500000000      25.167564259380      20.564000000000
+       693    1       16.145000000000      24.235432249773      23.134500000000
+       694    1       16.145000000000      22.371168230560      20.564000000000
+       695    1       17.759500000000      27.031828278593      23.134500000000
+       696    1       17.759500000000      25.167564259380      20.564000000000
+       697    1        0.000000000000      29.828224307413      23.134500000000
+       698    1        0.000000000000      27.963960288200      20.564000000000
+       699    1        1.614500000000      32.624620336233      23.134500000000
+       700    1        1.614500000000      30.760356317019      20.564000000000
+       701    1        3.229000000000      29.828224307413      23.134500000000
+       702    1        3.229000000000      27.963960288200      20.564000000000
+       703    1        4.843500000000      32.624620336233      23.134500000000
+       704    1        4.843500000000      30.760356317019      20.564000000000
+       705    1        6.458000000000      29.828224307413      23.134500000000
+       706    1        6.458000000000      27.963960288200      20.564000000000
+       707    1        8.072500000000      32.624620336233      23.134500000000
+       708    1        8.072500000000      30.760356317019      20.564000000000
+       709    1        9.687000000000      29.828224307413      23.134500000000
+       710    1        9.687000000000      27.963960288200      20.564000000000
+       711    1       11.301500000000      32.624620336233      23.134500000000
+       712    1       11.301500000000      30.760356317019      20.564000000000
+       713    1       12.916000000000      29.828224307413      23.134500000000
+       714    1       12.916000000000      27.963960288200      20.564000000000
+       715    1       14.530500000000      32.624620336233      23.134500000000
+       716    1       14.530500000000      30.760356317019      20.564000000000
+       717    1       16.145000000000      29.828224307413      23.134500000000
+       718    1       16.145000000000      27.963960288200      20.564000000000
+       719    1       17.759500000000      32.624620336233      23.134500000000
+       720    1       17.759500000000      30.760356317019      20.564000000000
+       721    1        0.000000000000       1.864264019213      28.275500000000
+       722    1        0.000000000000       0.000000000000      25.705000000000
+       723    1        1.614500000000       4.660660048033      28.275500000000
+       724    1        1.614500000000       2.796396028820      25.705000000000
+       725    1        3.229000000000       1.864264019213      28.275500000000
+       726    1        3.229000000000       0.000000000000      25.705000000000
+       727    1        4.843500000000       4.660660048033      28.275500000000
+       728    1        4.843500000000       2.796396028820      25.705000000000
+       729    1        6.458000000000       1.864264019213      28.275500000000
+       730    1        6.458000000000       0.000000000000      25.705000000000
+       731    1        8.072500000000       4.660660048033      28.275500000000
+       732    1        8.072500000000       2.796396028820      25.705000000000
+       733    1        9.687000000000       1.864264019213      28.275500000000
+       734    1        9.687000000000       0.000000000000      25.705000000000
+       735    1       11.301500000000       4.660660048033      28.275500000000
+       736    1       11.301500000000       2.796396028820      25.705000000000
+       737    1       12.916000000000       1.864264019213      28.275500000000
+       738    1       12.916000000000       0.000000000000      25.705000000000
+       739    1       14.530500000000       4.660660048033      28.275500000000
+       740    1       14.530500000000       2.796396028820      25.705000000000
+       741    1       16.145000000000       1.864264019213      28.275500000000
+       742    1       16.145000000000       0.000000000000      25.705000000000
+       743    1       17.759500000000       4.660660048033      28.275500000000
+       744    1       17.759500000000       2.796396028820      25.705000000000
+       745    1        0.000000000000       7.457056076853      28.275500000000
+       746    1        0.000000000000       5.592792057640      25.705000000000
+       747    1        1.614500000000      10.253452105673      28.275500000000
+       748    1        1.614500000000       8.389188086460      25.705000000000
+       749    1        3.229000000000       7.457056076853      28.275500000000
+       750    1        3.229000000000       5.592792057640      25.705000000000
+       751    1        4.843500000000      10.253452105673      28.275500000000
+       752    1        4.843500000000       8.389188086460      25.705000000000
+       753    1        6.458000000000       7.457056076853      28.275500000000
+       754    1        6.458000000000       5.592792057640      25.705000000000
+       755    1        8.072500000000      10.253452105673      28.275500000000
+       756    1        8.072500000000       8.389188086460      25.705000000000
+       757    1        9.687000000000       7.457056076853      28.275500000000
+       758    1        9.687000000000       5.592792057640      25.705000000000
+       759    1       11.301500000000      10.253452105673      28.275500000000
+       760    1       11.301500000000       8.389188086460      25.705000000000
+       761    1       12.916000000000       7.457056076853      28.275500000000
+       762    1       12.916000000000       5.592792057640      25.705000000000
+       763    1       14.530500000000      10.253452105673      28.275500000000
+       764    1       14.530500000000       8.389188086460      25.705000000000
+       765    1       16.145000000000       7.457056076853      28.275500000000
+       766    1       16.145000000000       5.592792057640      25.705000000000
+       767    1       17.759500000000      10.253452105673      28.275500000000
+       768    1       17.759500000000       8.389188086460      25.705000000000
+       769    1        0.000000000000      13.049848134493      28.275500000000
+       770    1        0.000000000000      11.185584115280      25.705000000000
+       771    1        1.614500000000      15.846244163313      28.275500000000
+       772    1        1.614500000000      13.981980144100      25.705000000000
+       773    1        3.229000000000      13.049848134493      28.275500000000
+       774    1        3.229000000000      11.185584115280      25.705000000000
+       775    1        4.843500000000      15.846244163313      28.275500000000
+       776    1        4.843500000000      13.981980144100      25.705000000000
+       777    1        6.458000000000      13.049848134493      28.275500000000
+       778    1        6.458000000000      11.185584115280      25.705000000000
+       779    1        8.072500000000      15.846244163313      28.275500000000
+       780    1        8.072500000000      13.981980144100      25.705000000000
+       781    1        9.687000000000      13.049848134493      28.275500000000
+       782    1        9.687000000000      11.185584115280      25.705000000000
+       783    1       11.301500000000      15.846244163313      28.275500000000
+       784    1       11.301500000000      13.981980144100      25.705000000000
+       785    1       12.916000000000      13.049848134493      28.275500000000
+       786    1       12.916000000000      11.185584115280      25.705000000000
+       787    1       14.530500000000      15.846244163313      28.275500000000
+       788    1       14.530500000000      13.981980144100      25.705000000000
+       789    1       16.145000000000      13.049848134493      28.275500000000
+       790    1       16.145000000000      11.185584115280      25.705000000000
+       791    1       17.759500000000      15.846244163313      28.275500000000
+       792    1       17.759500000000      13.981980144100      25.705000000000
+       793    1        0.000000000000      18.642640192133      28.275500000000
+       794    1        0.000000000000      16.778376172920      25.705000000000
+       795    1        1.614500000000      21.439036220953      28.275500000000
+       796    1        1.614500000000      19.574772201740      25.705000000000
+       797    1        3.229000000000      18.642640192133      28.275500000000
+       798    1        3.229000000000      16.778376172920      25.705000000000
+       799    1        4.843500000000      21.439036220953      28.275500000000
+       800    1        4.843500000000      19.574772201740      25.705000000000
+       801    1        6.458000000000      18.642640192133      28.275500000000
+       802    1        6.458000000000      16.778376172920      25.705000000000
+       803    1        8.072500000000      21.439036220953      28.275500000000
+       804    1        8.072500000000      19.574772201740      25.705000000000
+       805    1        9.687000000000      18.642640192133      28.275500000000
+       806    1        9.687000000000      16.778376172920      25.705000000000
+       807    1       11.301500000000      21.439036220953      28.275500000000
+       808    1       11.301500000000      19.574772201740      25.705000000000
+       809    1       12.916000000000      18.642640192133      28.275500000000
+       810    1       12.916000000000      16.778376172920      25.705000000000
+       811    1       14.530500000000      21.439036220953      28.275500000000
+       812    1       14.530500000000      19.574772201740      25.705000000000
+       813    1       16.145000000000      18.642640192133      28.275500000000
+       814    1       16.145000000000      16.778376172920      25.705000000000
+       815    1       17.759500000000      21.439036220953      28.275500000000
+       816    1       17.759500000000      19.574772201740      25.705000000000
+       817    1        0.000000000000      24.235432249773      28.275500000000
+       818    1        0.000000000000      22.371168230560      25.705000000000
+       819    1        1.614500000000      27.031828278593      28.275500000000
+       820    1        1.614500000000      25.167564259380      25.705000000000
+       821    1        3.229000000000      24.235432249773      28.275500000000
+       822    1        3.229000000000      22.371168230560      25.705000000000
+       823    1        4.843500000000      27.031828278593      28.275500000000
+       824    1        4.843500000000      25.167564259380      25.705000000000
+       825    1        6.458000000000      24.235432249773      28.275500000000
+       826    1        6.458000000000      22.371168230560      25.705000000000
+       827    1        8.072500000000      27.031828278593      28.275500000000
+       828    1        8.072500000000      25.167564259380      25.705000000000
+       829    1        9.687000000000      24.235432249773      28.275500000000
+       830    1        9.687000000000      22.371168230560      25.705000000000
+       831    1       11.301500000000      27.031828278593      28.275500000000
+       832    1       11.301500000000      25.167564259380      25.705000000000
+       833    1       12.916000000000      24.235432249773      28.275500000000
+       834    1       12.916000000000      22.371168230560      25.705000000000
+       835    1       14.530500000000      27.031828278593      28.275500000000
+       836    1       14.530500000000      25.167564259380      25.705000000000
+       837    1       16.145000000000      24.235432249773      28.275500000000
+       838    1       16.145000000000      22.371168230560      25.705000000000
+       839    1       17.759500000000      27.031828278593      28.275500000000
+       840    1       17.759500000000      25.167564259380      25.705000000000
+       841    1        0.000000000000      29.828224307413      28.275500000000
+       842    1        0.000000000000      27.963960288200      25.705000000000
+       843    1        1.614500000000      32.624620336233      28.275500000000
+       844    1        1.614500000000      30.760356317019      25.705000000000
+       845    1        3.229000000000      29.828224307413      28.275500000000
+       846    1        3.229000000000      27.963960288200      25.705000000000
+       847    1        4.843500000000      32.624620336233      28.275500000000
+       848    1        4.843500000000      30.760356317019      25.705000000000
+       849    1        6.458000000000      29.828224307413      28.275500000000
+       850    1        6.458000000000      27.963960288200      25.705000000000
+       851    1        8.072500000000      32.624620336233      28.275500000000
+       852    1        8.072500000000      30.760356317019      25.705000000000
+       853    1        9.687000000000      29.828224307413      28.275500000000
+       854    1        9.687000000000      27.963960288200      25.705000000000
+       855    1       11.301500000000      32.624620336233      28.275500000000
+       856    1       11.301500000000      30.760356317019      25.705000000000
+       857    1       12.916000000000      29.828224307413      28.275500000000
+       858    1       12.916000000000      27.963960288200      25.705000000000
+       859    1       14.530500000000      32.624620336233      28.275500000000
+       860    1       14.530500000000      30.760356317019      25.705000000000
+       861    1       16.145000000000      29.828224307413      28.275500000000
+       862    1       16.145000000000      27.963960288200      25.705000000000
+       863    1       17.759500000000      32.624620336233      28.275500000000
+       864    1       17.759500000000      30.760356317019      25.705000000000
diff --git a/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/lda_scalings.dat b/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/lda_scalings.dat
new file mode 100644
index 0000000000..68a78f8c40
--- /dev/null
+++ b/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/lda_scalings.dat
@@ -0,0 +1,55 @@
+0.65552758 -0.08218108 -0.23122826
+2.03065849 0.46494117 0.87297750
+-15.80180341 1.50484584 0.31669351
+0.06060238 -0.47589059 -0.41017499
+4.23928030 -2.27982958 4.87969884
+-1.09746642 -1.28258171 2.03459312
+5.48480653 -0.66345012 3.18471732
+-1.57479966 0.17478998 -0.17156696
+3.85779786 1.59890578 1.78936017
+-1.14469715 -2.15823271 2.14353632
+5.97160056 0.11573423 0.97653410
+4.44645807 -0.15365582 -0.08773622
+3.09452721 0.32439223 1.19779688
+-1.22585061 -0.32185613 0.03949731
+0.44816997 -1.11182687 0.26222208
+0.19532128 0.30397832 -0.57154050
+5.52432571 -0.76685448 0.32647935
+6.37957282 -0.96148815 1.53439397
+2.73798648 -0.69516327 1.73607004
+0.94755899 0.41154702 -0.14095753
+1.50733544 1.22254481 0.26284605
+0.98313431 -1.24195379 0.59009611
+-0.76518592 0.11605047 -0.00304658
+-0.68335076 0.48935564 -0.53834507
+1.86534260 -0.49032664 -0.06298849
+1.52931829 0.64853878 -0.56286214
+2.64217062 -1.37348638 0.22526281
+0.18023516 0.03439864 0.77624538
+2.02366558 0.35432524 0.76748492
+0.80982907 0.31806067 0.08774175
+1.57388194 -1.07822533 0.15886237
+0.41345498 0.38916338 -0.29917607
+-0.24819893 0.13763422 0.45471609
+-2.27933523 -0.01771636 -0.20567577
+1.52275665 0.35306670 0.21266257
+0.28547991 1.05230832 1.16641438
+0.97147437 -0.63973458 -0.37994470
+0.48124764 0.03483500 -0.01982056
+0.74502588 0.14367872 -0.24443596
+0.48813660 0.15632903 -0.88469078
+0.04886450 0.00882595 -0.47920447
+0.03103900 -0.15091487 -0.41193682
+-0.10106190 0.14911569 0.10727243
+-0.15552036 -0.49286545 -0.04644942
+0.27304084 0.35638954 1.13331445
+0.57788886 -0.50269555 0.09110942
+0.36780762 -0.08710371 -0.28478716
+1.01678932 -0.42099561 -0.07317253
+0.06561086 -0.27253002 -0.05366136
+0.22266923 0.19999531 -0.30017173
+-0.18666193 0.02576273 0.27752106
+-0.76718071 0.61299522 0.58296511
+0.60978530 0.04962900 -0.32796430
+-0.11572649 0.03034386 -0.83005753
+0.12675714 0.00004617 -0.37078106
diff --git a/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/lr_bias.dat b/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/lr_bias.dat
new file mode 100644
index 0000000000..467f1844d9
--- /dev/null
+++ b/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/lr_bias.dat
@@ -0,0 +1 @@
+-6.32012657 5.62127377 1.19871662 -0.49986382
diff --git a/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/lr_decision.dat b/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/lr_decision.dat
new file mode 100644
index 0000000000..e938d59d8b
--- /dev/null
+++ b/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/lr_decision.dat
@@ -0,0 +1,4 @@
+-0.42810669 1.25467216 0.93144383 
+0.09624929 -0.80420088 0.48996738 
+-0.09865949 0.39991755 -0.69233982 
+0.43051689 -0.85038883 -0.72907140 
diff --git a/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/mahalanobis_file.dat b/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/mahalanobis_file.dat
new file mode 100644
index 0000000000..299ef3c72f
--- /dev/null
+++ b/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/mahalanobis_file.dat
@@ -0,0 +1,20 @@
+5.0540
+-23.8329 4.6638 3.9805
+1.1377 0.1077 -0.0171
+0.1077 0.8846 -0.2577
+-0.0171 -0.2577 0.6783
+5.2340
+-21.2853 -6.1583 1.7948
+1.7124 0.0341 0.1966
+0.0341 0.6453 0.2880
+0.1966 0.2880 1.8991
+5.0360
+-23.1593 1.3059 -5.7549
+0.7496 -0.0806 -0.1101
+-0.0806 1.1178 0.1667
+-0.1101 0.1667 0.6711
+7.9940
+68.1971 0.1604 -0.0067
+0.9663 -0.1846 0.6622
+-0.1846 8.2371 0.9841
+0.6622 0.9841 5.9601
diff --git a/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/mean_descriptor.dat b/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/mean_descriptor.dat
new file mode 100644
index 0000000000..ae50ec809f
--- /dev/null
+++ b/examples/PACKAGES/sna_nnn_slcsa/dir.slcsa/mean_descriptor.dat
@@ -0,0 +1,55 @@
+137.71497059
+0.36342014
+-2.78949838
+1.75623090
+-4.86893969
+-2.31918628
+-3.01873942
+59.70217846
+-8.31239311
+-1.05113276
+-4.08948813
+11.70560234
+17.48710737
+42.43158755
+-6.27727395
+-1.46675636
+-3.40739849
+1.58674150
+13.02515977
+5.67885926
+6.45692906
+4.69273492
+21.59764216
+-7.68805780
+-4.37357550
+-5.79764719
+0.53149261
+-0.00723980
+-2.47811316
+-0.34939237
+-4.59425510
+-4.44056296
+107.64051985
+-9.32851480
+-6.62214151
+-5.69590145
+22.80361437
+9.47641390
+2.25214024
+-0.19403065
+3.05386205
+12.91756406
+135.15381317
+-9.93292065
+-3.73311129
+10.67039500
+9.60945072
+-0.03566872
+21.97944941
+6.70251772
+74.60284853
+-5.99090678
+0.21877973
+-1.19909174
+1.37424965
diff --git a/examples/PACKAGES/sna_nnn_slcsa/in.slcsa b/examples/PACKAGES/sna_nnn_slcsa/in.slcsa
new file mode 100644
index 0000000000..31a8189da3
--- /dev/null
+++ b/examples/PACKAGES/sna_nnn_slcsa/in.slcsa
@@ -0,0 +1,57 @@
+variable        trequis equal 750.0
+variable        prequis_low equal 0.0
+variable        prequis_high equal 25.0e4
+variable        equilSteps equal 200
+variable        runSteps equal 2000
+variable        freqdump equal 200
+variable        pstime equal step*dt
+variable        sxx equal 1.e-4*pxx
+variable        syy equal 1.e-4*pyy
+variable        szz equal 1.e-4*pzz
+variable        sxy equal 1.e-4*pxy
+variable        sxz equal 1.e-4*pxz
+variable        syz equal 1.e-4*pyz
+variable        TK  equal temp
+variable        PE  equal pe
+variable        KE  equal ke
+variable        V   equal vol
+
+dimension       3
+boundary        p p p
+units           metal
+atom_style      atomic
+read_data       data.zr_cell
+replicate       1 5 5
+
+change_box      all triclinic
+
+pair_style      hybrid/overlay zero 9.0 eam/fs
+pair_coeff * *  zero
+pair_coeff * *  eam/fs Zr_mm.eam.fs Zr
+
+timestep        0.002
+
+thermo          50
+thermo_style    custom step pe ke temp vol pxx pyy pzz pxy pyz pxz
+
+# fix             extra all print 50 "${pstime} ${TK} ${PE} ${KE} ${V} ${sxx} ${syy} ${szz} ${sxy} ${sxz} ${syz}"  file thermo_global_npt_low_temperature_Zr_hcp.dat
+
+velocity        all create ${trequis} 42345 dist gaussian
+
+# 1st step : compute the bispectrum on 24 nearest neighbors
+compute         bnnn all sna/atom 9.0 0.99363 8 0.5 1.0 rmin0 0.0 nnn 24 wmode 1 delta 0.25
+
+# 2nd step : perform dimension reduction + logistic regression
+compute         slcsa all slcsa/atom 8 4 dir.slcsa/mean_descriptor.dat dir.slcsa/lda_scalings.dat dir.slcsa/lr_decision.dat dir.slcsa/lr_bias.dat dir.slcsa/mahalanobis_file.dat c_bnnn[*]
+
+#dump            d1 all custom ${freqdump} slcsa_demo.dump id x y z c_slcsa[*]
+
+# for testing only. in production use dump as shown above
+compute max_slcsa all reduce max c_slcsa[*]
+compute min_slcsa all reduce min c_slcsa[*]
+thermo_style    custom step pe ke temp c_max_slcsa[*] c_min_slcsa[*]
+
+#fix            1 all nvt temp ${trequis} ${trequis} 0.100
+fix             1 all npt temp ${trequis} ${trequis} 0.100 tri ${prequis_low} ${prequis_low} 1.0
+
+run             ${equilSteps}
diff --git a/examples/PACKAGES/sna_nnn_slcsa/log.12Dec23.slcsa.g++.1 b/examples/PACKAGES/sna_nnn_slcsa/log.12Dec23.slcsa.g++.1
new file mode 100644
index 0000000000..58c2f40684
--- /dev/null
+++ b/examples/PACKAGES/sna_nnn_slcsa/log.12Dec23.slcsa.g++.1
@@ -0,0 +1,180 @@
+LAMMPS (21 Nov 2023)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
+  using 1 OpenMP thread(s) per MPI task
+variable        trequis equal 750.0
+variable        prequis_low equal 0.0
+variable        prequis_high equal 25.0e4
+variable        equilSteps equal 200
+variable        runSteps equal 2000
+variable        freqdump equal 200
+variable        pstime equal step*dt
+variable        sxx equal 1.e-4*pxx
+variable        syy equal 1.e-4*pyy
+variable        szz equal 1.e-4*pzz
+variable        sxy equal 1.e-4*pxy
+variable        sxz equal 1.e-4*pxz
+variable        syz equal 1.e-4*pyz
+variable        TK  equal temp
+variable        PE  equal pe
+variable        KE  equal ke
+variable        V   equal vol
+
+dimension       3
+boundary        p p p
+units           metal
+atom_style      atomic
+read_data       data.zr_cell
+Reading data file ...
+  orthogonal box = (0 0 0) to (19.374 33.556752 30.846)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  864 atoms
+  read_data CPU = 0.002 seconds
+replicate       1 5 5
+Replication is creating a 1x5x5 = 25 times larger system...
+  orthogonal box = (0 0 0) to (19.374 167.78376 154.23)
+  1 by 1 by 1 MPI processor grid
+  21600 atoms
+  replicate CPU = 0.001 seconds
+
+change_box      all triclinic
+Changing box ...
+  triclinic box = (0 0 0) to (19.374 167.78376 154.23) with tilt (0 0 0)
+
+pair_style      hybrid/overlay zero 9.0 eam/fs
+pair_coeff * *  zero
+pair_coeff * *  eam/fs Zr_mm.eam.fs Zr
+Reading eam/fs potential file Zr_mm.eam.fs with DATE: 2007-06-11
+
+timestep        0.002
+
+thermo          50
+thermo_style    custom step pe ke temp vol pxx pyy pzz pxy pyz pxz
+
+# fix             extra all print 50 "${pstime} ${TK} ${PE} ${KE} ${V} ${sxx} ${syy} ${szz} ${sxy} ${sxz} ${syz}"  file thermo_global_npt_low_temperature_Zr_hcp.dat
+
+velocity        all create ${trequis} 42345 dist gaussian
+velocity        all create 750 42345 dist gaussian
+
+# 1st step : compute the bispectrum on 24 nearest neighbors
+compute         bnnn all sna/atom 9.0 0.99363 8 0.5 1.0 rmin0 0.0 nnn 24 wmode 1 delta 0.25
+
+# 2nd step : perform dimension reduction + logistic regression
+compute         slcsa all slcsa/atom 8 4 dir.slcsa/mean_descriptor.dat dir.slcsa/lda_scalings.dat dir.slcsa/lr_decision.dat dir.slcsa/lr_bias.dat dir.slcsa/mahalanobis_file.dat c_bnnn[*]
+Files used:
+  database mean descriptor: dir.slcsa/mean_descriptor.dat
+  lda scalings            : dir.slcsa/lda_scalings.dat
+  lr decision             : dir.slcsa/lr_decision.dat
+  lr bias                 : dir.slcsa/lr_bias.dat
+  maha stats              : dir.slcsa/mahalanobis_file.dat
+For class 0  maha threshold = 5.054
+  mean B:
+      -23.8329
+        4.6638
+        3.9805
+  icov:
+        1.1377      0.1077     -0.0171
+        0.1077      0.8846     -0.2577
+       -0.0171     -0.2577      0.6783
+For class 1  maha threshold = 5.234
+  mean B:
+      -21.2853
+       -6.1583
+        1.7948
+  icov:
+        1.7124      0.0341      0.1966
+        0.0341      0.6453       0.288
+        0.1966       0.288      1.8991
+For class 2  maha threshold = 5.036
+  mean B:
+      -23.1593
+        1.3059
+       -5.7549
+  icov:
+        0.7496     -0.0806     -0.1101
+       -0.0806      1.1178      0.1667
+       -0.1101      0.1667      0.6711
+For class 3  maha threshold = 7.994
+  mean B:
+       68.1971
+        0.1604
+       -0.0067
+  icov:
+        0.9663     -0.1846      0.6622
+       -0.1846      8.2371      0.9841
+        0.6622      0.9841      5.9601
+
+#dump            d1 all custom ${freqdump} slcsa_demo.dump id x y z c_slcsa[*]
+
+# for testing only. in production use dump as shown above
+compute max_slcsa all reduce max c_slcsa[*]
+compute min_slcsa all reduce min c_slcsa[*]
+thermo_style    custom step pe ke temp c_max_slcsa[*] c_min_slcsa[*]
+
+#fix            1 all nvt temp ${trequis} ${trequis} 0.100
+fix             1 all npt temp ${trequis} ${trequis} 0.100 tri ${prequis_low} ${prequis_low} 1.0
+fix             1 all npt temp 750 ${trequis} 0.100 tri ${prequis_low} ${prequis_low} 1.0
+fix             1 all npt temp 750 750 0.100 tri ${prequis_low} ${prequis_low} 1.0
+fix             1 all npt temp 750 750 0.100 tri 0 ${prequis_low} 1.0
+fix             1 all npt temp 750 750 0.100 tri 0 0 1.0
+
+run             ${equilSteps}
+run             200
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 11
+  ghost atom cutoff = 11
+  binsize = 5.5, bins = 4 31 29
+  3 neighbor lists, perpetual/occasional/extra = 2 1 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton/tri
+      stencil: half/bin/3d/tri
+      bin: standard
+  (2) pair eam/fs, perpetual, trim from (1)
+      attributes: half, newton on, cut 9.6
+      pair build: trim
+      stencil: none
+      bin: none
+  (3) compute sna/atom, occasional
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 31.9 | 31.9 | 31.9 Mbytes
+   Step         PotEng         KinEng          Temp      c_max_slcsa[1] c_max_slcsa[2] c_max_slcsa[3] c_max_slcsa[4] c_max_slcsa[5] c_min_slcsa[1] c_min_slcsa[2] c_min_slcsa[3] c_min_slcsa[4] c_min_slcsa[5]
+         0  -143297.23      2093.9174      750            7.6195146      15.787294      1.2169942      111.01919      2              7.6195146      15.787294      1.2169942      111.01919      2            
+        50  -142154.08      1007.7164      360.9442       8.8091564      19.23244       4.2093382      113.87959      2              5.0327148      9.6817454      0.02610585     106.71863      2            
+       100  -142365.33      1406.6559      503.83647      8.6272189      17.908949      2.9294666      113.75167      2              6.2058895      11.913521      0.033775944    108.66893      2            
+       150  -142188.18      1432.0075      512.91691      8.6441961      18.176321      2.9277374      114.27958      2              5.5899425      10.521867      0.014919473    108.14526      2            
+       200  -142000.4       1481.7247      530.72462      8.5895692      18.65646       3.1725758      114.55015      2              5.5955774      10.776385      0.061469343    108.35384      2            
+Loop time of 36.3759 on 1 procs for 200 steps with 21600 atoms
+
+Performance: 0.950 ns/day, 25.261 hours/ns, 5.498 timesteps/s, 118.760 katom-step/s
+99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 9.0837     | 9.0837     | 9.0837     |   0.0 | 24.97
+Neigh   | 0.52896    | 0.52896    | 0.52896    |   0.0 |  1.45
+Comm    | 0.045416   | 0.045416   | 0.045416   |   0.0 |  0.12
+Output  | 26.548     | 26.548     | 26.548     |   0.0 | 72.98
+Modify  | 0.1493     | 0.1493     | 0.1493     |   0.0 |  0.41
+Other   |            | 0.02088    |            |       |  0.06
+
+Nlocal:          21600 ave       21600 max       21600 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:          36674 ave       36674 max       36674 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    2.61729e+06 ave 2.61729e+06 max 2.61729e+06 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+FullNghs:  5.24007e+06 ave 5.24007e+06 max 5.24007e+06 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 5240069
+Ave neighs/atom = 242.59579
+Neighbor list builds = 4
+Dangerous builds = 0
+Total wall time: 0:00:43
diff --git a/examples/PACKAGES/sna_nnn_slcsa/log.12Dec23.slcsa.g++.4 b/examples/PACKAGES/sna_nnn_slcsa/log.12Dec23.slcsa.g++.4
new file mode 100644
index 0000000000..6436eabe2b
--- /dev/null
+++ b/examples/PACKAGES/sna_nnn_slcsa/log.12Dec23.slcsa.g++.4
@@ -0,0 +1,180 @@
+LAMMPS (21 Nov 2023)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
+  using 1 OpenMP thread(s) per MPI task
+variable        trequis equal 750.0
+variable        prequis_low equal 0.0
+variable        prequis_high equal 25.0e4
+variable        equilSteps equal 200
+variable        runSteps equal 2000
+variable        freqdump equal 200
+variable        pstime equal step*dt
+variable        sxx equal 1.e-4*pxx
+variable        syy equal 1.e-4*pyy
+variable        szz equal 1.e-4*pzz
+variable        sxy equal 1.e-4*pxy
+variable        sxz equal 1.e-4*pxz
+variable        syz equal 1.e-4*pyz
+variable        TK  equal temp
+variable        PE  equal pe
+variable        KE  equal ke
+variable        V   equal vol
+
+dimension       3
+boundary        p p p
+units           metal
+atom_style      atomic
+read_data       data.zr_cell
+Reading data file ...
+  orthogonal box = (0 0 0) to (19.374 33.556752 30.846)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  864 atoms
+  read_data CPU = 0.002 seconds
+replicate       1 5 5
+Replication is creating a 1x5x5 = 25 times larger system...
+  orthogonal box = (0 0 0) to (19.374 167.78376 154.23)
+  1 by 2 by 2 MPI processor grid
+  21600 atoms
+  replicate CPU = 0.001 seconds
+
+change_box      all triclinic
+Changing box ...
+  triclinic box = (0 0 0) to (19.374 167.78376 154.23) with tilt (0 0 0)
+
+pair_style      hybrid/overlay zero 9.0 eam/fs
+pair_coeff * *  zero
+pair_coeff * *  eam/fs Zr_mm.eam.fs Zr
+Reading eam/fs potential file Zr_mm.eam.fs with DATE: 2007-06-11
+
+timestep        0.002
+
+thermo          50
+thermo_style    custom step pe ke temp vol pxx pyy pzz pxy pyz pxz
+
+# fix             extra all print 50 "${pstime} ${TK} ${PE} ${KE} ${V} ${sxx} ${syy} ${szz} ${sxy} ${sxz} ${syz}"  file thermo_global_npt_low_temperature_Zr_hcp.dat
+
+velocity        all create ${trequis} 42345 dist gaussian
+velocity        all create 750 42345 dist gaussian
+
+# 1st step : compute the bispectrum on 24 nearest neighbors
+compute         bnnn all sna/atom 9.0 0.99363 8 0.5 1.0 rmin0 0.0 nnn 24 wmode 1 delta 0.25
+
+# 2nd step : perform dimension reduction + logistic regression
+compute         slcsa all slcsa/atom 8 4 dir.slcsa/mean_descriptor.dat dir.slcsa/lda_scalings.dat dir.slcsa/lr_decision.dat dir.slcsa/lr_bias.dat dir.slcsa/mahalanobis_file.dat c_bnnn[*]
+Files used:
+  database mean descriptor: dir.slcsa/mean_descriptor.dat
+  lda scalings            : dir.slcsa/lda_scalings.dat
+  lr decision             : dir.slcsa/lr_decision.dat
+  lr bias                 : dir.slcsa/lr_bias.dat
+  maha stats              : dir.slcsa/mahalanobis_file.dat
+For class 0  maha threshold = 5.054
+  mean B:
+      -23.8329
+        4.6638
+        3.9805
+  icov:
+        1.1377      0.1077     -0.0171
+        0.1077      0.8846     -0.2577
+       -0.0171     -0.2577      0.6783
+For class 1  maha threshold = 5.234
+  mean B:
+      -21.2853
+       -6.1583
+        1.7948
+  icov:
+        1.7124      0.0341      0.1966
+        0.0341      0.6453       0.288
+        0.1966       0.288      1.8991
+For class 2  maha threshold = 5.036
+  mean B:
+      -23.1593
+        1.3059
+       -5.7549
+  icov:
+        0.7496     -0.0806     -0.1101
+       -0.0806      1.1178      0.1667
+       -0.1101      0.1667      0.6711
+For class 3  maha threshold = 7.994
+  mean B:
+       68.1971
+        0.1604
+       -0.0067
+  icov:
+        0.9663     -0.1846      0.6622
+       -0.1846      8.2371      0.9841
+        0.6622      0.9841      5.9601
+
+#dump            d1 all custom ${freqdump} slcsa_demo.dump id x y z c_slcsa[*]
+
+# for testing only. in production use dump as shown above
+compute max_slcsa all reduce max c_slcsa[*]
+compute min_slcsa all reduce min c_slcsa[*]
+thermo_style    custom step pe ke temp c_max_slcsa[*] c_min_slcsa[*]
+
+#fix            1 all nvt temp ${trequis} ${trequis} 0.100
+fix             1 all npt temp ${trequis} ${trequis} 0.100 tri ${prequis_low} ${prequis_low} 1.0
+fix             1 all npt temp 750 ${trequis} 0.100 tri ${prequis_low} ${prequis_low} 1.0
+fix             1 all npt temp 750 750 0.100 tri ${prequis_low} ${prequis_low} 1.0
+fix             1 all npt temp 750 750 0.100 tri 0 ${prequis_low} 1.0
+fix             1 all npt temp 750 750 0.100 tri 0 0 1.0
+
+run             ${equilSteps}
+run             200
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 11
+  ghost atom cutoff = 11
+  binsize = 5.5, bins = 4 31 29
+  3 neighbor lists, perpetual/occasional/extra = 2 1 0
+  (1) pair zero, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton/tri
+      stencil: half/bin/3d/tri
+      bin: standard
+  (2) pair eam/fs, perpetual, trim from (1)
+      attributes: half, newton on, cut 9.6
+      pair build: trim
+      stencil: none
+      bin: none
+  (3) compute sna/atom, occasional
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 11.26 | 11.45 | 11.64 Mbytes
+   Step         PotEng         KinEng          Temp      c_max_slcsa[1] c_max_slcsa[2] c_max_slcsa[3] c_max_slcsa[4] c_max_slcsa[5] c_min_slcsa[1] c_min_slcsa[2] c_min_slcsa[3] c_min_slcsa[4] c_min_slcsa[5]
+         0  -143297.23      2093.9174      750            7.6195146      15.787294      1.2169942      111.01919      2              7.6195146      15.787294      1.2169942      111.01919      2            
+        50  -142154.08      1007.7164      360.9442       8.8091564      19.23244       4.2093382      113.87959      2              5.0327148      9.6817454      0.02610585     106.71863      2            
+       100  -142365.33      1406.6559      503.83647      8.6272189      17.908949      2.9294666      113.75167      2              6.2058895      11.913521      0.033775944    108.66893      2            
+       150  -142188.18      1432.0075      512.91691      8.6441961      18.176321      2.9277374      114.27958      2              5.5899425      10.521867      0.014919473    108.14526      2            
+       200  -142000.4       1481.7247      530.72462      8.5895692      18.65646       3.1725758      114.55015      2              5.5955774      10.776385      0.061469343    108.35384      2            
+Loop time of 9.81677 on 4 procs for 200 steps with 21600 atoms
+
+Performance: 3.521 ns/day, 6.817 hours/ns, 20.373 timesteps/s, 440.063 katom-step/s
+99.7% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 2.6508     | 2.6589     | 2.6698     |   0.5 | 27.09
+Neigh   | 0.1516     | 0.15276    | 0.15406    |   0.3 |  1.56
+Comm    | 0.047132   | 0.058969   | 0.066095   |   3.2 |  0.60
+Output  | 6.8886     | 6.8886     | 6.8886     |   0.0 | 70.17
+Modify  | 0.046437   | 0.04661    | 0.046825   |   0.1 |  0.47
+Other   |            | 0.01091    |            |       |  0.11
+
+Nlocal:           5400 ave        5416 max        5393 min
+Histogram: 2 1 0 0 0 0 0 0 0 1
+Nghost:        12902.8 ave       12911 max       12888 min
+Histogram: 1 0 0 0 0 0 1 0 0 2
+Neighs:         654322 ave      655602 max      650912 min
+Histogram: 1 0 0 0 0 0 0 0 0 3
+FullNghs:  1.31002e+06 ave 1.31507e+06 max 1.30683e+06 min
+Histogram: 1 1 0 1 0 0 0 0 0 1
+
+Total # of neighbors = 5240065
+Ave neighs/atom = 242.5956
+Neighbor list builds = 4
+Dangerous builds = 0
+Total wall time: 0:00:11
diff --git a/examples/reaxff/in.reaxff.tatb b/examples/reaxff/in.reaxff.tatb
index 6cf7828cf1..967ed0a1d6 100644
--- a/examples/reaxff/in.reaxff.tatb
+++ b/examples/reaxff/in.reaxff.tatb
@@ -31,8 +31,15 @@ neigh_modify    delay 0 every 5 check no
 fix             1 all nve
 fix             2 all qeq/reaxff 1 0.0 10.0 1.0e-6 reaxff
 fix             4 all reaxff/bonds 5 bonds.reaxff
+compute         bonds all reaxff/atom bonds yes
 variable nqeq   equal f_2
 
+# dumps out the local bond information
+dump 1 all local 5 bonds_local.reaxff c_bonds[1] c_bonds[2] c_bonds[3]
+
+# dumps out the peratom bond information
+dump 2 all custom 5 bonds_atom.reaxff id type q c_bonds[*]
+
 thermo          5
 thermo_style    custom step temp epair etotal press &
                 v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa &
diff --git a/fortran/lammps.f90 b/fortran/lammps.f90
index 28e40bca44..071dffebc0 100644
--- a/fortran/lammps.f90
+++ b/fortran/lammps.f90
@@ -118,6 +118,8 @@ MODULE LIBLAMMPS
     PROCEDURE :: extract_fix            => lmp_extract_fix
     PROCEDURE :: extract_variable       => lmp_extract_variable
     PROCEDURE :: set_variable           => lmp_set_variable
+    PROCEDURE :: set_string_variable    => lmp_set_string_variable
+    PROCEDURE :: set_internal_variable  => lmp_set_internal_variable
     PROCEDURE, PRIVATE :: lmp_gather_atoms_int
     PROCEDURE, PRIVATE :: lmp_gather_atoms_double
     GENERIC   :: gather_atoms           => lmp_gather_atoms_int, &
@@ -557,6 +559,21 @@ MODULE LIBLAMMPS
       INTEGER(c_int) :: lammps_set_variable
     END FUNCTION lammps_set_variable
 
+    FUNCTION lammps_set_string_variable(handle, name, str) BIND(C)
+      IMPORT :: c_int, c_ptr
+      IMPLICIT NONE
+      TYPE(c_ptr), VALUE :: handle, name, str
+      INTEGER(c_int) :: lammps_set_string_variable
+    END FUNCTION lammps_set_string_variable
+
+    FUNCTION lammps_set_internal_variable(handle, name, val) BIND(C)
+      IMPORT :: c_int, c_ptr, c_double
+      IMPLICIT NONE
+      TYPE(c_ptr), VALUE :: handle, name
+      REAL(c_double), VALUE :: val
+      INTEGER(c_int) :: lammps_set_internal_variable
+    END FUNCTION lammps_set_internal_variable
+
     SUBROUTINE lammps_gather_atoms(handle, name, type, count, data) BIND(C)
       IMPORT :: c_int, c_ptr
       IMPLICIT NONE
@@ -1631,6 +1648,43 @@ CONTAINS
     END IF
   END SUBROUTINE lmp_set_variable
 
+  ! equivalent function to lammps_set_string_variable
+  SUBROUTINE lmp_set_string_variable(self, name, str)
+    CLASS(lammps), INTENT(IN) :: self
+    CHARACTER(LEN=*), INTENT(IN) :: name, str
+    INTEGER :: err
+    TYPE(c_ptr) :: Cstr, Cname
+
+    Cstr = f2c_string(str)
+    Cname = f2c_string(name)
+    err = lammps_set_string_variable(self%handle, Cname, Cstr)
+    CALL lammps_free(Cname)
+    CALL lammps_free(Cstr)
+    IF (err /= 0) THEN
+      CALL lmp_error(self, LMP_ERROR_WARNING + LMP_ERROR_WORLD, &
+        'WARNING: unable to set string variable "' // name &
+        // '" [Fortran/set_variable]')
+    END IF
+  END SUBROUTINE lmp_set_string_variable
+
+  ! equivalent function to lammps_set_internal_variable
+  SUBROUTINE lmp_set_internal_variable(self, name, val)
+    CLASS(lammps), INTENT(IN) :: self
+    CHARACTER(LEN=*), INTENT(IN) :: name
+    REAL(KIND=c_double), INTENT(IN) :: val
+    INTEGER :: err
+    TYPE(c_ptr) :: Cstr, Cname
+
+    Cname = f2c_string(name)
+    err = lammps_set_internal_variable(self%handle, Cname, val)
+    CALL lammps_free(Cname)
+    IF (err /= 0) THEN
+      CALL lmp_error(self, LMP_ERROR_WARNING + LMP_ERROR_WORLD, &
+        'WARNING: unable to set internal variable "' // name &
+        // '" [Fortran/set_variable]')
+    END IF
+  END SUBROUTINE lmp_set_internal_variable
+
   ! equivalent function to lammps_gather_atoms (for integers)
   SUBROUTINE lmp_gather_atoms_int(self, name, count, data)
     CLASS(lammps), INTENT(IN) :: self
diff --git a/lib/gpu/Makefile.cuda b/lib/gpu/Makefile.cuda
index be8003e02e..75428c9513 100644
--- a/lib/gpu/Makefile.cuda
+++ b/lib/gpu/Makefile.cuda
@@ -134,11 +134,11 @@ $(OBJ_DIR)/scan_app.cu_o: cudpp_mini/scan_app.cu
 $(GPU_LIB): $(OBJS) $(CUDPP)
 	$(AR) -crusv $(GPU_LIB) $(OBJS) $(CUDPP)
 	@cp $(EXTRAMAKE) Makefile.lammps
- 
+
 # test app for querying device info
 
 $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
-	$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda 
+	$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
 
 clean:
 	-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo
diff --git a/lib/gpu/Makefile.cuda_mps b/lib/gpu/Makefile.cuda_mps
index 06d2ef0339..22a34c105c 100644
--- a/lib/gpu/Makefile.cuda_mps
+++ b/lib/gpu/Makefile.cuda_mps
@@ -133,11 +133,11 @@ $(OBJ_DIR)/scan_app.cu_o: cudpp_mini/scan_app.cu
 $(GPU_LIB): $(OBJS) $(CUDPP)
 	$(AR) -crusv $(GPU_LIB) $(OBJS) $(CUDPP)
 	@cp $(EXTRAMAKE) Makefile.lammps
- 
+
 # test app for querying device info
 
 $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
-	$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda 
+	$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
 
 clean:
 	-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo
diff --git a/lib/gpu/Makefile.hip b/lib/gpu/Makefile.hip
index f5a0d03608..0350c841c4 100644
--- a/lib/gpu/Makefile.hip
+++ b/lib/gpu/Makefile.hip
@@ -98,10 +98,10 @@ HIP_GPU_OPTS += $(HIP_OPTS) -I./
 ifeq (spirv,$(HIP_PLATFORM))
 	HIP_HOST_OPTS += -fPIC
 	HIP_GPU_CC  = $(HIP_PATH)/bin/hipcc -c
-	HIP_GPU_OPTS_S = 
+	HIP_GPU_OPTS_S =
 	HIP_GPU_OPTS_E =
 	HIP_KERNEL_SUFFIX = .cpp
-	HIP_LIBS_TARGET = 
+	HIP_LIBS_TARGET =
 	export HCC_AMDGPU_TARGET := $(HIP_ARCH)
 else ifeq (clang,$(HIP_COMPILER))
 	HIP_HOST_OPTS += -fPIC
diff --git a/lib/gpu/Makefile.lammps.mac_ocl b/lib/gpu/Makefile.lammps.mac_ocl
index 0073efa2ba..dbbd789464 100644
--- a/lib/gpu/Makefile.lammps.mac_ocl
+++ b/lib/gpu/Makefile.lammps.mac_ocl
@@ -2,4 +2,4 @@
 
 gpu_SYSINC = -DFFT_SINGLE
 gpu_SYSLIB = -framework OpenCL
-gpu_SYSPATH = 
+gpu_SYSPATH =
diff --git a/lib/gpu/Makefile.lammps.mingw-cross b/lib/gpu/Makefile.lammps.mingw-cross
index 12d833c744..0b304b0e0c 100644
--- a/lib/gpu/Makefile.lammps.mingw-cross
+++ b/lib/gpu/Makefile.lammps.mingw-cross
@@ -2,5 +2,5 @@
 # settings for OpenCL builds
 gpu_SYSINC =
 gpu_SYSLIB = -Wl,--enable-stdcall-fixup -L../../tools/mingw-cross$(LIBOBJDIR) -Wl,-Bdynamic,-lOpenCL,-Bstatic
-gpu_SYSPATH = 
+gpu_SYSPATH =
 
diff --git a/lib/gpu/Makefile.lammps.opencl b/lib/gpu/Makefile.lammps.opencl
index 413ae79210..50f5e63f77 100644
--- a/lib/gpu/Makefile.lammps.opencl
+++ b/lib/gpu/Makefile.lammps.opencl
@@ -2,4 +2,4 @@
 
 gpu_SYSINC =
 gpu_SYSLIB = -lOpenCL
-gpu_SYSPATH = 
+gpu_SYSPATH =
diff --git a/lib/gpu/Makefile.linux b/lib/gpu/Makefile.linux
index 3c37672e01..e02413f3ba 100644
--- a/lib/gpu/Makefile.linux
+++ b/lib/gpu/Makefile.linux
@@ -1,4 +1,4 @@
-# /* ----------------------------------------------------------------------   
+# /* ----------------------------------------------------------------------
 #  Generic Linux Makefile for CUDA
 #     - Change CUDA_ARCH for your GPU
 # ------------------------------------------------------------------------- */
diff --git a/lib/gpu/Makefile.linux_multi b/lib/gpu/Makefile.linux_multi
index 3299bbec3a..e3a76d9934 100644
--- a/lib/gpu/Makefile.linux_multi
+++ b/lib/gpu/Makefile.linux_multi
@@ -1,4 +1,4 @@
-# /* ----------------------------------------------------------------------   
+# /* ----------------------------------------------------------------------
 #  Generic Linux Makefile for CUDA complied for multiple compute capabilities
 #     - Add your GPU to CUDA_CODE
 # ------------------------------------------------------------------------- */
@@ -65,7 +65,7 @@ CUDA_PRECISION = -D_SINGLE_DOUBLE
 
 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64 -L$(CUDA_HOME)/lib64/stubs
-CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC
+CUDA_OPTS = -DUNIX -O3 --use_fast_math $(LMP_INC) -Xcompiler -fPIC -allow-unsupported-compiler
 
 CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC -std=c++11
 CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
diff --git a/lib/gpu/Makefile.linux_opencl b/lib/gpu/Makefile.linux_opencl
index 43d012dc4a..b4b25544ee 100644
--- a/lib/gpu/Makefile.linux_opencl
+++ b/lib/gpu/Makefile.linux_opencl
@@ -1,4 +1,4 @@
-# /* ----------------------------------------------------------------------   
+# /* ----------------------------------------------------------------------
 #  Generic Linux Makefile for OpenCL - Mixed precision
 # ------------------------------------------------------------------------- */
 
@@ -11,7 +11,7 @@ EXTRAMAKE = Makefile.lammps.opencl
 
 LMP_INC = -DLAMMPS_SMALLBIG
 
-OCL_INC = 
+OCL_INC =
 OCL_CPP = mpic++ -std=c++11 -O3 -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC)
 OCL_LINK = -lOpenCL
 OCL_PREC = -D_SINGLE_DOUBLE
diff --git a/lib/gpu/Makefile.mac_opencl b/lib/gpu/Makefile.mac_opencl
index ae7e8ca6fd..3a9fd39f35 100644
--- a/lib/gpu/Makefile.mac_opencl
+++ b/lib/gpu/Makefile.mac_opencl
@@ -1,4 +1,4 @@
-# /* ----------------------------------------------------------------------   
+# /* ----------------------------------------------------------------------
 #  Generic Mac Makefile for OpenCL - Single precision with FFT_SINGLE
 # ------------------------------------------------------------------------- */
 
diff --git a/lib/gpu/Makefile.mac_opencl_mpi b/lib/gpu/Makefile.mac_opencl_mpi
index 9be9f07e93..b0c6e39aae 100644
--- a/lib/gpu/Makefile.mac_opencl_mpi
+++ b/lib/gpu/Makefile.mac_opencl_mpi
@@ -1,4 +1,4 @@
-# /* ----------------------------------------------------------------------   
+# /* ----------------------------------------------------------------------
 #  Generic Mac Makefile for OpenCL - Single precision with FFT_SINGLE
 # ------------------------------------------------------------------------- */
 
diff --git a/lib/gpu/Makefile.oneapi b/lib/gpu/Makefile.oneapi
index 32800676aa..e67f4bb082 100644
--- a/lib/gpu/Makefile.oneapi
+++ b/lib/gpu/Makefile.oneapi
@@ -1,4 +1,4 @@
-# /* ----------------------------------------------------------------------   
+# /* ----------------------------------------------------------------------
 #  Linux Makefile for Intel oneAPI - Mixed precision
 # ------------------------------------------------------------------------- */
 
diff --git a/lib/gpu/Makefile.oneapi_prof b/lib/gpu/Makefile.oneapi_prof
index 1e21597373..58a03392e2 100644
--- a/lib/gpu/Makefile.oneapi_prof
+++ b/lib/gpu/Makefile.oneapi_prof
@@ -1,4 +1,4 @@
-# /* ----------------------------------------------------------------------   
+# /* ----------------------------------------------------------------------
 #  Linux Makefile for Intel oneAPI - Mixed precision (with timing enabled)
 # ------------------------------------------------------------------------- */
 
diff --git a/lib/gpu/Makefile.serial b/lib/gpu/Makefile.serial
index 6c94911f32..67d2ce927d 100644
--- a/lib/gpu/Makefile.serial
+++ b/lib/gpu/Makefile.serial
@@ -1,4 +1,4 @@
-# /* ----------------------------------------------------------------------   
+# /* ----------------------------------------------------------------------
 #  Generic Linux Makefile for CUDA without MPI libraries
 #     - Change CUDA_ARCH for your GPU
 # ------------------------------------------------------------------------- */
diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile
index 298d404117..d351b87b37 100644
--- a/lib/gpu/Nvidia.makefile
+++ b/lib/gpu/Nvidia.makefile
@@ -11,7 +11,7 @@ HOST_H = lal_answer.h lal_atom.h lal_balance.h lal_base_atomic.h lal_base_amoeba
          lal_base_charge.h lal_base_dipole.h lal_base_dpd.h \
          lal_base_ellipsoid.h lal_base_three.h lal_device.h lal_neighbor.h \
          lal_neighbor_shared.h lal_pre_ocl_config.h $(NVD_H)
-         
+
 # Source files
 SRCS := $(wildcard ./lal_*.cpp)
 OBJS := $(subst ./,$(OBJ_DIR)/,$(SRCS:%.cpp=%.o))
@@ -127,7 +127,7 @@ $(GPU_LIB): $(OBJS) $(CUDPP)
 # test app for querying device info
 
 $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
-	$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda 
+	$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
 
 clean:
 	-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.cubin *.linkinfo
diff --git a/lib/gpu/Nvidia.makefile_multi b/lib/gpu/Nvidia.makefile_multi
index ddbee4f2a1..c4b27ebbcb 100644
--- a/lib/gpu/Nvidia.makefile_multi
+++ b/lib/gpu/Nvidia.makefile_multi
@@ -89,7 +89,7 @@ $(GPU_LIB): $(OBJS) $(CUDPP)
 # test app for querying device info
 
 $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
-	$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda 
+	$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
 
 clean:
 	-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo
diff --git a/lib/gpu/geryon/nvd_device.h b/lib/gpu/geryon/nvd_device.h
index e63a1f56b2..e627a7ca60 100644
--- a/lib/gpu/geryon/nvd_device.h
+++ b/lib/gpu/geryon/nvd_device.h
@@ -138,7 +138,7 @@ class UCL_Device {
   /** \note You cannot delete the default stream **/
   inline void pop_command_queue() {
     if (_cq.size()<2) return;
-    CU_SAFE_CALL_NS(cuStreamDestroy(_cq.back()));
+    cuStreamDestroy(_cq.back());
     _cq.pop_back();
   }
 
@@ -426,8 +426,8 @@ void UCL_Device::clear() {
   if (_device>-1) {
     for (int i=1; i<num_queues(); i++) pop_command_queue();
 #if GERYON_NVD_PRIMARY_CONTEXT
-    CU_SAFE_CALL_NS(cuCtxSetCurrent(_old_context));
-    CU_SAFE_CALL_NS(cuDevicePrimaryCtxRelease(_cu_device));
+    cuCtxSetCurrent(_old_context);
+    cuDevicePrimaryCtxRelease(_cu_device);
 #else
     cuCtxDestroy(_context);
 #endif
diff --git a/lib/gpu/geryon/ocl_mat.h b/lib/gpu/geryon/ocl_mat.h
index 3135594dc3..66ca6ab527 100644
--- a/lib/gpu/geryon/ocl_mat.h
+++ b/lib/gpu/geryon/ocl_mat.h
@@ -54,6 +54,6 @@ namespace ucl_opencl {
 #include "ucl_print.h"
 #undef UCL_PRINT_ALLOW
 
-} // namespace ucl_cudart
+} // namespace ucl_opencl
 
 #endif
diff --git a/lib/gpu/geryon/ocl_memory.h b/lib/gpu/geryon/ocl_memory.h
index 5d8b9808bd..e665654071 100644
--- a/lib/gpu/geryon/ocl_memory.h
+++ b/lib/gpu/geryon/ocl_memory.h
@@ -108,7 +108,7 @@ inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
     return UCL_MEMORY_ERROR;
   *mat.host_ptr() = (typename mat_type::data_type*)
     clEnqueueMapBuffer(cm.cq(),mat.cbegin(),CL_TRUE,
-		       map_perm,0,n,0,NULL,NULL,NULL);
+                       map_perm,0,n,0,NULL,NULL,NULL);
 
   mat.cq()=cm.cq();
   CL_SAFE_CALL(clRetainCommandQueue(mat.cq()));
diff --git a/lib/gpu/lal_amoeba.cpp b/lib/gpu/lal_amoeba.cpp
index 5e19997913..805c4c4b26 100644
--- a/lib/gpu/lal_amoeba.cpp
+++ b/lib/gpu/lal_amoeba.cpp
@@ -281,13 +281,7 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
 
   const int BX=this->block_size();
   const int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
-  /*
-  const int cus = this->device->gpu->cus();
-  while (GX < cus && GX > 1) {
-    BX /= 2;
-    GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
-  }
-  */
+
   this->time_pair.start();
 
   // Build the short neighbor list if not done yet
diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu
index e7c313301e..a92509f06d 100644
--- a/lib/gpu/lal_amoeba.cu
+++ b/lib/gpu/lal_amoeba.cu
@@ -2033,13 +2033,13 @@ __kernel void k_amoeba_special15(__global int * dev_nbor,
                           const __global tagint *restrict special15,
                           const int inum, const int nall, const int nbor_pitch,
                           const int t_per_atom) {
-  int tid, ii, offset, n_stride, i;
+  int tid, ii, offset, n_stride, j;
   atom_info(t_per_atom,ii,tid,offset);
 
   if (ii<inum) {
 
     int numj, nbor, nbor_end;
-    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,j,numj,
               n_stride,nbor_end,nbor);
 
     int n15 = nspecial15[ii];
@@ -2048,7 +2048,7 @@ __kernel void k_amoeba_special15(__global int * dev_nbor,
 
       int sj=dev_packed[nbor];
       int which = sj >> SBBITS & 3;
-      int j = sj & NEIGHMASK;
+      j = sj & NEIGHMASK;
       tagint jtag = tag[j];
 
       if (!which) {
diff --git a/lib/gpu/lal_base_dpd.cpp b/lib/gpu/lal_base_dpd.cpp
index e103699d40..0ddd24d21e 100644
--- a/lib/gpu/lal_base_dpd.cpp
+++ b/lib/gpu/lal_base_dpd.cpp
@@ -56,7 +56,8 @@ int BaseDPDT::init_atomic(const int nlocal, const int nall,
                           const int max_nbors, const int maxspecial,
                           const double cell_size, const double gpu_split,
                           FILE *_screen, const void *pair_program,
-                          const char *k_name, const int onetype) {
+                          const char *k_name, const int onetype,
+                          const int extra_fields) {
   screen=_screen;
 
   int gpu_nbor=0;
@@ -75,7 +76,8 @@ int BaseDPDT::init_atomic(const int nlocal, const int nall,
   bool charge = false;
   bool rot = false;
   bool vel = true;
-  int success=device->init(*ans,charge,rot,nlocal,nall,maxspecial,vel);
+  _extra_fields = extra_fields;
+  int success=device->init(*ans,charge,rot,nlocal,nall,maxspecial,vel,_extra_fields/4);
   if (success!=0)
     return success;
 
diff --git a/lib/gpu/lal_base_dpd.h b/lib/gpu/lal_base_dpd.h
index 9eb56993af..64ec725d95 100644
--- a/lib/gpu/lal_base_dpd.h
+++ b/lib/gpu/lal_base_dpd.h
@@ -53,7 +53,7 @@ class BaseDPD {
                   const int maxspecial, const double cell_size,
                   const double gpu_split, FILE *screen,
                   const void *pair_program, const char *k_name,
-                  const int onetype=0);
+                  const int onetype=0, const int extra_fields=0);
 
   /// Estimate the overhead for GPU context changes and CPU driver
   void estimate_gpu_overhead();
@@ -167,7 +167,6 @@ class BaseDPD {
   /// Atom Data
   Atom<numtyp,acctyp> *atom;
 
-
   // ------------------------ FORCE/ENERGY DATA -----------------------
 
   Answer<numtyp,acctyp> *ans;
@@ -199,7 +198,7 @@ class BaseDPD {
 
  protected:
   bool _compiled;
-  int _block_size, _threads_per_atom, _onetype;
+  int _block_size, _threads_per_atom, _onetype, _extra_fields;
   double  _max_bytes, _max_an_bytes;
   double _gpu_overhead, _driver_overhead;
   UCL_D_Vec<int> *_nbor_data;
diff --git a/lib/gpu/lal_base_sph.cpp b/lib/gpu/lal_base_sph.cpp
new file mode 100644
index 0000000000..22ef5964ea
--- /dev/null
+++ b/lib/gpu/lal_base_sph.cpp
@@ -0,0 +1,362 @@
+/***************************************************************************
+                               base_sph.cpp
+                             -------------------
+                            Trung Nguyen (U Chicago)
+
+  Base class for SPH pair styles needing per-particle data for position,
+  velocity, and type.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : December 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include "lal_base_sph.h"
+namespace LAMMPS_AL {
+#define BaseSPHT BaseSPH<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> global_device;
+
+template <class numtyp, class acctyp>
+BaseSPHT::BaseSPH() : _compiled(false), _max_bytes(0) {
+  device=&global_device;
+  ans=new Answer<numtyp,acctyp>();
+  nbor=new Neighbor();
+  pair_program=nullptr;
+  ucl_device=nullptr;
+  #if defined(LAL_OCL_EV_JIT)
+  pair_program_noev=nullptr;
+  #endif
+}
+
+template <class numtyp, class acctyp>
+BaseSPHT::~BaseSPH() {
+  delete ans;
+  delete nbor;
+  k_pair_fast.clear();
+  k_pair.clear();
+  if (pair_program) delete pair_program;
+  #if defined(LAL_OCL_EV_JIT)
+  k_pair_noev.clear();
+  if (pair_program_noev) delete pair_program_noev;
+  #endif
+}
+
+template <class numtyp, class acctyp>
+int BaseSPHT::bytes_per_atom_atomic(const int max_nbors) const {
+  return device->atom.bytes_per_atom()+ans->bytes_per_atom()+
+         nbor->bytes_per_atom(max_nbors);
+}
+
+template <class numtyp, class acctyp>
+int BaseSPHT::init_atomic(const int nlocal, const int nall,
+                          const int max_nbors, const int maxspecial,
+                          const double cell_size, const double gpu_split,
+                          FILE *_screen, const void *pair_program,
+                          const char *k_name, const int onetype,
+                          const int extra_fields) {
+  screen=_screen;
+
+  int gpu_nbor=0;
+  if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_NEIGH)
+    gpu_nbor=1;
+  else if (device->gpu_mode()==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
+    gpu_nbor=2;
+
+  int _gpu_host=0;
+  int host_nlocal=hd_balancer.first_host_count(nlocal,gpu_split,gpu_nbor);
+  if (host_nlocal>0)
+    _gpu_host=1;
+
+  _threads_per_atom=device->threads_per_atom();
+
+  bool charge = false;
+  bool rot = false;
+  bool vel = true;
+  _extra_fields = extra_fields;
+  int success=device->init(*ans,charge,rot,nlocal,nall,maxspecial,vel,_extra_fields/4);
+  if (success!=0)
+    return success;
+
+  if (ucl_device!=device->gpu) _compiled=false;
+
+  ucl_device=device->gpu;
+  atom=&device->atom;
+
+  _block_size=device->pair_block_size();
+  compile_kernels(*ucl_device,pair_program,k_name,onetype);
+
+  if (_threads_per_atom>1 && gpu_nbor==0) {
+    nbor->packing(true);
+    _nbor_data=&(nbor->dev_packed);
+  } else
+    _nbor_data=&(nbor->dev_nbor);
+
+  success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
+                  max_nbors,cell_size,false,_threads_per_atom);
+  if (success!=0)
+    return success;
+
+  // Initialize host-device load balancer
+  hd_balancer.init(device,gpu_nbor,gpu_split);
+
+  // Initialize timers for the selected GPU
+  time_pair.init(*ucl_device);
+  time_pair.zero();
+
+  pos_tex.bind_float(atom->x,4);
+  vel_tex.bind_float(atom->v,4);
+
+  _max_an_bytes=ans->gpu_bytes()+nbor->gpu_bytes();
+
+  return success;
+}
+
+template <class numtyp, class acctyp>
+void BaseSPHT::estimate_gpu_overhead() {
+  device->estimate_gpu_overhead(1,_gpu_overhead,_driver_overhead);
+}
+
+template <class numtyp, class acctyp>
+void BaseSPHT::clear_atomic() {
+  // Output any timing information
+  acc_timers();
+  double avg_split=hd_balancer.all_avg_split();
+  _gpu_overhead*=hd_balancer.timestep();
+  _driver_overhead*=hd_balancer.timestep();
+  device->output_times(time_pair,*ans,*nbor,avg_split,_max_bytes+_max_an_bytes,
+                       _gpu_overhead,_driver_overhead,_threads_per_atom,screen);
+
+  time_pair.clear();
+  hd_balancer.clear();
+
+  nbor->clear();
+  ans->clear();
+}
+
+// ---------------------------------------------------------------------------
+// Copy neighbor list from host
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int * BaseSPHT::reset_nbors(const int nall, const int inum, int *ilist,
+                            int *numj, int **firstneigh, bool &success) {
+  success=true;
+
+  int mn=nbor->max_nbor_loop(inum,numj,ilist);
+  resize_atom(inum,nall,success);
+  resize_local(inum,mn,success);
+  if (!success)
+    return nullptr;
+
+  nbor->get_host(inum,ilist,numj,firstneigh,block_size());
+
+  double bytes=ans->gpu_bytes()+nbor->gpu_bytes();
+  if (bytes>_max_an_bytes)
+    _max_an_bytes=bytes;
+
+  return ilist;
+}
+
+// ---------------------------------------------------------------------------
+// Build neighbor list on device
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+inline void BaseSPHT::build_nbor_list(const int inum, const int host_inum,
+                                      const int nall, double **host_x,
+                                      int *host_type, double *sublo,
+                                      double *subhi, tagint *tag,
+                                      int **nspecial, tagint **special,
+                                      bool &success) {
+  success=true;
+  resize_atom(inum,nall,success);
+  resize_local(inum,host_inum,nbor->max_nbors(),success);
+  if (!success)
+    return;
+  atom->cast_copy_x(host_x,host_type);
+
+  int mn;
+  nbor->build_nbor_list(host_x, inum, host_inum, nall, *atom, sublo, subhi,
+                        tag, nspecial, special, success, mn, ans->error_flag);
+
+  double bytes=ans->gpu_bytes()+nbor->gpu_bytes();
+  if (bytes>_max_an_bytes)
+    _max_an_bytes=bytes;
+}
+
+// ---------------------------------------------------------------------------
+// Copy nbor list from host if necessary and then calculate forces, virials,..
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+void BaseSPHT::compute(const int f_ago, const int inum_full, const int nall,
+                       double **host_x, int *host_type, int *ilist, int *numj,
+                       int **firstneigh, const bool eflag_in, const bool vflag_in,
+                       const bool eatom, const bool vatom, int &host_start,
+                       const double cpu_time, bool &success, tagint *tag,
+                       double **host_v) {
+  acc_timers();
+  int eflag, vflag;
+  if (eatom) eflag=2;
+  else if (eflag_in) eflag=1;
+  else eflag=0;
+  if (vatom) vflag=2;
+  else if (vflag_in) vflag=1;
+  else vflag=0;
+
+  #ifdef LAL_NO_BLOCK_REDUCE
+  if (eflag) eflag=2;
+  if (vflag) vflag=2;
+  #endif
+
+  set_kernel(eflag,vflag);
+  if (inum_full==0) {
+    host_start=0;
+    // Make sure textures are correct if realloc by a different hybrid style
+    resize_atom(0,nall,success);
+    zero_timers();
+    return;
+  }
+
+  int ago=hd_balancer.ago_first(f_ago);
+  int inum=hd_balancer.balance(ago,inum_full,cpu_time);
+  ans->inum(inum);
+  host_start=inum;
+
+  if (ago==0) {
+    reset_nbors(nall, inum, ilist, numj, firstneigh, success);
+    if (!success)
+      return;
+  }
+
+  atom->cast_x_data(host_x,host_type);
+  atom->cast_v_data(host_v,tag);
+  hd_balancer.start_timer();
+  atom->add_x_data(host_x,host_type);
+  atom->add_v_data(host_v,tag);
+
+  const int red_blocks=loop(eflag,vflag);
+  ans->copy_answers(eflag_in,vflag_in,eatom,vatom,ilist,red_blocks);
+  device->add_ans_object(ans);
+  hd_balancer.stop_timer();
+}
+
+// ---------------------------------------------------------------------------
+// Reneighbor on GPU if necessary and then compute forces, virials, energies
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int** BaseSPHT::compute(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, double *sublo,
+                        double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag_in, const bool vflag_in,
+                        const bool eatom, const bool vatom, int &host_start,
+                        int **ilist, int **jnum, const double cpu_time, bool &success,
+                        double **host_v) {
+  acc_timers();
+  int eflag, vflag;
+  if (eatom) eflag=2;
+  else if (eflag_in) eflag=1;
+  else eflag=0;
+  if (vatom) vflag=2;
+  else if (vflag_in) vflag=1;
+  else vflag=0;
+
+  #ifdef LAL_NO_BLOCK_REDUCE
+  if (eflag) eflag=2;
+  if (vflag) vflag=2;
+  #endif
+
+  set_kernel(eflag,vflag);
+  if (inum_full==0) {
+    host_start=0;
+    // Make sure textures are correct if realloc by a different hybrid style
+    resize_atom(0,nall,success);
+    zero_timers();
+    return nullptr;
+  }
+
+  hd_balancer.balance(cpu_time);
+  int inum=hd_balancer.get_gpu_count(ago,inum_full);
+  ans->inum(inum);
+  host_start=inum;
+
+  // Build neighbor list on GPU if necessary
+  if (ago==0) {
+    build_nbor_list(inum, inum_full-inum, nall, host_x, host_type,
+                    sublo, subhi, tag, nspecial, special, success);
+    if (!success)
+      return nullptr;
+    atom->cast_v_data(host_v,tag);
+    hd_balancer.start_timer();
+  } else {
+    atom->cast_x_data(host_x,host_type);
+    atom->cast_v_data(host_v,tag);
+    hd_balancer.start_timer();
+    atom->add_x_data(host_x,host_type);
+  }
+  atom->add_v_data(host_v,tag);
+  *ilist=nbor->host_ilist.begin();
+  *jnum=nbor->host_acc.begin();
+
+  const int red_blocks=loop(eflag,vflag);
+  ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
+  device->add_ans_object(ans);
+  hd_balancer.stop_timer();
+
+  return nbor->host_jlist.begin()-host_start;
+}
+
+template <class numtyp, class acctyp>
+double BaseSPHT::host_memory_usage_atomic() const {
+  return device->atom.host_memory_usage()+nbor->host_memory_usage()+
+         4*sizeof(numtyp)+sizeof(BaseSPH<numtyp,acctyp>);
+}
+
+template <class numtyp, class acctyp>
+void BaseSPHT::compile_kernels(UCL_Device &dev, const void *pair_str,
+                               const char *kname, const int onetype) {
+  if (_compiled && _onetype==onetype)
+    return;
+
+  _onetype=onetype;
+
+  std::string s_fast=std::string(kname)+"_fast";
+  if (pair_program) delete pair_program;
+  pair_program=new UCL_Program(dev);
+  std::string oclstring = device->compile_string()+" -DEVFLAG=1";
+  if (_onetype) oclstring+=" -DONETYPE="+device->toa(_onetype);
+  pair_program->load_string(pair_str,oclstring.c_str(),nullptr,screen);
+  k_pair_fast.set_function(*pair_program,s_fast.c_str());
+  k_pair.set_function(*pair_program,kname);
+  pos_tex.get_texture(*pair_program,"pos_tex");
+  vel_tex.get_texture(*pair_program,"vel_tex");
+
+  #if defined(LAL_OCL_EV_JIT)
+  oclstring = device->compile_string()+" -DEVFLAG=0";
+  if (_onetype) oclstring+=" -DONETYPE="+device->toa(_onetype);
+  if (pair_program_noev) delete pair_program_noev;
+  pair_program_noev=new UCL_Program(dev);
+  pair_program_noev->load_string(pair_str,oclstring.c_str(),nullptr,screen);
+  k_pair_noev.set_function(*pair_program_noev,s_fast.c_str());
+  #else
+  k_pair_sel = &k_pair_fast;
+  #endif
+
+  _compiled=true;
+
+  #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
+  if (dev.has_subgroup_support()) {
+    size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
+    #if defined(LAL_OCL_EV_JIT)
+    mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));
+    #endif
+    if (_threads_per_atom > (int)mx_subgroup_sz) _threads_per_atom = mx_subgroup_sz;
+    device->set_simd_size(mx_subgroup_sz);
+  }
+  #endif
+
+}
+
+template class BaseSPH<PRECISION,ACC_PRECISION>;
+}
diff --git a/lib/gpu/lal_base_sph.h b/lib/gpu/lal_base_sph.h
new file mode 100644
index 0000000000..46d2879093
--- /dev/null
+++ b/lib/gpu/lal_base_sph.h
@@ -0,0 +1,209 @@
+/***************************************************************************
+                                base_sph.h
+                             -------------------
+                            Trung Nguyen (U Chicago)
+
+  Base class for SPH pair styles needing per-particle data for position,
+  velocity, and type.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : December 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_BASE_SPH_H
+#define LAL_BASE_SPH_H
+
+#include "lal_device.h"
+#include "lal_balance.h"
+#include "mpi.h"
+
+#ifdef USE_OPENCL
+#include "geryon/ocl_texture.h"
+#elif defined(USE_HIP)
+#include "geryon/hip_texture.h"
+#else
+#include "geryon/nvd_texture.h"
+#endif
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class BaseSPH {
+ public:
+  BaseSPH();
+  virtual ~BaseSPH();
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    * \param k_name name for the kernel for force calculation
+    *
+    * Returns:
+    * -  0 if successful
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init_atomic(const int nlocal, const int nall, const int max_nbors,
+                  const int maxspecial, const double cell_size,
+                  const double gpu_split, FILE *screen,
+                  const void *pair_program, const char *k_name,
+                  const int onetype=0, const int extra_fields=0);
+
+  /// Estimate the overhead for GPU context changes and CPU driver
+  void estimate_gpu_overhead();
+
+  /// Check if there is enough storage for atom arrays and realloc if not
+  /** \param success set to false if insufficient memory **/
+  inline void resize_atom(const int inum, const int nall, bool &success) {
+    if (atom->resize(nall, success)) {
+      pos_tex.bind_float(atom->x,4);
+      vel_tex.bind_float(atom->v,4);
+    }
+    ans->resize(inum,success);
+  }
+
+  /// Check if there is enough storage for neighbors and realloc if not
+  /** \param nlocal number of particles whose nbors must be stored on device
+    * \param host_inum number of particles whose nbors need to copied to host
+    * \param current maximum number of neighbors
+    * \note olist_size=total number of local particles **/
+  inline void resize_local(const int inum, const int max_nbors, bool &success) {
+    nbor->resize(inum,max_nbors,success);
+  }
+
+  /// Check if there is enough storage for neighbors and realloc if not
+  /** \param nlocal number of particles whose nbors must be stored on device
+    * \param host_inum number of particles whose nbors need to copied to host
+    * \param current maximum number of neighbors
+    * \note host_inum is 0 if the host is performing neighboring
+    * \note nlocal+host_inum=total number local particles
+    * \note olist_size=0 **/
+  inline void resize_local(const int inum, const int host_inum,
+                           const int max_nbors, bool &success) {
+    nbor->resize(inum,host_inum,max_nbors,success);
+  }
+
+  /// Clear all host and device data
+  /** \note This is called at the beginning of the init() routine **/
+  void clear_atomic();
+
+  /// Returns memory usage on device per atom
+  int bytes_per_atom_atomic(const int max_nbors) const;
+
+  /// Total host memory used by library for pair style
+  double host_memory_usage_atomic() const;
+
+  /// Accumulate timers
+  inline void acc_timers() {
+    if (device->time_device()) {
+      nbor->acc_timers(screen);
+      time_pair.add_to_total();
+      atom->acc_timers();
+      ans->acc_timers();
+    }
+  }
+
+  /// Zero timers
+  inline void zero_timers() {
+    time_pair.zero();
+    atom->zero_timers();
+    ans->zero_timers();
+  }
+
+  /// Copy neighbor list from host
+  int * reset_nbors(const int nall, const int inum, int *ilist, int *numj,
+                    int **firstneigh, bool &success);
+
+  /// Build neighbor list on device
+  void build_nbor_list(const int inum, const int host_inum,
+                       const int nall, double **host_x, int *host_type,
+                       double *sublo, double *subhi, tagint *tag, int **nspecial,
+                       tagint **special, bool &success);
+
+  /// Pair loop with host neighboring
+  void compute(const int f_ago, const int inum_full, const int nall,
+               double **host_x, int *host_type, int *ilist, int *numj,
+               int **firstneigh, const bool eflag, const bool vflag,
+               const bool eatom, const bool vatom, int &host_start,
+               const double cpu_time, bool &success, tagint *tag,
+               double **v);
+
+  /// Pair loop with device neighboring
+  int** compute(const int ago, const int inum_full, const int nall,
+                double **host_x, int *host_type, double *sublo,
+                double *subhi, tagint *tag, int **nspecial,
+                tagint **special, const bool eflag, const bool vflag,
+                const bool eatom, const bool vatom, int &host_start,
+                int **ilist, int **numj, const double cpu_time, bool &success,
+                double **v);
+
+  // -------------------------- DEVICE DATA -------------------------
+
+  /// Device Properties and Atom and Neighbor storage
+  Device<numtyp,acctyp> *device;
+
+  /// Geryon device
+  UCL_Device *ucl_device;
+
+  /// Device Timers
+  UCL_Timer time_pair;
+
+  /// Host device load balancer
+  Balance<numtyp,acctyp> hd_balancer;
+
+  /// LAMMPS pointer for screen output
+  FILE *screen;
+
+  // --------------------------- ATOM DATA --------------------------
+
+  /// Atom Data
+  Atom<numtyp,acctyp> *atom;
+
+  // ------------------------ FORCE/ENERGY DATA -----------------------
+
+  Answer<numtyp,acctyp> *ans;
+
+  // --------------------------- NBOR DATA ----------------------------
+
+  /// Neighbor data
+  Neighbor *nbor;
+
+  // ------------------------- DEVICE KERNELS -------------------------
+  UCL_Program *pair_program, *pair_program_noev;
+  UCL_Kernel k_pair_fast, k_pair, k_pair_noev, *k_pair_sel;
+  inline int block_size() { return _block_size; }
+  inline void set_kernel(const int eflag, const int vflag) {
+    #if defined(LAL_OCL_EV_JIT)
+    if (eflag || vflag) k_pair_sel = &k_pair_fast;
+    else k_pair_sel = &k_pair_noev;
+    #endif
+  }
+
+
+  // --------------------------- TEXTURES -----------------------------
+  UCL_Texture pos_tex;
+  UCL_Texture vel_tex;
+
+  // ------------------------- COMMON VARS ----------------------------
+
+ protected:
+  bool _compiled;
+  int _block_size, _threads_per_atom, _onetype, _extra_fields;
+  double  _max_bytes, _max_an_bytes;
+  double _gpu_overhead, _driver_overhead;
+  UCL_D_Vec<int> *_nbor_data;
+
+  void compile_kernels(UCL_Device &dev, const void *pair_string,
+                       const char *k, const int onetype);
+  virtual int loop(const int eflag, const int vflag) = 0;
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_coul_slater_long.cpp b/lib/gpu/lal_coul_slater_long.cpp
new file mode 100644
index 0000000000..42eb86e8ff
--- /dev/null
+++ b/lib/gpu/lal_coul_slater_long.cpp
@@ -0,0 +1,150 @@
+/***************************************************************************
+                           coul_slater_long_ext.cpp
+                           ------------------------
+                           Trung Nguyen (U Chicago)
+
+  Class for acceleration of the coul/slater/long pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : September 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "coul_slater_long_cl.h"
+#elif defined(USE_CUDART)
+const char *coul_slater_long=0;
+#else
+#include "coul_slater_long_cubin.h"
+#endif
+
+#include "lal_coul_slater_long.h"
+#include <cassert>
+namespace LAMMPS_AL {
+#define CoulSlaterLongT CoulSlaterLong<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> pair_gpu_device;
+
+template <class numtyp, class acctyp>
+CoulSlaterLongT::CoulSlaterLong() : BaseCharge<numtyp,acctyp>(), _allocated(false) {
+}
+
+template <class numtyp, class acctyp>
+CoulSlaterLongT::~CoulSlaterLong() {
+  clear();
+}
+
+template <class numtyp, class acctyp>
+int CoulSlaterLongT::bytes_per_atom(const int max_nbors) const {
+  return this->bytes_per_atom_atomic(max_nbors);
+}
+
+template <class numtyp, class acctyp>
+int CoulSlaterLongT::init(const int ntypes, double **host_scale,
+                    const int nlocal, const int nall, const int max_nbors,
+                    const int maxspecial, const double cell_size,
+                    const double gpu_split, FILE *_screen,
+                    const double host_cut_coulsq, double *host_special_coul,
+                    const double qqrd2e, const double g_ewald, double lamda) {
+  int success;
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
+                            gpu_split,_screen,coul_slater_long,"k_coul_slater_long");
+  if (success!=0)
+    return success;
+
+  int lj_types=ntypes;
+  shared_types=false;
+  int max_shared_types=this->device->max_shared_types();
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    shared_types=true;
+  }
+  _lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  scale.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack1(ntypes,lj_types,scale,host_write,host_scale);
+
+  sp_cl.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
+  for (int i=0; i<4; i++) {
+    host_write[i]=host_special_coul[i];
+  }
+  ucl_copy(sp_cl,host_write,4,false);
+
+  _cut_coulsq=host_cut_coulsq;
+  _qqrd2e=qqrd2e;
+  _g_ewald=g_ewald;
+  _lamda=lamda;
+
+  _allocated=true;
+  this->_max_bytes=scale.row_bytes()+sp_cl.row_bytes();
+  return 0;
+}
+
+template <class numtyp, class acctyp>
+void CoulSlaterLongT::reinit(const int ntypes, double **host_scale) {
+  UCL_H_Vec<numtyp> hscale(_lj_types*_lj_types,*(this->ucl_device),
+                           UCL_WRITE_ONLY);
+  this->atom->type_pack1(ntypes,_lj_types,scale,hscale,host_scale);
+}
+
+template <class numtyp, class acctyp>
+void CoulSlaterLongT::clear() {
+  if (!_allocated)
+    return;
+  _allocated=false;
+
+  scale.clear();
+  sp_cl.clear();
+  this->clear_atomic();
+}
+
+template <class numtyp, class acctyp>
+double CoulSlaterLongT::host_memory_usage() const {
+  return this->host_memory_usage_atomic()+sizeof(CoulSlaterLong<numtyp,acctyp>);
+}
+
+// ---------------------------------------------------------------------------
+// Calculate energies, forces, and torques
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int CoulSlaterLongT::loop(const int eflag, const int vflag) {
+  // Compute the block size and grid size to keep all cores busy
+  const int BX=this->block_size();
+  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
+                               (BX/this->_threads_per_atom)));
+
+  int ainum=this->ans->inum();
+  int nbor_pitch=this->nbor->nbor_pitch();
+  this->time_pair.start();
+  if (shared_types) {
+    this->k_pair_sel->set_size(GX,BX);
+    this->k_pair_sel->run(&this->atom->x, &scale, &sp_cl,
+                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                          &this->ans->force, &this->ans->engv,
+                          &eflag, &vflag, &ainum, &nbor_pitch,
+                          &this->atom->q, &_cut_coulsq, &_qqrd2e, &_g_ewald,
+                          &_lamda, &this->_threads_per_atom);
+  } else {
+    this->k_pair.set_size(GX,BX);
+    this->k_pair.run(&this->atom->x, &scale, &_lj_types, &sp_cl,
+                     &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                     &this->ans->force, &this->ans->engv, &eflag, &vflag,
+                     &ainum, &nbor_pitch, &this->atom->q, &_cut_coulsq,
+                     &_qqrd2e, &_g_ewald, &_lamda, &this->_threads_per_atom);
+  }
+  this->time_pair.stop();
+  return GX;
+}
+
+template class CoulSlaterLong<PRECISION,ACC_PRECISION>;
+}
diff --git a/lib/gpu/lal_coul_slater_long.cu b/lib/gpu/lal_coul_slater_long.cu
new file mode 100644
index 0000000000..49cf47b8b3
--- /dev/null
+++ b/lib/gpu/lal_coul_slater_long.cu
@@ -0,0 +1,251 @@
+// **************************************************************************
+//                           coul_slater_long.cu
+//                           -------------------
+//                         Trung Nguyen (U Chicago)
+//
+//  Device code for acceleration of the coul/slater/long pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : September 2023
+//    email                : ndactrung@gmail.com
+// ***************************************************************************
+
+#if defined(NV_KERNEL) || defined(USE_HIP)
+
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+_texture( pos_tex,float4);
+_texture( q_tex,float);
+#else
+_texture_2d( pos_tex,int4);
+_texture( q_tex,int2);
+#endif
+
+#else
+#define pos_tex x_
+#define q_tex q_
+#endif
+
+__kernel void k_coul_slater_long(const __global numtyp4 *restrict x_,
+                          const __global numtyp *restrict scale,
+                          const int lj_types,
+                          const __global numtyp *restrict sp_cl_in,
+                          const __global int *dev_nbor,
+                          const __global int *dev_packed,
+                          __global acctyp3 *restrict ans,
+                          __global acctyp *restrict engv,
+                          const int eflag, const int vflag, const int inum,
+                          const int nbor_pitch,
+                          const __global numtyp *restrict q_,
+                          const numtyp cut_coulsq, const numtyp qqrd2e,
+                          const numtyp g_ewald, const numtyp lamda,
+                          const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  __local numtyp sp_cl[4];
+  int n_stride;
+  local_allocate_store_charge();
+
+  sp_cl[0]=sp_cl_in[0];
+  sp_cl[1]=sp_cl_in[1];
+  sp_cl[2]=sp_cl_in[2];
+  sp_cl[3]=sp_cl_in[3];
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp e_coul, virial[6];
+  if (EVFLAG) {
+    e_coul=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int itype=ix.w;
+    numtyp qtmp; fetch(qtmp,i,q_tex);
+    numtyp lamdainv = ucl_recip(lamda);
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+      int j=dev_packed[nbor];
+
+      numtyp factor_coul;
+      factor_coul = (numtyp)1.0-sp_cl[sbmask(j)];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      int mtype=itype*lj_types+jtype;
+      if (rsq < cut_coulsq) {
+        numtyp r2inv=ucl_recip(rsq);
+        numtyp force, prefactor, _erfc;
+
+        numtyp r = ucl_rsqrt(r2inv);
+        numtyp grij = g_ewald * r;
+        numtyp expm2 = ucl_exp(-grij*grij);
+        numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
+        _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+        fetch(prefactor,j,q_tex);
+        prefactor *= qqrd2e * scale[mtype] * qtmp/r;
+        numtyp rlamdainv = r * lamdainv;
+        numtyp exprlmdainv = ucl_exp((numtyp)-2.0*rlamdainv);
+        numtyp slater_term = exprlmdainv*((numtyp)1.0 + ((numtyp)2.0*rlamdainv*((numtyp)1.0+rlamdainv)));
+        force = prefactor*(_erfc + EWALD_F*grij*expm2-slater_term);
+        if (factor_coul > (numtyp)0) force -= factor_coul*prefactor*((numtyp)1.0-slater_term);
+        force *= r2inv;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (EVFLAG && eflag) {
+          numtyp e_slater = ((numtyp)1.0 + rlamdainv)*exprlmdainv;
+          numtyp e = prefactor*(_erfc-e_slater);
+          if (factor_coul > (numtyp)0) e -= factor_coul*prefactor*((numtyp)1.0 - e_slater);
+          e_coul += e;
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+  } // if ii
+  acctyp energy;
+  if (EVFLAG) energy=(acctyp)0.0;
+  store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
+                  vflag,ans,engv);
+}
+
+__kernel void k_coul_slater_long_fast(const __global numtyp4 *restrict x_,
+                               const __global numtyp *restrict scale_in,
+                               const __global numtyp *restrict sp_cl_in,
+                               const __global int *dev_nbor,
+                               const __global int *dev_packed,
+                               __global acctyp3 *restrict ans,
+                               __global acctyp *restrict engv,
+                               const int eflag, const int vflag, const int inum,
+                               const int nbor_pitch,
+                               const __global numtyp *restrict q_,
+                               const numtyp cut_coulsq, const numtyp qqrd2e,
+                               const numtyp g_ewald, const numtyp lamda,
+                               const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  __local numtyp scale[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp sp_cl[4];
+  int n_stride;
+  local_allocate_store_charge();
+
+  if (tid<4)
+    sp_cl[tid]=sp_cl_in[tid];
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES)
+    scale[tid]=scale_in[tid];
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp e_coul, virial[6];
+  if (EVFLAG) {
+    e_coul=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+
+  __syncthreads();
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    numtyp qtmp; fetch(qtmp,i,q_tex);
+    int iw=ix.w;
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
+    numtyp lamdainv = ucl_recip(lamda);
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+      int j=dev_packed[nbor];
+
+      numtyp factor_coul;
+      factor_coul = (numtyp)1.0-sp_cl[sbmask(j)];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int mtype=itype+jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      if (rsq < cut_coulsq) {
+        numtyp r2inv=ucl_recip(rsq);
+        numtyp force, prefactor, _erfc;
+
+        numtyp r = ucl_rsqrt(r2inv);
+        numtyp grij = g_ewald * r;
+        numtyp expm2 = ucl_exp(-grij*grij);
+        numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
+        _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+        fetch(prefactor,j,q_tex);
+        prefactor *= qqrd2e * scale[mtype] * qtmp/r;
+        numtyp rlamdainv = r * lamdainv;
+        numtyp exprlmdainv = ucl_exp((numtyp)-2.0*rlamdainv);
+        numtyp slater_term = exprlmdainv*((numtyp)1.0 + ((numtyp)2.0*rlamdainv*((numtyp)1.0+rlamdainv)));
+        force = prefactor*(_erfc + EWALD_F*grij*expm2-slater_term);
+        if (factor_coul > (numtyp)0) force -= factor_coul*prefactor*((numtyp)1.0-slater_term);
+        force *= r2inv;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (EVFLAG && eflag) {
+          numtyp e_slater = ((numtyp)1.0 + rlamdainv)*exprlmdainv;
+          numtyp e = prefactor*(_erfc-e_slater);
+          if (factor_coul > (numtyp)0) e -= factor_coul*prefactor*((numtyp)1.0 - e_slater);
+          e_coul += e;
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+  } // if ii
+  acctyp energy;
+  if (EVFLAG) energy=(acctyp)0.0;
+  store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
+                   vflag,ans,engv);
+}
+
diff --git a/lib/gpu/lal_coul_slater_long.h b/lib/gpu/lal_coul_slater_long.h
new file mode 100644
index 0000000000..1731992a16
--- /dev/null
+++ b/lib/gpu/lal_coul_slater_long.h
@@ -0,0 +1,82 @@
+/***************************************************************************
+                             coul_slater_long.h
+                             -------------------
+                           Trung Nguyen (U Chicago)
+
+  Class for acceleration of the coul/slater/long pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : September 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_COUL_SLATER_LONG_H
+#define LAL_COUL_SLATER_LONG_H
+
+#include "lal_base_charge.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class CoulSlaterLong : public BaseCharge<numtyp, acctyp> {
+ public:
+  CoulSlaterLong();
+  ~CoulSlaterLong();
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    *
+    * Returns:
+    * -  0 if successful
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **scale,
+           const int nlocal, const int nall, const int max_nbors,
+           const int maxspecial, const double cell_size,
+                 const double gpu_split, FILE *screen,
+                 const double host_cut_coulsq, double *host_special_coul,
+                 const double qqrd2e, const double g_ewald, const double lamda);
+
+  /// Send updated coeffs from host to device (to be compatible with fix adapt)
+  void reinit(const int ntypes, double **scale);
+
+  /// Clear all host and device data
+  /** \note This is called at the beginning of the init() routine **/
+  void clear();
+
+  /// Returns memory usage on device per atom
+  int bytes_per_atom(const int max_nbors) const;
+
+  /// Total host memory used by library for pair style
+  double host_memory_usage() const;
+
+  // --------------------------- TYPE DATA --------------------------
+
+  /// scale
+  UCL_D_Vec<numtyp> scale;
+  /// Special Coul values [0-3]
+  UCL_D_Vec<numtyp> sp_cl;
+
+  /// If atom type constants fit in shared memory, use fast kernels
+  bool shared_types;
+
+  /// Number of atom types
+  int _lj_types;
+
+  numtyp _cut_coulsq, _qqrd2e, _g_ewald, _lamda;
+
+ protected:
+  bool _allocated;
+  int loop(const int eflag, const int vflag);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_coul_slater_long_ext.cpp b/lib/gpu/lal_coul_slater_long_ext.cpp
new file mode 100644
index 0000000000..8c34cc5552
--- /dev/null
+++ b/lib/gpu/lal_coul_slater_long_ext.cpp
@@ -0,0 +1,145 @@
+/***************************************************************************
+                           coul_slater_long_ext.cpp
+                           ------------------------
+                           Trung Nguyen (U Chicago)
+
+  Functions for LAMMPS access to coul/slater/long acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : September 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "lal_coul_slater_long.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static CoulSlaterLong<PRECISION,ACC_PRECISION> CSLMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int csl_gpu_init(const int ntypes, double **host_scale,
+                const int inum, const int nall, const int max_nbors,
+                const int maxspecial, const double cell_size, int &gpu_mode,
+                FILE *screen, double host_cut_coulsq, double *host_special_coul,
+                const double qqrd2e, const double g_ewald, const double lamda) {
+  CSLMF.clear();
+  gpu_mode=CSLMF.device->gpu_mode();
+  double gpu_split=CSLMF.device->particle_split();
+  int first_gpu=CSLMF.device->first_device();
+  int last_gpu=CSLMF.device->last_device();
+  int world_me=CSLMF.device->world_me();
+  int gpu_rank=CSLMF.device->gpu_rank();
+  int procs_per_gpu=CSLMF.device->procs_per_gpu();
+
+  CSLMF.device->init_message(screen,"coul/slater/long",first_gpu,last_gpu);
+
+  bool message=false;
+  if (CSLMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=CSLMF.init(ntypes, host_scale, inum, nall, max_nbors, maxspecial,
+                      cell_size, gpu_split, screen, host_cut_coulsq,
+                      host_special_coul, qqrd2e, g_ewald, lamda);
+
+  CSLMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=CSLMF.init(ntypes, host_scale, inum, nall, max_nbors, maxspecial,
+                        cell_size, gpu_split, screen, host_cut_coulsq,
+                        host_special_coul, qqrd2e, g_ewald, lamda);
+
+    CSLMF.device->serialize_init();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    CSLMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+// ---------------------------------------------------------------------------
+// Copy updated coeffs from host to device
+// ---------------------------------------------------------------------------
+void csl_gpu_reinit(const int ntypes, double **host_scale) {
+  int world_me=CSLMF.device->world_me();
+  int gpu_rank=CSLMF.device->gpu_rank();
+  int procs_per_gpu=CSLMF.device->procs_per_gpu();
+
+  if (world_me==0)
+    CSLMF.reinit(ntypes, host_scale);
+
+  CSLMF.device->world_barrier();
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (gpu_rank==i && world_me!=0)
+      CSLMF.reinit(ntypes, host_scale);
+
+    CSLMF.device->serialize_init();
+  }
+}
+
+void csl_gpu_clear() {
+  CSLMF.clear();
+}
+
+int** csl_gpu_compute_n(const int ago, const int inum_full,
+                       const int nall, double **host_x, int *host_type,
+                       double *sublo, double *subhi, tagint *tag, int **nspecial,
+                       tagint **special, const bool eflag, const bool vflag,
+                       const bool eatom, const bool vatom, int &host_start,
+                       int **ilist, int **jnum,  const double cpu_time,
+                       bool &success, double *host_q, double *boxlo,
+                       double *prd) {
+  return CSLMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                      subhi, tag, nspecial, special, eflag, vflag, eatom,
+                      vatom, host_start, ilist, jnum, cpu_time, success,
+                      host_q, boxlo, prd);
+}
+
+void csl_gpu_compute(const int ago, const int inum_full, const int nall,
+                    double **host_x, int *host_type, int *ilist, int *numj,
+                    int **firstneigh, const bool eflag, const bool vflag,
+                    const bool eatom, const bool vatom, int &host_start,
+                    const double cpu_time, bool &success, double *host_q,
+                    const int nlocal, double *boxlo, double *prd) {
+  CSLMF.compute(ago,inum_full,nall,host_x,host_type,ilist,numj,
+               firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success,
+               host_q,nlocal,boxlo,prd);
+}
+
+double csl_gpu_bytes() {
+  return CSLMF.host_memory_usage();
+}
+
+
diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp
index 70ba373a65..e9ef2294b2 100644
--- a/lib/gpu/lal_device.cpp
+++ b/lib/gpu/lal_device.cpp
@@ -364,6 +364,12 @@ int DeviceT::init_device(MPI_Comm /*world*/, MPI_Comm replica, const int ngpu,
   } else
     _neighbor_shared.setup_auto_cell_size(false,_user_cell_size,_simd_size);
 
+  #ifndef LAL_USE_OLD_NEIGHBOR
+  _use_old_nbor_build = 0;
+  #else
+  _use_old_nbor_build = 1;
+  #endif
+
   return flag;
 }
 
@@ -510,9 +516,13 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
     gpu_nbor=1;
   else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
     gpu_nbor=2;
+
+  // NOTE: enforce the hybrid mode (binning on the CPU)
+  // when not using sorting on the device
   #if !defined(USE_CUDPP) && !defined(USE_HIP_DEVICE_SORT)
   if (gpu_nbor==1) gpu_nbor=2;
   #endif
+  // or when the device supports subgroups
   #ifndef LAL_USE_OLD_NEIGHBOR
   if (gpu_nbor==1) gpu_nbor=2;
   #endif
@@ -886,19 +896,31 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
       }
       if (times[5] > 0.0)
         fprintf(screen,"Device Overhead: %.4f s.\n",times[5]/_replica_size);
-      fprintf(screen,"Average split:   %.4f.\n",avg_split);
-      fprintf(screen,"Lanes / atom:    %d.\n",threads_per_atom);
-      fprintf(screen,"Vector width:    %d.\n", simd_size());
-      fprintf(screen,"Prefetch mode:   ");
-      if (_nbor_prefetch==2) fprintf(screen,"Intrinsics.\n");
-      else if (_nbor_prefetch==1) fprintf(screen,"API.\n");
-      else fprintf(screen,"None.\n");
-      fprintf(screen,"Max Mem / Proc:  %.2f MB.\n",max_mb);
       if (nbor.gpu_nbor()==2)
         fprintf(screen,"CPU Neighbor:    %.4f s.\n",times[8]/_replica_size);
       fprintf(screen,"CPU Cast/Pack:   %.4f s.\n",times[4]/_replica_size);
       fprintf(screen,"CPU Driver_Time: %.4f s.\n",times[6]/_replica_size);
       fprintf(screen,"CPU Idle_Time:   %.4f s.\n",times[7]/_replica_size);
+      fprintf(screen,"Average split:   %.4f.\n",avg_split);
+      fprintf(screen,"Max Mem / Proc:  %.2f MB.\n",max_mb);
+      fprintf(screen,"Prefetch mode:   ");
+      if (_nbor_prefetch==2) fprintf(screen,"Intrinsics.\n");
+      else if (_nbor_prefetch==1) fprintf(screen,"API.\n");
+      else fprintf(screen,"None.\n");
+      fprintf(screen,"Vector width:    %d.\n", simd_size());
+      fprintf(screen,"Lanes / atom:    %d.\n",threads_per_atom);
+      fprintf(screen,"Pair block:      %d.\n",_block_pair);
+      fprintf(screen,"Neigh block:     %d.\n",_block_nbor_build);
+      if (nbor.gpu_nbor()==2) {
+        fprintf(screen,"Neigh mode:      Hybrid (binning on host)");
+        if (_use_old_nbor_build == 1) fprintf(screen," - legacy\n");
+        else  fprintf(screen," with subgroup support\n");
+      } else if (nbor.gpu_nbor()==1) {
+        fprintf(screen,"Neigh mode:      Device");
+        if (_use_old_nbor_build == 1) fprintf(screen," - legacy\n");
+        else  fprintf(screen," - with subgroup support\n");
+      } else if (nbor.gpu_nbor()==0)
+        fprintf(screen,"Neigh mode:      Host\n");
 
       fprintf(screen,"-------------------------------------");
       fprintf(screen,"--------------------------------\n\n");
diff --git a/lib/gpu/lal_device.h b/lib/gpu/lal_device.h
index ba693e551a..d6b52484f1 100644
--- a/lib/gpu/lal_device.h
+++ b/lib/gpu/lal_device.h
@@ -347,6 +347,7 @@ class Device {
   int _pppm_block, _block_nbor_build, _block_cell_2d, _block_cell_id;
   int _max_shared_types, _max_bio_shared_types, _pppm_max_spline;
   int _nbor_prefetch;
+  int _use_old_nbor_build;
 
   UCL_Program *dev_program;
   UCL_Kernel k_zero, k_info;
diff --git a/lib/gpu/lal_eam.cpp b/lib/gpu/lal_eam.cpp
index b7bc7b958a..0a2ed21ab3 100644
--- a/lib/gpu/lal_eam.cpp
+++ b/lib/gpu/lal_eam.cpp
@@ -303,7 +303,7 @@ double EAMT::host_memory_usage() const {
 }
 
 // ---------------------------------------------------------------------------
-// Copy nbor list from host if necessary and then compute atom energies/forces
+// Copy nbor list from host if necessary and then compute per-atom fp
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
 void EAMT::compute(const int f_ago, const int inum_full, const int nlocal,
@@ -379,7 +379,7 @@ void EAMT::compute(const int f_ago, const int inum_full, const int nlocal,
 }
 
 // ---------------------------------------------------------------------------
-// Reneighbor on GPU and then compute per-atom densities
+// Reneighbor on GPU and then compute per-atom fp
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
 int** EAMT::compute(const int ago, const int inum_full, const int nall,
@@ -461,7 +461,7 @@ int** EAMT::compute(const int ago, const int inum_full, const int nall,
 }
 
 // ---------------------------------------------------------------------------
-// Copy nbor list from host if necessary and then calculate forces, virials,..
+// Update per-atom fp, and then calculate forces, virials,..
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
 void EAMT::compute2(int *ilist, const bool eflag, const bool vflag,
diff --git a/lib/gpu/lal_edpd.cpp b/lib/gpu/lal_edpd.cpp
new file mode 100644
index 0000000000..c03591b9ed
--- /dev/null
+++ b/lib/gpu/lal_edpd.cpp
@@ -0,0 +1,285 @@
+/***************************************************************************
+                                   edpd.cpp
+                             -------------------
+                            Trung Dac Nguyen (U Chicago)
+
+  Class for acceleration of the edpd pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : September 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "edpd_cl.h"
+#elif defined(USE_CUDART)
+const char *edpd=0;
+#else
+#include "edpd_cubin.h"
+#endif
+
+#include "lal_edpd.h"
+#include <cassert>
+namespace LAMMPS_AL {
+#define EDPDT EDPD<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> device;
+
+template <class numtyp, class acctyp>
+EDPDT::EDPD() : BaseDPD<numtyp,acctyp>(), _allocated(false) {
+  _max_q_size = 0;
+}
+
+template <class numtyp, class acctyp>
+EDPDT::~EDPD() {
+  clear();
+}
+
+template <class numtyp, class acctyp>
+int EDPDT::bytes_per_atom(const int max_nbors) const {
+  return this->bytes_per_atom_atomic(max_nbors);
+}
+
+template <class numtyp, class acctyp>
+int EDPDT::init(const int ntypes,
+                double **host_cutsq, double **host_a0,
+                double **host_gamma, double **host_cut,
+                double **host_power, double **host_kappa,
+                double **host_powerT, double **host_cutT,
+                double ***host_sc, double ***host_kc, double *host_mass,
+                double *host_special_lj,
+                const int power_flag, const int kappa_flag,
+                const int nlocal, const int nall,
+                const int max_nbors, const int maxspecial,
+                const double cell_size,
+                const double gpu_split, FILE *_screen) {
+  const int max_shared_types=this->device->max_shared_types();
+
+  int onetype=0;
+  #ifdef USE_OPENCL
+  if (maxspecial==0)
+    for (int i=1; i<ntypes; i++)
+      for (int j=i; j<ntypes; j++)
+        if (host_cutsq[i][j]>0) {
+          if (onetype>0)
+            onetype=-1;
+          else if (onetype==0)
+            onetype=i*max_shared_types+j;
+        }
+  if (onetype<0) onetype=0;
+  #endif
+
+  int success;
+  int extra_fields = 4; // round up to accomodate quadruples of numtyp values
+                        // T and cv
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
+                            gpu_split,_screen,edpd,"k_edpd",onetype,extra_fields);
+  if (success!=0)
+    return success;
+
+  // If atom type constants fit in shared memory use fast kernel
+  int lj_types=ntypes;
+  shared_types=false;
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    shared_types=true;
+  }
+  _lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  coeff.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_a0,host_gamma,
+                         host_cut);
+
+  coeff2.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,coeff2,host_write,host_power,host_kappa,
+                         host_powerT,host_cutT);
+
+  UCL_H_Vec<numtyp> dview_mass(ntypes, *(this->ucl_device), UCL_WRITE_ONLY);
+  for (int i = 0; i < ntypes; i++)
+    dview_mass[i] = host_mass[i];
+  mass.alloc(ntypes,*(this->ucl_device), UCL_READ_ONLY);
+  ucl_copy(mass,dview_mass,false);
+
+  if (host_sc) {
+    UCL_H_Vec<numtyp4> dview(lj_types*lj_types,*(this->ucl_device),UCL_WRITE_ONLY);;
+    sc.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+    int n = 0;
+    for (int i = 1; i < ntypes; i++)
+      for (int j = 1; j < ntypes; j++) {
+        dview[n].x = host_sc[i][j][0];
+        dview[n].y = host_sc[i][j][1];
+        dview[n].z = host_sc[i][j][2];
+        dview[n].w = host_sc[i][j][3];
+        n++;
+      }
+    ucl_copy(sc,dview,false);
+  }
+
+  if (host_kc) {
+    UCL_H_Vec<numtyp4> dview(lj_types*lj_types,*(this->ucl_device),UCL_WRITE_ONLY);;
+    kc.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+    int n = 0;
+    for (int i = 1; i < ntypes; i++)
+      for (int j = 1; j < ntypes; j++) {
+        dview[n].x = host_kc[i][j][0];
+        dview[n].y = host_kc[i][j][1];
+        dview[n].z = host_kc[i][j][2];
+        dview[n].w = host_kc[i][j][3];
+        n++;
+      }
+    ucl_copy(kc,dview,false);
+  }
+
+  UCL_H_Vec<numtyp> host_rsq(lj_types*lj_types,*(this->ucl_device),
+                             UCL_WRITE_ONLY);
+  cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq);
+
+  double special_sqrt[4];
+  special_sqrt[0] = sqrt(host_special_lj[0]);
+  special_sqrt[1] = sqrt(host_special_lj[1]);
+  special_sqrt[2] = sqrt(host_special_lj[2]);
+  special_sqrt[3] = sqrt(host_special_lj[3]);
+
+  UCL_H_Vec<double> dview;
+  sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
+  dview.view(host_special_lj,4,*(this->ucl_device));
+  ucl_copy(sp_lj,dview,false);
+  sp_sqrt.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
+  dview.view(special_sqrt,4,*(this->ucl_device));
+  ucl_copy(sp_sqrt,dview,false);
+
+  _power_flag = power_flag;
+  _kappa_flag = kappa_flag;
+
+  // allocate per-atom array Q
+
+  int ef_nall=nall;
+  if (ef_nall==0)
+    ef_nall=2000;
+
+  _max_q_size=static_cast<int>(static_cast<double>(ef_nall)*1.10);
+  Q.alloc(_max_q_size,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE);
+
+  _allocated=true;
+  this->_max_bytes=coeff.row_bytes()+coeff2.row_bytes()+Q.row_bytes()+
+    sc.row_bytes()+kc.row_bytes()+mass.row_bytes()+cutsq.row_bytes()+sp_lj.row_bytes()+sp_sqrt.row_bytes();
+  return 0;
+}
+
+template <class numtyp, class acctyp>
+void EDPDT::clear() {
+  if (!_allocated)
+    return;
+  _allocated=false;
+
+  coeff.clear();
+  coeff2.clear();
+  sc.clear();
+  kc.clear();
+  Q.clear();
+  mass.clear();
+  cutsq.clear();
+  sp_lj.clear();
+  sp_sqrt.clear();
+  this->clear_atomic();
+}
+
+template <class numtyp, class acctyp>
+double EDPDT::host_memory_usage() const {
+  return this->host_memory_usage_atomic()+sizeof(EDPD<numtyp,acctyp>);
+}
+
+template <class numtyp, class acctyp>
+void EDPDT::update_flux(void **flux_ptr) {
+  *flux_ptr=Q.host.begin();
+  Q.update_host(_max_q_size,false);
+}
+
+// ---------------------------------------------------------------------------
+// Calculate energies, forces, and torques
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int EDPDT::loop(const int eflag, const int vflag) {
+
+  int nall = this->atom->nall();
+
+  // Resize Q array if necessary
+  if (nall > _max_q_size) {
+    _max_q_size=static_cast<int>(static_cast<double>(nall)*1.10);
+    Q.resize(_max_q_size);
+  }
+
+  // signal that we need to transfer extra data from the host
+
+  this->atom->extra_data_unavail();
+
+  numtyp4 *pextra=reinterpret_cast<numtyp4*>(&(this->atom->extra[0]));
+
+  int n = 0;
+  int nstride = 1;
+  for (int i = 0; i < nall; i++) {
+    int idx = n+i*nstride;
+    numtyp4 v;
+    v.x = edpd_temp[i];
+    v.y = edpd_cv[i];
+    v.z = 0;
+    v.w = 0;
+    pextra[idx] = v;
+  }
+  this->atom->add_extra_data();
+
+  // Compute the block size and grid size to keep all cores busy
+  const int BX=this->block_size();
+  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
+                               (BX/this->_threads_per_atom)));
+
+
+  int ainum=this->ans->inum();
+  int nbor_pitch=this->nbor->nbor_pitch();
+  this->time_pair.start();
+  if (shared_types) {
+    this->k_pair_sel->set_size(GX,BX);
+    this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &coeff2, &mass,
+                          &sc, &kc, &sp_lj, &sp_sqrt, &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                          &this->ans->force, &this->ans->engv, &Q, &eflag, &vflag,
+                          &_power_flag, &_kappa_flag, &ainum, &nbor_pitch,
+                          &this->atom->v, &cutsq, &this->_dtinvsqrt, &this->_seed,
+                          &this->_timestep, &this->_threads_per_atom);
+  } else {
+    this->k_pair.set_size(GX,BX);
+    this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &coeff2, &mass,
+                     &sc, &kc, &_lj_types, &sp_lj, &sp_sqrt,
+                     &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                     &this->ans->force, &this->ans->engv, &Q, &eflag, &vflag,
+                     &_power_flag, &_kappa_flag,  &ainum, &nbor_pitch,
+                     &this->atom->v, &cutsq, &this->_dtinvsqrt, &this->_seed,
+                     &this->_timestep, &this->_threads_per_atom);
+  }
+
+  this->time_pair.stop();
+  return GX;
+}
+
+// ---------------------------------------------------------------------------
+// Get the extra data pointers from host
+// ---------------------------------------------------------------------------
+
+template <class numtyp, class acctyp>
+void EDPDT::get_extra_data(double *host_T, double *host_cv) {
+  edpd_temp = host_T;
+  edpd_cv = host_cv;
+}
+
+template class EDPD<PRECISION,ACC_PRECISION>;
+}
diff --git a/lib/gpu/lal_edpd.cu b/lib/gpu/lal_edpd.cu
new file mode 100644
index 0000000000..0982d219eb
--- /dev/null
+++ b/lib/gpu/lal_edpd.cu
@@ -0,0 +1,617 @@
+// **************************************************************************
+//                                   edpd.cu
+//                             -------------------
+//                           Trung Dac Nguyen (U Chicago)
+//
+//  Device code for acceleration of the edpd pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : September 2023
+//    email                : ndactrung@gmail.com
+// ***************************************************************************
+
+#if defined(NV_KERNEL) || defined(USE_HIP)
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+_texture( pos_tex,float4);
+_texture( vel_tex,float4);
+#else
+_texture_2d( pos_tex,int4);
+_texture_2d( vel_tex,int4);
+#endif
+#else
+#define pos_tex x_
+#define vel_tex v_
+#endif
+
+#define EPSILON (numtyp)1.0e-10
+
+//#define _USE_UNIFORM_SARU_LCG
+//#define _USE_UNIFORM_SARU_TEA8
+//#define _USE_GAUSSIAN_SARU_LCG
+
+#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && !defined(_USE_GAUSSIAN_SARU_LCG)
+#define _USE_UNIFORM_SARU_LCG
+#endif
+
+// References:
+// 1. Y. Afshar, F. Schmid, A. Pishevar, S. Worley, Comput. Phys. Comm. 184 (2013), 1119–1128.
+// 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, Comput. Phys. Comm. 230 (2011), 7191-7201.
+// PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19
+
+#define LCGA 0x4beb5d59 /* Full period 32 bit LCG */
+#define LCGC 0x2600e1f7
+#define oWeylPeriod 0xda879add /* Prime period 3666320093 */
+#define oWeylOffset 0x8009d14b
+#define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */
+
+// specifically implemented for steps = 1; high = 1.0; low = -1.0
+// returns uniformly distributed random numbers u in [-1.0;1.0]
+// using the inherent LCG, then multiply u with sqrt(3) to "match"
+// with a normal random distribution.
+// Afshar et al. mutlplies u in [-0.5;0.5] with sqrt(12)
+// Curly brackets to make variables local to the scope.
+#ifdef _USE_UNIFORM_SARU_LCG
+#define SQRT3 (numtyp)1.7320508075688772935274463
+#define saru(seed1, seed2, seed, timestep, randnum) {                         \
+  unsigned int seed3 = seed + timestep;                                       \
+  seed3^=(seed1<<7)^(seed2>>6);                                               \
+  seed2+=(seed1>>4)^(seed3>>15);                                              \
+  seed1^=(seed2<<9)+(seed3<<8);                                               \
+  seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
+  seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
+  seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
+  seed2+=seed1*seed3;                                                         \
+  seed1+=seed3 ^ (seed2>>2);                                                  \
+  seed2^=((signed int)seed2)>>17;                                             \
+  unsigned int state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));         \
+  unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8);           \
+  state  = state + (wstate*(wstate^0xdddf97f5));                              \
+  wstate = 0xABCB96F7 + (wstate>>1);                                          \
+  state = LCGA*state + LCGC;                                                  \
+  wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod);   \
+  unsigned int v = (state ^ (state>>26)) + wstate;                            \
+  unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7);                      \
+  randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0);                        \
+}
+#endif
+
+// specifically implemented for steps = 1; high = 1.0; low = -1.0
+// returns uniformly distributed random numbers u in [-1.0;1.0] using TEA8
+// then multiply u with sqrt(3) to "match" with a normal random distribution
+// Afshar et al. mutlplies u in [-0.5;0.5] with sqrt(12)
+#ifdef _USE_UNIFORM_SARU_TEA8
+#define SQRT3 (numtyp)1.7320508075688772935274463
+#define k0 0xA341316C
+#define k1 0xC8013EA4
+#define k2 0xAD90777D
+#define k3 0x7E95761E
+#define delta 0x9e3779b9
+#define rounds 8
+#define saru(seed1, seed2, seed, timestep, randnum) {                         \
+  unsigned int seed3 = seed + timestep;                                       \
+  seed3^=(seed1<<7)^(seed2>>6);                                               \
+  seed2+=(seed1>>4)^(seed3>>15);                                              \
+  seed1^=(seed2<<9)+(seed3<<8);                                               \
+  seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
+  seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
+  seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
+  seed2+=seed1*seed3;                                                         \
+  seed1+=seed3 ^ (seed2>>2);                                                  \
+  seed2^=((signed int)seed2)>>17;                                             \
+  unsigned int state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));         \
+  unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8);           \
+  state  = state + (wstate*(wstate^0xdddf97f5));                              \
+  wstate = 0xABCB96F7 + (wstate>>1);                                          \
+  unsigned int sum = 0;                                                       \
+  for (int i=0; i < rounds; i++) {                                            \
+    sum += delta;                                                             \
+    state += ((wstate<<4) + k0)^(wstate + sum)^((wstate>>5) + k1);            \
+    wstate += ((state<<4) + k2)^(state + sum)^((state>>5) + k3);              \
+  }                                                                           \
+  unsigned int v = (state ^ (state>>26)) + wstate;                            \
+  unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7);                      \
+  randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0);                        \
+}
+#endif
+
+// specifically implemented for steps = 1; high = 1.0; low = -1.0
+// returns two uniformly distributed random numbers r1 and r2 in [-1.0;1.0],
+// and uses the polar method (Marsaglia's) to transform to a normal random value
+// This is used to compared with CPU DPD using RandMars::gaussian()
+#ifdef _USE_GAUSSIAN_SARU_LCG
+#define saru(seed1, seed2, seed, timestep, randnum) {                         \
+  unsigned int seed3 = seed + timestep;                                       \
+  seed3^=(seed1<<7)^(seed2>>6);                                               \
+  seed2+=(seed1>>4)^(seed3>>15);                                              \
+  seed1^=(seed2<<9)+(seed3<<8);                                               \
+  seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
+  seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
+  seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
+  seed2+=seed1*seed3;                                                         \
+  seed1+=seed3 ^ (seed2>>2);                                                  \
+  seed2^=((signed int)seed2)>>17;                                             \
+  unsigned int state=0x12345678;                                              \
+  unsigned int wstate=12345678;                                               \
+  state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));                      \
+  wstate = (state + seed2) ^ (((signed int)state)>>8);                        \
+  state  = state + (wstate*(wstate^0xdddf97f5));                              \
+  wstate = 0xABCB96F7 + (wstate>>1);                                          \
+  unsigned int v, s;                                                          \
+  numtyp r1, r2, rsq;                                                         \
+  while (1) {                                                                 \
+    state = LCGA*state + LCGC;                                                \
+    wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
+    v = (state ^ (state>>26)) + wstate;                                       \
+    s = (signed int)((v^(v>>20))*0x6957f5a7);                                 \
+    r1 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0;                                   \
+    state = LCGA*state + LCGC;                                                \
+    wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
+    v = (state ^ (state>>26)) + wstate;                                       \
+    s = (signed int)((v^(v>>20))*0x6957f5a7);                                 \
+    r2 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0;                                   \
+    rsq = r1 * r1 + r2 * r2;                                                  \
+    if (rsq < (numtyp)1.0) break;                                             \
+  }                                                                           \
+  numtyp fac = ucl_sqrt((numtyp)-2.0*log(rsq)/rsq);                           \
+  randnum = r2*fac;                                                           \
+}
+#endif
+
+#if (SHUFFLE_AVAIL == 0)
+
+#define store_heatflux(Qi, ii, inum, tid, t_per_atom, offset, Q)             \
+  if (t_per_atom>1) {                                                        \
+    simdsync();                                                              \
+    simd_reduce_add1(t_per_atom, red_acc, offset, tid, Qi);                  \
+  }                                                                          \
+  if (offset==0 && ii<inum) {                                                \
+    Q[ii]=Qi;                                                                \
+  }
+#else
+#define store_heatflux(Qi, ii, inum, tid, t_per_atom, offset, Q)             \
+  if (t_per_atom>1) {                                                        \
+    simd_reduce_add1(t_per_atom,Qi);                                         \
+  }                                                                          \
+  if (offset==0 && ii<inum) {                                                \
+    Q[ii]=Qi;                                                                \
+  }
+#endif
+
+#define MIN(A,B) ((A) < (B) ? (A) : (B))
+#define MAX(A,B) ((A) < (B) ? (B) : (A))
+
+// note the change in coeff: coeff.x = a0, coeff.y = gamma, coeff.z = cut (no sigma)
+
+__kernel void k_edpd(const __global numtyp4 *restrict x_,
+                     const __global numtyp4 *restrict extra,
+                     const __global numtyp4 *restrict coeff,
+                     const __global numtyp4 *restrict coeff2,
+                     const __global numtyp *restrict mass,
+                     const __global numtyp4 *restrict sc,
+                     const __global numtyp4 *restrict kc,
+                     const int lj_types,
+                     const __global numtyp *restrict sp_lj,
+                     const __global numtyp *restrict sp_sqrt,
+                     const __global int * dev_nbor,
+                     const __global int * dev_packed,
+                     __global acctyp3 *restrict ans,
+                     __global acctyp *restrict engv,
+                     __global acctyp *restrict Q,
+                     const int eflag, const int vflag,
+                     const int power_flag, const int kappa_flag,
+                     const int inum, const int nbor_pitch,
+                     const __global numtyp4 *restrict v_,
+                     const __global numtyp *restrict cutsq,
+                     const numtyp dtinvsqrt, const int seed,
+                     const int timestep, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  int n_stride;
+  local_allocate_store_pair();
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp energy, virial[6];
+  if (EVFLAG) {
+    energy=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+  acctyp Qi = (acctyp)0;
+
+  if (ii<inum) {
+    int i, numj, nbor, nbor_end;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int itype=ix.w;
+    numtyp mass_itype = mass[itype];
+    numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
+    int itag=iv.w;
+
+    const numtyp4 Tcvi = extra[i];
+    numtyp Ti = Tcvi.x;
+    numtyp cvi = Tcvi.y;
+
+    numtyp factor_dpd;
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+
+      int j=dev_packed[nbor];
+      factor_dpd = sp_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+      numtyp4 jv; fetch4(jv,j,vel_tex); //v_[j];
+      int jtag=jv.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      int mtype=itype*lj_types+jtype;
+      if (rsq<cutsq[mtype]) {
+        numtyp r=ucl_sqrt(rsq);
+        if (r < EPSILON) continue;
+
+        numtyp rinv=ucl_recip(r);
+        numtyp delvx = iv.x - jv.x;
+        numtyp delvy = iv.y - jv.y;
+        numtyp delvz = iv.z - jv.z;
+        numtyp dot = delx*delvx + dely*delvy + delz*delvz;
+        numtyp vijeij = dot*rinv;
+
+        const numtyp coeffx=coeff[mtype].x; // a0[itype][jtype]
+        const numtyp coeffy=coeff[mtype].y; // gamma[itype][jtype]
+        const numtyp coeffz=coeff[mtype].z; // cut[itype][jtype]
+
+        const numtyp4 Tcvj = extra[j];
+        numtyp Tj = Tcvj.x;
+        numtyp cvj = Tcvj.y;
+
+        unsigned int tag1=itag, tag2=jtag;
+        if (tag1 > tag2) {
+          tag1 = jtag; tag2 = itag;
+        }
+
+        numtyp randnum = (numtyp)0.0;
+        saru(tag1, tag2, seed, timestep, randnum);
+
+        numtyp T_ij=(numtyp)0.5*(Ti+Tj);
+        numtyp4 T_pow;
+        T_pow.x = T_ij - (numtyp)1.0;
+        T_pow.y = T_pow.x*T_pow.x;
+        T_pow.z = T_pow.x*T_pow.y;
+        T_pow.w = T_pow.x*T_pow.z;
+
+        numtyp coeff2x = coeff2[mtype].x; //power[itype][jtype]
+        numtyp coeff2y = coeff2[mtype].y; //kappa[itype][jtype]
+        numtyp coeff2z = coeff2[mtype].z; //powerT[itype][jtype]
+        numtyp coeff2w = coeff2[mtype].w; //cutT[itype][jtype]
+        numtyp power_d = coeff2x;
+        if (power_flag) {
+          numtyp factor = (numtyp)1.0;
+          factor += sc[mtype].x*T_pow.x + sc[mtype].y*T_pow.y +
+            sc[mtype].z*T_pow.z + sc[mtype].w*T_pow.w;
+          power_d *= factor;
+        }
+
+        power_d = MAX((numtyp)0.01,power_d);
+        numtyp wc = (numtyp)1.0 - r/coeffz; // cut[itype][jtype]
+        wc = MAX((numtyp)0.0,MIN((numtyp)1.0,wc));
+        numtyp wr = ucl_pow(wc, (numtyp)0.5*power_d);
+
+        numtyp kboltz = (numtyp)1.0;
+        numtyp GammaIJ = coeffy; // gamma[itype][jtype]
+        numtyp SigmaIJ = (numtyp)4.0*GammaIJ*kboltz*Ti*Tj/(Ti+Tj);
+        SigmaIJ = ucl_sqrt(SigmaIJ);
+
+        numtyp force =  coeffx*T_ij*wc; // a0[itype][jtype]
+        force -= GammaIJ *wr*wr *dot*rinv;
+        force += SigmaIJ * wr *randnum * dtinvsqrt;
+        force *= factor_dpd*rinv;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        // heat transfer
+
+        if (r < coeff2w) {
+          numtyp wrT = (numtyp)1.0 - r/coeff2w;
+          wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT));
+          wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype]
+          numtyp randnumT = (numtyp)0;
+          saru(tag1, tag2, seed+tag1+tag2, timestep, randnumT); // randomT->gaussian();
+          randnumT = MAX((numtyp)-5.0,MIN(randnum,(numtyp)5.0));
+
+          numtyp kappaT = coeff2y; // kappa[itype][jtype]
+          if (kappa_flag) {
+            numtyp factor = (numtyp)1.0;
+            factor += kc[mtype].x*T_pow.x + kc[mtype].y*T_pow.y +
+              kc[mtype].z*T_pow.z + kc[mtype].w*T_pow.w;
+            kappaT *= factor;
+          }
+
+          numtyp kij = cvi*cvj*kappaT * T_ij*T_ij;
+          numtyp alphaij = ucl_sqrt((numtyp)2.0*kboltz*kij);
+
+          numtyp dQc = kij * wrT*wrT * (Tj - Ti)/(Ti*Tj);
+          numtyp dQd = wr*wr*( GammaIJ * vijeij*vijeij - SigmaIJ*SigmaIJ/mass_itype ) - SigmaIJ * wr *vijeij *randnum;
+          dQd /= (cvi+cvj);
+          numtyp dQr = alphaij * wrT * dtinvsqrt * randnumT;
+          Qi += (dQc + dQd + dQr );
+        }
+
+        if (EVFLAG && eflag) {
+          numtyp e = (numtyp)0.5*coeffx*T_ij*coeffz * wc*wc;
+          energy+=factor_dpd*e;
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+    } // for nbor
+  } // if ii
+  store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
+                ans,engv);
+  store_heatflux(Qi,ii,inum,tid,t_per_atom,offset,Q);
+}
+
+__kernel void k_edpd_fast(const __global numtyp4 *restrict x_,
+                          const __global numtyp4 *restrict extra,
+                          const __global numtyp4 *restrict coeff_in,
+                          const __global numtyp4 *restrict coeff2_in,
+                          const __global numtyp *restrict mass,
+                          const __global numtyp4 *restrict sc_in,
+                          const __global numtyp4 *restrict kc_in,
+                          const __global numtyp *restrict sp_lj_in,
+                          const __global numtyp *restrict sp_sqrt_in,
+                          const __global int * dev_nbor,
+                          const __global int * dev_packed,
+                          __global acctyp3 *restrict ans,
+                          __global acctyp *restrict engv,
+                          __global acctyp *restrict Q,
+                          const int eflag, const int vflag,
+                          const int power_flag, const int kappa_flag,
+                          const int inum, const int nbor_pitch,
+                          const __global numtyp4 *restrict v_,
+                          const __global numtyp *restrict cutsq,
+                          const numtyp dtinvsqrt, const int seed,
+                          const int timestep, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  #ifndef ONETYPE
+  __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp4 coeff2[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp4 sc[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp4 kc[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp sp_lj[4];
+  if (tid<4) {
+    sp_lj[tid]=sp_lj_in[tid];
+  }
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
+    coeff[tid]=coeff_in[tid];
+    coeff2[tid]=coeff2_in[tid];
+    sc[tid]=sc_in[tid];
+    kc[tid]=kc_in[tid];
+  }
+  __syncthreads();
+  #else
+  const numtyp coeffx=coeff_in[ONETYPE].x;   // a0[itype][jtype]
+  const numtyp coeffy=coeff_in[ONETYPE].y;   // gamma[itype][jtype]
+  const numtyp coeffz=coeff_in[ONETYPE].z;   // cut[itype][jtype]
+  const numtyp coeff2x=coeff2_in[ONETYPE].x; // power[itype][jtype]
+  const numtyp coeff2y=coeff2_in[ONETYPE].y; // kappa[itype][jtype]
+  const numtyp coeff2z=coeff2_in[ONETYPE].z; // powerT[itype][jtype]
+  const numtyp coeff2w=coeff2_in[ONETYPE].w; // cutT[itype][jtype]
+  const numtyp cutsq_p=cutsq[ONETYPE];
+  const numtyp scx=sc_in[ONETYPE].x;
+  const numtyp scy=sc_in[ONETYPE].y;
+  const numtyp scz=sc_in[ONETYPE].z;
+  const numtyp scw=sc_in[ONETYPE].w;
+  const numtyp kcx=kc_in[ONETYPE].x;
+  const numtyp kcy=kc_in[ONETYPE].y;
+  const numtyp kcz=kc_in[ONETYPE].z;
+  const numtyp kcw=kc_in[ONETYPE].w;
+  #endif
+
+  int n_stride;
+  local_allocate_store_pair();
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp energy, virial[6];
+  if (EVFLAG) {
+    energy=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+  acctyp Qi = (acctyp)0;
+
+  if (ii<inum) {
+    int i, numj, nbor, nbor_end;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int iw=ix.w;
+    numtyp mass_itype = mass[iw];
+    #ifndef ONETYPE
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
+    #endif
+    numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
+    int itag=iv.w;
+
+    const numtyp4 Tcvi = extra[i];
+    numtyp Ti = Tcvi.x;
+    numtyp cvi = Tcvi.y;
+
+    #ifndef ONETYPE
+    numtyp factor_dpd;
+    #endif
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+
+      int j=dev_packed[nbor];
+      #ifndef ONETYPE
+      factor_dpd = sp_lj[sbmask(j)];
+      j &= NEIGHMASK;
+      #endif
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      #ifndef ONETYPE
+      int mtype=itype+jx.w;
+      const numtyp cutsq_p=cutsq[mtype];
+      #endif
+      numtyp4 jv; fetch4(jv,j,vel_tex); //v_[j];
+      int jtag=jv.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      if (rsq<cutsq_p) {
+        numtyp r=ucl_sqrt(rsq);
+        if (r < EPSILON) continue;
+
+        numtyp rinv=ucl_recip(r);
+        numtyp delvx = iv.x - jv.x;
+        numtyp delvy = iv.y - jv.y;
+        numtyp delvz = iv.z - jv.z;
+        numtyp dot = delx*delvx + dely*delvy + delz*delvz;
+        numtyp vijeij = dot*rinv;
+
+        #ifndef ONETYPE
+        const numtyp coeffx=coeff[mtype].x;   // a0[itype][jtype]
+        const numtyp coeffy=coeff[mtype].y;   // gamma[itype][jtype]
+        const numtyp coeffz=coeff[mtype].z;   // cut[itype][jtype]
+        const numtyp coeff2x=coeff2[mtype].x; // power[itype][jtype]
+        const numtyp coeff2y=coeff2[mtype].y; // kappa[itype][jtype]
+        const numtyp coeff2z=coeff2[mtype].z; // powerT[itype][jtype]
+        const numtyp coeff2w=coeff2[mtype].w; // cutT[itype][jtype]
+        const numtyp scx = sc[mtype].x;
+        const numtyp scy = sc[mtype].y;
+        const numtyp scz = sc[mtype].z;
+        const numtyp scw = sc[mtype].w;
+        const numtyp kcx = kc[mtype].x;
+        const numtyp kcy = kc[mtype].y;
+        const numtyp kcz = kc[mtype].z;
+        const numtyp kcw = kc[mtype].w;
+        #endif
+
+        const numtyp4 Tcvj = extra[j];
+        numtyp Tj = Tcvj.x;
+        numtyp cvj = Tcvj.y;
+
+        unsigned int tag1=itag, tag2=jtag;
+        if (tag1 > tag2) {
+          tag1 = jtag; tag2 = itag;
+        }
+        numtyp randnum = (numtyp)0.0;
+        saru(tag1, tag2, seed, timestep, randnum);
+
+        numtyp T_ij=(numtyp)0.5*(Ti+Tj);
+        numtyp4 T_pow;
+        T_pow.x = T_ij - (numtyp)1.0;
+        T_pow.y = T_pow.x*T_pow.x;
+        T_pow.z = T_pow.x*T_pow.y;
+        T_pow.w = T_pow.x*T_pow.z;
+
+        numtyp power_d = coeff2x; // power[itype][jtype]
+        if (power_flag) {
+          numtyp factor = (numtyp)1.0;
+          factor += scx*T_pow.x + scy*T_pow.y + scz*T_pow.z + scw*T_pow.w;
+          power_d *= factor;
+        }
+
+        power_d = MAX((numtyp)0.01,power_d);
+        numtyp wc = (numtyp)1.0 - r/coeffz; // cut[itype][jtype]
+        wc = MAX((numtyp)0.0,MIN((numtyp)1.0,wc));
+        numtyp wr = ucl_pow((numtyp)wc, (numtyp)0.5*power_d);
+
+        numtyp kboltz = (numtyp)1.0;
+        numtyp GammaIJ = coeffy; // gamma[itype][jtype]
+        numtyp SigmaIJ = (numtyp)4.0*GammaIJ*kboltz*Ti*Tj/(Ti+Tj);
+        SigmaIJ = ucl_sqrt(SigmaIJ);
+
+        numtyp force =  coeffx*T_ij*wc; // a0[itype][jtype]
+        force -= GammaIJ *wr*wr *dot*rinv;
+        force += SigmaIJ* wr *randnum * dtinvsqrt;
+        #ifndef ONETYPE
+        force *= factor_dpd*rinv;
+        #else
+        force *= rinv;
+        #endif
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        // heat transfer
+
+        if (r < coeff2w) {
+          numtyp wrT = (numtyp)1.0 - r/coeff2w;
+          wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT));
+          wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype]
+          numtyp randnumT = (numtyp)0;
+          saru(tag1, tag2, seed+tag1+tag2, timestep, randnumT); // randomT->gaussian();
+          randnumT = MAX((numtyp)-5.0,MIN(randnum,(numtyp)5.0));
+
+          numtyp kappaT = coeff2y; // kappa[itype][jtype]
+          if (kappa_flag) {
+            numtyp factor = (numtyp)1.0;
+            factor += kcx*T_pow.x +  kcy*T_pow.y + kcz*T_pow.z + kcw*T_pow.w;
+            kappaT *= factor;
+          }
+
+          numtyp kij = cvi*cvj*kappaT * T_ij*T_ij;
+          numtyp alphaij = ucl_sqrt((numtyp)2.0*kboltz*kij);
+
+          numtyp dQc = kij * wrT*wrT * (Tj - Ti )/(Ti*Tj);
+          numtyp dQd = wr*wr*( GammaIJ * vijeij*vijeij - SigmaIJ*SigmaIJ/mass_itype ) - SigmaIJ * wr *vijeij *randnum;
+          dQd /= (cvi+cvj);
+          numtyp dQr = alphaij * wrT * dtinvsqrt * randnumT;
+          Qi += (dQc + dQd + dQr );
+        }
+
+        if (EVFLAG && eflag) {
+          numtyp e = (numtyp)0.5*coeffx*T_ij*coeffz * wc*wc;
+          #ifndef ONETYPE
+          energy+=factor_dpd*e;
+          #else
+          energy+=e;
+          #endif
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+
+      }
+    } // for nbor
+  } // if ii
+
+  store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv);
+  store_heatflux(Qi,ii,inum,tid,t_per_atom,offset,Q);
+}
+
diff --git a/lib/gpu/lal_edpd.h b/lib/gpu/lal_edpd.h
new file mode 100644
index 0000000000..e5f7b0633b
--- /dev/null
+++ b/lib/gpu/lal_edpd.h
@@ -0,0 +1,102 @@
+/***************************************************************************
+                                 edpd.h
+                             -------------------
+                            Trung Dac Nguyen (U Chicago)
+
+  Class for acceleration of the edpd pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : September 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_EDPD_H
+#define LAL_EDPD_H
+
+#include "lal_base_dpd.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class EDPD : public BaseDPD<numtyp, acctyp> {
+ public:
+  EDPD();
+  ~EDPD();
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    *
+    * Returns:
+    * -  0 if successful
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **host_cutsq, double **host_a0,
+           double **host_gamma, double **host_cut, double **host_power,
+           double **host_kappa, double **host_powerT, double **host_cutT,
+           double ***host_sc, double ***host_kc, double *host_mass,
+           double *host_special_lj, const int power_flag, const int kappa_flag,
+           const int nlocal, const int nall, const int max_nbors,
+           const int maxspecial, const double cell_size, const double gpu_split,
+           FILE *screen);
+
+  /// Clear all host and device data
+  /** \note This is called at the beginning of the init() routine **/
+  void clear();
+
+  /// Returns memory usage on device per atom
+  int bytes_per_atom(const int max_nbors) const;
+
+  /// Total host memory used by library for pair style
+  double host_memory_usage() const;
+
+  void get_extra_data(double *host_T, double *host_cv);
+
+  /// copy Q (flux) from device to host
+  void update_flux(void **flux_ptr);
+
+  // --------------------------- TYPE DATA --------------------------
+
+  /// coeff.x = a0, coeff.y = gamma, coeff.z = cut
+  UCL_D_Vec<numtyp4> coeff;
+  /// coeff2.x = power, coeff2.y = kappa, coeff2.z = powerT, coeff2.w = cutT
+  UCL_D_Vec<numtyp4> coeff2;
+
+  UCL_D_Vec<numtyp4> kc, sc;
+  UCL_D_Vec<numtyp> cutsq;
+
+  /// per-type array
+  UCL_D_Vec<numtyp> mass;
+
+  /// Special LJ values
+  UCL_D_Vec<numtyp> sp_lj, sp_sqrt;
+
+  /// If atom type constants fit in shared memory, use fast kernels
+  bool shared_types;
+
+  /// Number of atom types
+  int _lj_types;
+
+  /// Per-atom arrays
+  UCL_Vector<acctyp,acctyp> Q;
+  int _max_q_size;
+
+  int _power_flag, _kappa_flag;
+
+  /// pointer to host data
+  double *edpd_temp, *edpd_cv;
+
+ private:
+  bool _allocated;
+  int loop(const int eflag, const int vflag);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_edpd_ext.cpp b/lib/gpu/lal_edpd_ext.cpp
new file mode 100644
index 0000000000..a9f60c3941
--- /dev/null
+++ b/lib/gpu/lal_edpd_ext.cpp
@@ -0,0 +1,142 @@
+/***************************************************************************
+                                 edpd_ext.cpp
+                             -------------------
+                            Trung Dac Nguyen (U Chicago)
+
+  Functions for LAMMPS access to edpd acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : September 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "lal_edpd.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static EDPD<PRECISION,ACC_PRECISION> EDPDMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int edpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
+                  double **host_gamma, double **host_cut, double **host_power,
+                  double **host_kappa, double **host_powerT, double **host_cutT,
+                  double ***host_sc, double ***host_kc, double *host_mass,
+                  double *special_lj, const int power_flag, const int kappa_flag,
+                  const int inum, const int nall,
+                  const int max_nbors,  const int maxspecial,
+                  const double cell_size, int &gpu_mode, FILE *screen) {
+  EDPDMF.clear();
+  gpu_mode=EDPDMF.device->gpu_mode();
+  double gpu_split=EDPDMF.device->particle_split();
+  int first_gpu=EDPDMF.device->first_device();
+  int last_gpu=EDPDMF.device->last_device();
+  int world_me=EDPDMF.device->world_me();
+  int gpu_rank=EDPDMF.device->gpu_rank();
+  int procs_per_gpu=EDPDMF.device->procs_per_gpu();
+
+  EDPDMF.device->init_message(screen,"edpd",first_gpu,last_gpu);
+
+  bool message=false;
+  if (EDPDMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=EDPDMF.init(ntypes, cutsq, host_a0, host_gamma, host_cut,
+                        host_power, host_kappa, host_powerT,
+                        host_cutT, host_sc, host_kc, host_mass,
+                        special_lj, power_flag, kappa_flag,
+                        inum, nall, max_nbors,  maxspecial,
+                        cell_size, gpu_split, screen);
+
+  EDPDMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=EDPDMF.init(ntypes, cutsq, host_a0, host_gamma, host_cut,
+                          host_power, host_kappa, host_powerT, host_cutT,
+                          host_sc, host_kc, host_mass,
+                          special_lj, power_flag, kappa_flag,
+                          inum, nall, max_nbors, maxspecial,
+                          cell_size, gpu_split, screen);
+
+    EDPDMF.device->serialize_init();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    EDPDMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+void edpd_gpu_clear() {
+  EDPDMF.clear();
+}
+
+int ** edpd_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                         double **host_x, int *host_type, double *sublo,
+                         double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag,
+                         const bool eatom, const bool vatom, int &host_start,
+                         int **ilist, int **jnum, const double cpu_time, bool &success,
+                         double **host_v, const double dtinvsqrt,
+                         const int seed, const int timestep,
+                         double *boxlo, double *prd) {
+  return EDPDMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                        subhi, tag, nspecial, special, eflag, vflag, eatom,
+                        vatom, host_start, ilist, jnum, cpu_time, success,
+                        host_v, dtinvsqrt, seed, timestep, boxlo, prd);
+}
+
+void edpd_gpu_compute(const int ago, const int inum_full, const int nall,
+                      double **host_x, int *host_type, int *ilist, int *numj,
+                      int **firstneigh, const bool eflag, const bool vflag,
+                      const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, tagint *tag,
+                      double **host_v, const double dtinvsqrt,
+                      const int seed, const int timestep,
+                      const int nlocal, double *boxlo, double *prd) {
+  EDPDMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj,
+                firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success,
+                tag, host_v, dtinvsqrt, seed, timestep, nlocal, boxlo, prd);
+}
+
+void edpd_gpu_get_extra_data(double *host_T, double *host_cv) {
+  EDPDMF.get_extra_data(host_T, host_cv);
+}
+
+void edpd_gpu_update_flux(void **flux_ptr) {
+  EDPDMF.update_flux(flux_ptr);
+}
+
+double edpd_gpu_bytes() {
+  return EDPDMF.host_memory_usage();
+}
diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp
index 8d6ad5dfb2..3511d82b00 100644
--- a/lib/gpu/lal_hippo.cpp
+++ b/lib/gpu/lal_hippo.cpp
@@ -603,13 +603,7 @@ int HippoT::polar_real(const int eflag, const int vflag) {
 
   const int BX=this->block_size();
   const int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
-  /*
-  const int cus = this->device->gpu->cus();
-  while (GX < cus && GX > 1) {
-    BX /= 2;
-    GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
-  }
-  */
+
   this->time_pair.start();
 
   // Build the short neighbor list if not done yet
diff --git a/lib/gpu/lal_hippo.cu b/lib/gpu/lal_hippo.cu
index 7de7bd594f..01ad8e753a 100644
--- a/lib/gpu/lal_hippo.cu
+++ b/lib/gpu/lal_hippo.cu
@@ -1225,7 +1225,9 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
   atom_info(t_per_atom,ii,tid,offset);
 
   int n_stride;
+#if (SHUFFLE_AVAIL == 0)
   local_allocate_store_charge();
+#endif
 
   acctyp _fieldp[6];
   for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
@@ -1410,7 +1412,9 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
   atom_info(t_per_atom,ii,tid,offset);
 
   int n_stride;
+#if (SHUFFLE_AVAIL == 0)
   local_allocate_store_charge();
+#endif
 
   acctyp _fieldp[6];
   for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
@@ -2452,13 +2456,13 @@ __kernel void k_hippo_special15(__global int * dev_nbor,
                           const __global tagint *restrict special15,
                           const int inum, const int nall, const int nbor_pitch,
                           const int t_per_atom) {
-  int tid, ii, offset, n_stride, i;
+  int tid, ii, offset, n_stride, j;
   atom_info(t_per_atom,ii,tid,offset);
 
   if (ii<inum) {
 
     int numj, nbor, nbor_end;
-    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,j,numj,
               n_stride,nbor_end,nbor);
 
     int n15 = nspecial15[ii];
@@ -2467,7 +2471,7 @@ __kernel void k_hippo_special15(__global int * dev_nbor,
 
       int sj=dev_packed[nbor];
       int which = sj >> SBBITS & 3;
-      int j = sj & NEIGHMASK;
+      j = sj & NEIGHMASK;
       tagint jtag = tag[j];
 
       if (!which) {
diff --git a/lib/gpu/lal_lj_coul_long.h b/lib/gpu/lal_lj_coul_long.h
index bc4fce40a5..ace5a26339 100644
--- a/lib/gpu/lal_lj_coul_long.h
+++ b/lib/gpu/lal_lj_coul_long.h
@@ -78,7 +78,7 @@ class LJCoulLong : public BaseCharge<numtyp, acctyp> {
 
   numtyp _cut_coulsq, _qqrd2e, _g_ewald;
 
- private:
+protected:
   bool _allocated;
   int loop(const int eflag, const int vflag);
 };
diff --git a/lib/gpu/lal_lj_coul_long_soft.cpp b/lib/gpu/lal_lj_coul_long_soft.cpp
new file mode 100644
index 0000000000..80eaaca94a
--- /dev/null
+++ b/lib/gpu/lal_lj_coul_long_soft.cpp
@@ -0,0 +1,174 @@
+/***************************************************************************
+                            lj_coul_long_soft.cpp
+                             -------------------
+                            Trung Nguyen (U Chicago)
+
+  Class for acceleration of the lj/cut/coul/long/soft pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "lj_coul_long_soft_cl.h"
+#elif defined(USE_CUDART)
+const char *lj_coul_long_soft=0;
+#else
+#include "lj_coul_long_soft_cubin.h"
+#endif
+
+#include "lal_lj_coul_long_soft.h"
+#include <cassert>
+namespace LAMMPS_AL {
+#define LJCoulLongSoftT LJCoulLongSoft<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> device;
+
+template <class numtyp, class acctyp>
+LJCoulLongSoftT::LJCoulLongSoft() : BaseCharge<numtyp,acctyp>(),
+                                    _allocated(false) {
+}
+
+template <class numtyp, class acctyp>
+LJCoulLongSoftT::~LJCoulLongSoft() {
+  clear();
+}
+
+template <class numtyp, class acctyp>
+int LJCoulLongSoftT::bytes_per_atom(const int max_nbors) const {
+  return this->bytes_per_atom_atomic(max_nbors);
+}
+
+template <class numtyp, class acctyp>
+int LJCoulLongSoftT::init(const int ntypes,
+                           double **host_cutsq, double **host_lj1,
+                           double **host_lj2, double **host_lj3,
+                           double **host_lj4, double **host_offset, double **host_epsilon,
+                           double *host_special_lj, const int nlocal,
+                           const int nall, const int max_nbors,
+                           const int maxspecial, const double cell_size,
+                           const double gpu_split, FILE *_screen,
+                           double **host_cut_ljsq, const double host_cut_coulsq,
+                           double *host_special_coul, const double qqrd2e,
+                           const double g_ewald) {
+  int success;
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
+                            _screen,lj_coul_long_soft,"k_lj_coul_long_soft");
+  if (success!=0)
+    return success;
+
+  // If atom type constants fit in shared memory use fast kernel
+  int lj_types=ntypes;
+  shared_types=false;
+  int max_shared_types=this->device->max_shared_types();
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    shared_types=true;
+  }
+  _lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  lj1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,lj1,host_write,host_lj1,host_lj2,
+           host_cutsq, host_cut_ljsq);
+
+  lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,lj3,host_write,host_lj3,host_lj4,
+                         host_offset, host_epsilon);
+
+  sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY);
+  for (int i=0; i<4; i++) {
+    host_write[i]=host_special_lj[i];
+    host_write[i+4]=host_special_coul[i];
+  }
+  ucl_copy(sp_lj,host_write,8,false);
+
+  _cut_coulsq=host_cut_coulsq;
+  _qqrd2e=qqrd2e;
+  _g_ewald=g_ewald;
+
+  _allocated=true;
+  this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_lj.row_bytes();
+  return 0;
+}
+
+template <class numtyp, class acctyp>
+void LJCoulLongSoftT::reinit(const int ntypes, double **host_cutsq, double **host_lj1,
+                         double **host_lj2, double **host_lj3, double **host_lj4,
+                         double **host_offset, double **host_epsilon, double **host_cut_ljsq) {
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<_lj_types*_lj_types; i++)
+    host_write[i]=0.0;
+
+  this->atom->type_pack4(ntypes,_lj_types,lj1,host_write,host_lj1,host_lj2,
+                         host_cutsq, host_cut_ljsq);
+  this->atom->type_pack4(ntypes,_lj_types,lj3,host_write,host_lj3,host_lj4,
+                         host_offset, host_epsilon);
+}
+
+template <class numtyp, class acctyp>
+void LJCoulLongSoftT::clear() {
+  if (!_allocated)
+    return;
+  _allocated=false;
+
+  lj1.clear();
+  lj3.clear();
+  sp_lj.clear();
+  this->clear_atomic();
+}
+
+template <class numtyp, class acctyp>
+double LJCoulLongSoftT::host_memory_usage() const {
+  return this->host_memory_usage_atomic()+sizeof(LJCoulLongSoft<numtyp,acctyp>);
+}
+
+// ---------------------------------------------------------------------------
+// Calculate energies, forces, and torques
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int LJCoulLongSoftT::loop(const int eflag, const int vflag) {
+  // Compute the block size and grid size to keep all cores busy
+  const int BX=this->block_size();
+  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
+                               (BX/this->_threads_per_atom)));
+
+  int ainum=this->ans->inum();
+  int nbor_pitch=this->nbor->nbor_pitch();
+  this->time_pair.start();
+  if (shared_types) {
+    this->k_pair_sel->set_size(GX,BX);
+    this->k_pair_sel->run(&this->atom->x, &lj1, &lj3, &sp_lj,
+                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                          &this->ans->force, &this->ans->engv, &eflag,
+                          &vflag, &ainum, &nbor_pitch, &this->atom->q,
+                          &_cut_coulsq, &_qqrd2e, &_g_ewald,
+                          &this->_threads_per_atom);
+  } else {
+    this->k_pair.set_size(GX,BX);
+    this->k_pair.run(&this->atom->x, &lj1, &lj3,
+                     &_lj_types, &sp_lj, &this->nbor->dev_nbor,
+                     &this->_nbor_data->begin(), &this->ans->force,
+                     &this->ans->engv, &eflag, &vflag, &ainum,
+                     &nbor_pitch, &this->atom->q, &_cut_coulsq,
+                     &_qqrd2e, &_g_ewald, &this->_threads_per_atom);
+  }
+  this->time_pair.stop();
+  return GX;
+}
+
+template class LJCoulLongSoft<PRECISION,ACC_PRECISION>;
+}
diff --git a/lib/gpu/lal_lj_coul_long_soft.cu b/lib/gpu/lal_lj_coul_long_soft.cu
new file mode 100644
index 0000000000..e311bb5d3b
--- /dev/null
+++ b/lib/gpu/lal_lj_coul_long_soft.cu
@@ -0,0 +1,290 @@
+// **************************************************************************
+//                            lj_coul_long_soft.cu
+//                             -------------------
+//                           Trung Nguyen (U Chicago)
+//
+//  Device code for acceleration of the lj/cut/coul/long/soft pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                :
+//    email                : ndactrung@gmail.com
+// ***************************************************************************
+
+#if defined(NV_KERNEL) || defined(USE_HIP)
+
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+_texture( pos_tex,float4);
+_texture( q_tex,float);
+#else
+_texture_2d( pos_tex,int4);
+_texture( q_tex,int2);
+#endif
+
+#else
+#define pos_tex x_
+#define q_tex q_
+#endif
+
+__kernel void k_lj_coul_long_soft(const __global numtyp4 *restrict x_,
+                             const __global numtyp4 *restrict lj1,
+                             const __global numtyp4 *restrict lj3,
+                             const int lj_types,
+                             const __global numtyp *restrict sp_lj_in,
+                             const __global int *dev_nbor,
+                             const __global int *dev_packed,
+                             __global acctyp3 *restrict ans,
+                             __global acctyp *restrict engv,
+                             const int eflag, const int vflag, const int inum,
+                             const int nbor_pitch,
+                             const __global numtyp *restrict q_,
+                             const numtyp cut_coulsq, const numtyp qqrd2e,
+                             const numtyp g_ewald, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  __local numtyp sp_lj[8];
+  int n_stride;
+  local_allocate_store_charge();
+
+  sp_lj[0]=sp_lj_in[0];
+  sp_lj[1]=sp_lj_in[1];
+  sp_lj[2]=sp_lj_in[2];
+  sp_lj[3]=sp_lj_in[3];
+  sp_lj[4]=sp_lj_in[4];
+  sp_lj[5]=sp_lj_in[5];
+  sp_lj[6]=sp_lj_in[6];
+  sp_lj[7]=sp_lj_in[7];
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp energy, e_coul, virial[6];
+  if (EVFLAG) {
+    energy=(acctyp)0;
+    e_coul=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    numtyp qtmp; fetch(qtmp,i,q_tex);
+    int itype=ix.w;
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+      int j=dev_packed[nbor];
+
+      numtyp factor_lj, factor_coul;
+      factor_lj = sp_lj[sbmask(j)];
+      factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      int mtype=itype*lj_types+jtype;
+      if (rsq<lj1[mtype].z) {
+        numtyp r2inv=ucl_recip(rsq);
+        numtyp forcecoul, force_lj, force, prefactor, _erfc;
+        numtyp denc, denlj, r4sig6;
+
+        if (rsq < lj1[mtype].w) {
+          r4sig6 = rsq*rsq / lj1[mtype].y;
+          denlj = lj3[mtype].x + rsq*r4sig6;
+          force_lj = lj1[mtype].x * lj3[mtype].w *
+            ((numtyp)48.0*r4sig6/(denlj*denlj*denlj) - (numtyp)24.0*r4sig6/(denlj*denlj));
+        } else
+          force_lj = (numtyp)0.0;
+
+        if (rsq < cut_coulsq) {
+          numtyp r = ucl_rsqrt(r2inv);
+          numtyp grij = g_ewald * r;
+          numtyp expm2 = ucl_exp(-grij*grij);
+          numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
+          _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+          fetch(prefactor,j,q_tex);
+
+          denc = ucl_sqrt(lj3[mtype].y + rsq);
+          prefactor *= qqrd2e * lj1[mtype].x * qtmp / (denc*denc*denc);
+
+          forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
+        } else
+          forcecoul = (numtyp)0.0;
+
+        force = factor_lj * force_lj + forcecoul;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (EVFLAG && eflag) {
+          if (rsq < cut_coulsq) {
+            prefactor *= (denc*denc);
+            e_coul += prefactor*(_erfc-factor_coul);
+          }
+          if (rsq < lj1[mtype].w) {
+            numtyp e= lj1[mtype].x * (numtyp)4.0 * lj3[mtype].w *
+              ((numtyp)1.0/(denlj*denlj) - (numtyp)1.0/denlj);
+            energy+=factor_lj*(e-lj3[mtype].z);
+          }
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+  } // if ii
+  store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
+                  vflag,ans,engv);
+}
+
+__kernel void k_lj_coul_long_soft_fast(const __global numtyp4 *restrict x_,
+                                  const __global numtyp4 *restrict lj1_in,
+                                  const __global numtyp4 *restrict lj3_in,
+                                  const __global numtyp *restrict sp_lj_in,
+                                  const __global int *dev_nbor,
+                                  const __global int *dev_packed,
+                                  __global acctyp3 *restrict ans,
+                                  __global acctyp *restrict engv,
+                                  const int eflag, const int vflag,
+                                  const int inum,  const int nbor_pitch,
+                                  const __global numtyp *restrict q_,
+                                  const numtyp cut_coulsq, const numtyp qqrd2e,
+                                  const numtyp g_ewald, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  __local numtyp4 lj1[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp4 lj3[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp sp_lj[8];
+  int n_stride;
+  local_allocate_store_charge();
+
+  if (tid<8)
+    sp_lj[tid]=sp_lj_in[tid];
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
+    lj1[tid]=lj1_in[tid];
+    if (EVFLAG && eflag)
+      lj3[tid]=lj3_in[tid];
+  }
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp energy, e_coul, virial[6];
+  if (EVFLAG) {
+    energy=(acctyp)0;
+    e_coul=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+
+  __syncthreads();
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    numtyp qtmp; fetch(qtmp,i,q_tex);
+    int iw=ix.w;
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+      int j=dev_packed[nbor];
+
+      numtyp factor_lj, factor_coul;
+      factor_lj = sp_lj[sbmask(j)];
+      factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int mtype=itype+jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      if (rsq<lj1[mtype].z) {
+        numtyp forcecoul, force_lj, force, prefactor, _erfc;
+        numtyp denc, denlj, r4sig6;
+
+        if (rsq < lj1[mtype].w) {
+          r4sig6 = rsq*rsq / lj1[mtype].y;
+          denlj = lj3[mtype].x + rsq*r4sig6;
+          force_lj = lj1[mtype].x * lj3[mtype].w *
+            ((numtyp)48.0*r4sig6/(denlj*denlj*denlj) - (numtyp)24.0*r4sig6/(denlj*denlj));
+        } else
+          force_lj = (numtyp)0.0;
+
+        if (rsq < cut_coulsq) {
+          numtyp r = ucl_sqrt(rsq);
+          numtyp grij = g_ewald * r;
+          numtyp expm2 = ucl_exp(-grij*grij);
+          numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
+          _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+          fetch(prefactor,j,q_tex);
+
+          denc = ucl_sqrt(lj3[mtype].y + rsq);
+          prefactor *= qqrd2e * lj1[mtype].x * qtmp / (denc*denc*denc);
+
+          forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
+        } else
+          forcecoul = (numtyp)0.0;
+
+        force = forcecoul + factor_lj*force_lj;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (EVFLAG && eflag) {
+          if (rsq < cut_coulsq) {
+            prefactor *= (denc*denc);
+            e_coul += prefactor*(_erfc-factor_coul);
+          }
+          if (rsq < lj1[mtype].w) {
+            numtyp e= lj1[mtype].x * (numtyp)4.0 * lj3[mtype].w *
+              ((numtyp)1.0/(denlj*denlj) - (numtyp)1.0/denlj);
+            energy+=factor_lj*(e-lj3[mtype].z);
+          }
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+  } // if ii
+  store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
+                  vflag,ans,engv);
+}
+
diff --git a/lib/gpu/lal_lj_coul_long_soft.h b/lib/gpu/lal_lj_coul_long_soft.h
new file mode 100644
index 0000000000..b3d4bff4a4
--- /dev/null
+++ b/lib/gpu/lal_lj_coul_long_soft.h
@@ -0,0 +1,88 @@
+/***************************************************************************
+                             lj_coul_long_soft.h
+                             -------------------
+                            Trung Nguyen (U Chicago)
+
+  Class for acceleration of the lj/cut/coul/long/soft pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_LJ_COUL_LONG_SOFT_H
+#define LAL_LJ_COUL_LONG_SOFT_H
+
+#include "lal_base_charge.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class LJCoulLongSoft : public BaseCharge<numtyp, acctyp> {
+ public:
+  LJCoulLongSoft();
+  ~LJCoulLongSoft();
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    *
+    * Returns:
+    * -  0 if successful
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **host_cutsq,
+           double **host_lj1, double **host_lj2, double **host_lj3,
+           double **host_lj4, double **host_offset, double **host_epsilon, double *host_special_lj,
+           const int nlocal, const int nall, const int max_nbors,
+           const int maxspecial, const double cell_size,
+           const double gpu_split, FILE *screen, double **host_cut_ljsq,
+           const double host_cut_coulsq, double *host_special_coul,
+           const double qqrd2e, const double g_ewald);
+
+  /// Send updated coeffs from host to device (to be compatible with fix adapt)
+  void reinit(const int ntypes, double **host_cutsq,
+              double **host_lj1, double **host_lj2, double **host_lj3,
+              double **host_lj4, double **host_offset, double **host_epsilon, double **host_cut_ljsq);
+
+  /// Clear all host and device data
+  /** \note This is called at the beginning of the init() routine **/
+  void clear();
+
+  /// Returns memory usage on device per atom
+  int bytes_per_atom(const int max_nbors) const;
+
+  /// Total host memory used by library for pair style
+  double host_memory_usage() const;
+
+  // --------------------------- TYPE DATA --------------------------
+
+  /// lj1.x = lj1, lj1.y = lj2, lj1.z = cutsq, lj1.w = cutsq_vdw
+  UCL_D_Vec<numtyp4> lj1;
+  /// lj3.x = lj3, lj3.y = lj4, lj3.z = offset, lj3.w = epsilon
+  UCL_D_Vec<numtyp4> lj3;
+  /// Special LJ values [0-3] and Special Coul values [4-7]
+  UCL_D_Vec<numtyp> sp_lj;
+
+  /// If atom type constants fit in shared memory, use fast kernels
+  bool shared_types;
+
+  /// Number of atom types
+  int _lj_types;
+
+  numtyp _cut_coulsq, _qqrd2e, _g_ewald;
+
+protected:
+  bool _allocated;
+  int loop(const int eflag, const int vflag);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_lj_coul_long_soft_ext.cpp b/lib/gpu/lal_lj_coul_long_soft_ext.cpp
new file mode 100644
index 0000000000..cb2657c03b
--- /dev/null
+++ b/lib/gpu/lal_lj_coul_long_soft_ext.cpp
@@ -0,0 +1,151 @@
+/***************************************************************************
+                            lj_coul_long_soft_ext.cpp
+                            -------------------------
+                            Trung Nguyen (U Chicago)
+
+  Functions for LAMMPS access to lj/cut/coul/long/soft acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "lal_lj_coul_long_soft.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static LJCoulLongSoft<PRECISION,ACC_PRECISION> LJCLSMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int ljcls_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
+                  double **host_lj2, double **host_lj3, double **host_lj4,
+                  double **offset, double **epsilon, double *special_lj, const int inum,
+                  const int nall, const int max_nbors, const int maxspecial,
+                  const double cell_size, int &gpu_mode, FILE *screen,
+                  double **host_cut_ljsq, double host_cut_coulsq,
+                  double *host_special_coul, const double qqrd2e,
+                  const double g_ewald) {
+  LJCLSMF.clear();
+  gpu_mode=LJCLSMF.device->gpu_mode();
+  double gpu_split=LJCLSMF.device->particle_split();
+  int first_gpu=LJCLSMF.device->first_device();
+  int last_gpu=LJCLSMF.device->last_device();
+  int world_me=LJCLSMF.device->world_me();
+  int gpu_rank=LJCLSMF.device->gpu_rank();
+  int procs_per_gpu=LJCLSMF.device->procs_per_gpu();
+
+  LJCLSMF.device->init_message(screen,"lj/cut/coul/long/soft",first_gpu,last_gpu);
+
+  bool message=false;
+  if (LJCLSMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=LJCLSMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
+                        offset, epsilon, special_lj, inum, nall, max_nbors, maxspecial,
+                        cell_size, gpu_split, screen, host_cut_ljsq,
+                        host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
+
+  LJCLSMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=LJCLSMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
+                          offset, epsilon, special_lj, inum, nall, max_nbors, maxspecial,
+                          cell_size, gpu_split, screen, host_cut_ljsq,
+                          host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
+
+    LJCLSMF.device->gpu_barrier();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    LJCLSMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+// ---------------------------------------------------------------------------
+// Copy updated coeffs from host to device
+// ---------------------------------------------------------------------------
+void ljcls_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
+                    double **host_lj2, double **host_lj3, double **host_lj4,
+                    double **offset, double **epsilon, double **host_cut_ljsq) {
+  int world_me=LJCLSMF.device->world_me();
+  int gpu_rank=LJCLSMF.device->gpu_rank();
+  int procs_per_gpu=LJCLSMF.device->procs_per_gpu();
+
+  if (world_me==0)
+    LJCLSMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
+                  offset, epsilon, host_cut_ljsq);
+  LJCLSMF.device->world_barrier();
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (gpu_rank==i && world_me!=0)
+      LJCLSMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
+                    offset, epsilon, host_cut_ljsq);
+    LJCLSMF.device->gpu_barrier();
+  }
+}
+
+void ljcls_gpu_clear() {
+  LJCLSMF.clear();
+}
+
+int** ljcls_gpu_compute_n(const int ago, const int inum_full,
+                         const int nall, double **host_x, int *host_type,
+                         double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag,
+                         const bool eatom, const bool vatom, int &host_start,
+                         int **ilist, int **jnum,  const double cpu_time,
+                         bool &success, double *host_q, double *boxlo,
+                         double *prd) {
+  return LJCLSMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                        subhi, tag, nspecial, special, eflag, vflag, eatom,
+                        vatom, host_start, ilist, jnum, cpu_time, success,
+                        host_q, boxlo, prd);
+}
+
+void ljcls_gpu_compute(const int ago, const int inum_full, const int nall,
+                      double **host_x, int *host_type, int *ilist, int *numj,
+                      int **firstneigh, const bool eflag, const bool vflag,
+                      const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, double *host_q,
+                      const int nlocal, double *boxlo, double *prd) {
+  LJCLSMF.compute(ago,inum_full,nall,host_x,host_type,ilist,numj,
+                firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success,
+                host_q,nlocal,boxlo,prd);
+}
+
+double ljcls_gpu_bytes() {
+  return LJCLSMF.host_memory_usage();
+}
+
diff --git a/lib/gpu/lal_lj_coul_soft.cpp b/lib/gpu/lal_lj_coul_soft.cpp
new file mode 100644
index 0000000000..9ee6486817
--- /dev/null
+++ b/lib/gpu/lal_lj_coul_soft.cpp
@@ -0,0 +1,157 @@
+/***************************************************************************
+                                 lj_coul_soft.cpp
+                             -------------------
+                            Trung Nguyen (U Chicago)
+
+  Class for acceleration of the lj/cut/coul/cut/soft pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : ndtrung@uchicago.edu
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "lj_coul_soft_cl.h"
+#elif defined(USE_CUDART)
+const char *lj_coul_soft=0;
+#else
+#include "lj_coul_soft_cubin.h"
+#endif
+
+#include "lal_lj_coul_soft.h"
+#include <cassert>
+namespace LAMMPS_AL {
+#define LJCoulSoftT LJCoulSoft<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> device;
+
+template <class numtyp, class acctyp>
+LJCoulSoftT::LJCoulSoft() : BaseCharge<numtyp,acctyp>(),
+                                    _allocated(false) {
+}
+
+template <class numtyp, class acctyp>
+LJCoulSoftT::~LJCoulSoft() {
+  clear();
+}
+
+template <class numtyp, class acctyp>
+int LJCoulSoftT::bytes_per_atom(const int max_nbors) const {
+  return this->bytes_per_atom_atomic(max_nbors);
+}
+
+template <class numtyp, class acctyp>
+int LJCoulSoftT::init(const int ntypes,
+                          double **host_cutsq, double **host_lj1,
+                          double **host_lj2, double **host_lj3,
+                          double **host_lj4, double **host_offset, double **host_epsilon,
+                          double *host_special_lj, const int nlocal,
+                          const int nall, const int max_nbors,
+                          const int maxspecial, const double cell_size,
+                          const double gpu_split, FILE *_screen,
+                          double **host_cut_ljsq, double **host_cut_coulsq,
+                          double *host_special_coul, const double qqrd2e) {
+  int success;
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
+                            _screen,lj_coul_soft,"k_lj_coul_soft");
+  if (success!=0)
+    return success;
+
+  // If atom type constants fit in shared memory use fast kernel
+  int lj_types=ntypes;
+  shared_types=false;
+  int max_shared_types=this->device->max_shared_types();
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    shared_types=true;
+  }
+  _lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  lj1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,lj1,host_write,host_lj1,host_lj2,
+                         host_cut_ljsq, host_cut_coulsq);
+
+  lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,lj3,host_write,host_lj3,host_lj4,
+                         host_offset, host_epsilon);
+
+  cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack1(ntypes,lj_types,cutsq,host_write,host_cutsq);
+
+  sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY);
+  for (int i=0; i<4; i++) {
+    host_write[i]=host_special_lj[i];
+    host_write[i+4]=host_special_coul[i];
+  }
+  ucl_copy(sp_lj,host_write,8,false);
+
+  _qqrd2e=qqrd2e;
+
+  _allocated=true;
+  this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+cutsq.row_bytes()+
+                   sp_lj.row_bytes();
+  return 0;
+}
+
+template <class numtyp, class acctyp>
+void LJCoulSoftT::clear() {
+  if (!_allocated)
+    return;
+  _allocated=false;
+
+  lj1.clear();
+  lj3.clear();
+  cutsq.clear();
+  sp_lj.clear();
+  this->clear_atomic();
+}
+
+template <class numtyp, class acctyp>
+double LJCoulSoftT::host_memory_usage() const {
+  return this->host_memory_usage_atomic()+sizeof(LJCoulSoft<numtyp,acctyp>);
+}
+
+// ---------------------------------------------------------------------------
+// Calculate energies, forces, and torques
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int LJCoulSoftT::loop(const int eflag, const int vflag) {
+  // Compute the block size and grid size to keep all cores busy
+  const int BX=this->block_size();
+  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
+                               (BX/this->_threads_per_atom)));
+
+  int ainum=this->ans->inum();
+  int nbor_pitch=this->nbor->nbor_pitch();
+  this->time_pair.start();
+  if (shared_types) {
+    this->k_pair_sel->set_size(GX,BX);
+    this->k_pair_sel->run(&this->atom->x, &lj1, &lj3, &sp_lj,
+                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                          &this->ans->force, &this->ans->engv, &eflag,
+                          &vflag, &ainum, &nbor_pitch, &this->atom->q,
+                          &cutsq, &_qqrd2e, &this->_threads_per_atom);
+  } else {
+    this->k_pair.set_size(GX,BX);
+    this->k_pair.run(&this->atom->x, &lj1, &lj3, &_lj_types, &sp_lj,
+                     &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                     &this->ans->force, &this->ans->engv,
+                     &eflag, &vflag, &ainum, &nbor_pitch, &this->atom->q,
+                     &cutsq, &_qqrd2e, &this->_threads_per_atom);
+  }
+  this->time_pair.stop();
+  return GX;
+}
+
+template class LJCoulSoft<PRECISION,ACC_PRECISION>;
+}
diff --git a/lib/gpu/lal_lj_coul_soft.cu b/lib/gpu/lal_lj_coul_soft.cu
new file mode 100644
index 0000000000..1fc564bde6
--- /dev/null
+++ b/lib/gpu/lal_lj_coul_soft.cu
@@ -0,0 +1,276 @@
+// **************************************************************************
+//                               lj_coul_soft.cu
+//                             -------------------
+//                           Trung Nguyen (U Chicago)
+//
+//  Device code for acceleration of the lj/coul/cut/soft pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                :
+//    email                : ndtrung@uchicago.edu
+// ***************************************************************************
+
+#if defined(NV_KERNEL) || defined(USE_HIP)
+
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+_texture( pos_tex,float4);
+_texture( q_tex,float);
+#else
+_texture_2d( pos_tex,int4);
+_texture( q_tex,int2);
+#endif
+
+#else
+#define pos_tex x_
+#define q_tex q_
+#endif
+
+__kernel void k_lj_coul_soft(const __global numtyp4 *restrict x_,
+                        const __global numtyp4 *restrict lj1,
+                        const __global numtyp4 *restrict  lj3,
+                        const int lj_types,
+                        const __global numtyp *restrict sp_lj_in,
+                        const __global int *dev_nbor,
+                        const __global int *dev_packed,
+                        __global acctyp3 *restrict ans,
+                        __global acctyp *restrict engv,
+                        const int eflag, const int vflag, const int inum,
+                        const int nbor_pitch,
+                        const __global numtyp *restrict q_,
+                        const __global numtyp *restrict cutsq,
+                        const numtyp qqrd2e, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  __local numtyp sp_lj[8];
+  int n_stride;
+  local_allocate_store_charge();
+
+  sp_lj[0]=sp_lj_in[0];
+  sp_lj[1]=sp_lj_in[1];
+  sp_lj[2]=sp_lj_in[2];
+  sp_lj[3]=sp_lj_in[3];
+  sp_lj[4]=sp_lj_in[4];
+  sp_lj[5]=sp_lj_in[5];
+  sp_lj[6]=sp_lj_in[6];
+  sp_lj[7]=sp_lj_in[7];
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp energy, e_coul, virial[6];
+  if (EVFLAG) {
+    energy=(acctyp)0;
+    e_coul=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    numtyp qtmp; fetch(qtmp,i,q_tex);
+    int itype=ix.w;
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+      int j=dev_packed[nbor];
+
+      numtyp factor_lj, factor_coul;
+      factor_lj = sp_lj[sbmask(j)];
+      factor_coul = sp_lj[sbmask(j)+4];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      int mtype=itype*lj_types+jtype;
+      if (rsq<cutsq[mtype]) {
+        numtyp forcecoul, force_lj, force;
+        numtyp r4sig6, denlj, denc;
+
+        if (rsq < lj1[mtype].z) {
+          r4sig6 = rsq*rsq / lj1[mtype].y;
+          denlj = lj3[mtype].x + rsq*r4sig6;
+          force_lj = lj1[mtype].x * lj3[mtype].w *
+            ((numtyp)48.0*r4sig6/(denlj*denlj*denlj) - (numtyp)24.0*r4sig6/(denlj*denlj));
+          force_lj *= factor_lj;
+        } else
+          force_lj = (numtyp)0.0;
+
+        if (rsq < lj1[mtype].w) {
+          fetch(forcecoul,j,q_tex);
+          denc = sqrt(lj3[mtype].y + rsq);
+          forcecoul *= qqrd2e * lj1[mtype].x * qtmp / (denc*denc*denc);
+          forcecoul *= factor_coul;
+        } else
+          forcecoul = (numtyp)0.0;
+
+        force = force_lj + forcecoul;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (EVFLAG && eflag) {
+          if (rsq < lj1[mtype].w) {
+             e_coul += forcecoul*(denc*denc);
+          }
+          if (rsq < lj1[mtype].z) {
+            numtyp e = lj1[mtype].x * (numtyp)4.0 * lj3[mtype].w *
+              ((numtyp)1.0/(denlj*denlj) - (numtyp)1.0/denlj);
+            energy+=factor_lj*(e-lj3[mtype].z);
+          }
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+  } // if ii
+  store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
+                  vflag,ans,engv);
+}
+
+__kernel void k_lj_coul_soft_fast(const __global numtyp4 *restrict x_,
+                             const __global numtyp4 *restrict lj1_in,
+                             const __global numtyp4 *restrict lj3_in,
+                             const __global numtyp *restrict sp_lj_in,
+                             const __global int *dev_nbor,
+                             const __global int *dev_packed,
+                             __global acctyp3 *restrict ans,
+                             __global acctyp *restrict engv,
+                             const int eflag, const int vflag, const int inum,
+                             const int nbor_pitch,
+                             const __global numtyp *restrict q_,
+                             const __global numtyp *restrict _cutsq,
+                             const numtyp qqrd2e, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  __local numtyp4 lj1[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp4 lj3[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp cutsq[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp sp_lj[8];
+  int n_stride;
+  local_allocate_store_charge();
+
+  if (tid<8)
+    sp_lj[tid]=sp_lj_in[tid];
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
+    lj1[tid]=lj1_in[tid];
+    cutsq[tid]=_cutsq[tid];
+    if (EVFLAG && eflag)
+      lj3[tid]=lj3_in[tid];
+  }
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp energy, e_coul, virial[6];
+  if (EVFLAG) {
+    energy=(acctyp)0;
+    e_coul=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+
+  __syncthreads();
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    numtyp qtmp; fetch(qtmp,i,q_tex);
+    int iw=ix.w;
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      int j=dev_packed[nbor];
+
+      numtyp factor_lj, factor_coul;
+      factor_lj = sp_lj[sbmask(j)];
+      factor_coul = sp_lj[sbmask(j)+4];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int mtype=itype+jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      if (rsq<cutsq[mtype]) {
+        numtyp forcecoul, force_lj, force;
+        numtyp r4sig6, denlj, denc;
+
+        if (rsq < lj1[mtype].z) { // cut_ljsq[itype][jtype]
+          r4sig6 = rsq*rsq / lj1[mtype].y;
+          denlj = lj3[mtype].x + rsq*r4sig6;
+          force_lj = lj1[mtype].x * lj3[mtype].w *
+            ((numtyp)48.0*r4sig6/(denlj*denlj*denlj) - (numtyp)24.0*r4sig6/(denlj*denlj));
+          force_lj *= factor_lj;
+        } else
+          force_lj = (numtyp)0.0;
+
+        if (rsq < lj1[mtype].w) { // cut_coulsq[itype][jtype]
+          fetch(forcecoul,j,q_tex);
+          denc = sqrt(lj3[mtype].y + rsq);
+          forcecoul *= qqrd2e * lj1[mtype].x * qtmp / (denc*denc*denc);
+          forcecoul *= factor_coul;
+        } else
+          forcecoul = (numtyp)0.0;
+
+        force = force_lj + forcecoul;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (EVFLAG && eflag) {
+          if (rsq < lj1[mtype].w) {
+             e_coul += forcecoul*(denc*denc);
+          }
+          if (rsq < lj1[mtype].z) {
+            numtyp e = lj1[mtype].x * (numtyp)4.0 * lj3[mtype].w *
+              ((numtyp)1.0/(denlj*denlj) - (numtyp)1.0/denlj);
+            energy+=factor_lj*(e-lj3[mtype].z);
+          }
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+  } // if ii
+  store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
+                  vflag,ans,engv);
+}
+
diff --git a/lib/gpu/lal_lj_coul_soft.h b/lib/gpu/lal_lj_coul_soft.h
new file mode 100644
index 0000000000..cf2c15ff84
--- /dev/null
+++ b/lib/gpu/lal_lj_coul_soft.h
@@ -0,0 +1,85 @@
+/***************************************************************************
+                                lj_coul_soft.h
+                             -------------------
+                            Trung Nguyen (U Chicago)
+
+  Class for acceleration of the lj/cut/coul/cut/soft pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : ndtrung@uchicago.edu
+ ***************************************************************************/
+
+#ifndef LAL_LJ_COUL_SOFT_H
+#define LAL_LJ_COUL_SOFT_H
+
+#include "lal_base_charge.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class LJCoulSoft : public BaseCharge<numtyp, acctyp> {
+ public:
+  LJCoulSoft();
+  ~LJCoulSoft();
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    *
+    * Returns:
+    * -  0 if successful
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **host_cutsq, double **host_lj1,
+           double **host_lj2, double **host_lj3, double **host_lj4,
+           double **host_offset, double **host_epsilon, double *host_special_lj,
+           const int nlocal, const int nall, const int max_nbors,
+           const int maxspecial, const double cell_size,
+           const double gpu_split, FILE *screen, double **host_cut_ljsq,
+           double **host_cut_coulsq, double *host_special_coul,
+           const double qqrd2e);
+
+  /// Clear all host and device data
+  /** \note This is called at the beginning of the init() routine **/
+  void clear();
+
+  /// Returns memory usage on device per atom
+  int bytes_per_atom(const int max_nbors) const;
+
+  /// Total host memory used by library for pair style
+  double host_memory_usage() const;
+
+  // --------------------------- TYPE DATA --------------------------
+
+  /// lj1.x = lj1, lj1.y = lj2, lj1.z = cutsq_vdw, lj1.w = cutsq_coul
+  UCL_D_Vec<numtyp4> lj1;
+  /// lj3.x = lj3, lj3.y = lj4, lj3.z = offset, lj3.w = epsilon
+  UCL_D_Vec<numtyp4> lj3;
+  /// cutsq
+  UCL_D_Vec<numtyp> cutsq;
+  /// Special LJ values [0-3] and Special Coul values [4-7]
+  UCL_D_Vec<numtyp> sp_lj;
+
+  /// If atom type constants fit in shared memory, use fast kernels
+  bool shared_types;
+
+  /// Number of atom types
+  int _lj_types;
+
+  numtyp _qqrd2e;
+
+ private:
+  bool _allocated;
+  int loop(const int eflag, const int vflag);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_lj_coul_soft_ext.cpp b/lib/gpu/lal_lj_coul_soft_ext.cpp
new file mode 100644
index 0000000000..02d367b3c7
--- /dev/null
+++ b/lib/gpu/lal_lj_coul_soft_ext.cpp
@@ -0,0 +1,128 @@
+/***************************************************************************
+                             lj_coul_soft_ext.cpp
+                             -------------------
+                            Trung Nguyen (U Chicago)
+
+  Functions for LAMMPS access to lj/cut/coul/cut/soft acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                :
+    email                : ndtrung@uchicago.edu
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "lal_lj_coul_soft.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static LJCoulSoft<PRECISION,ACC_PRECISION> LJCSMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int ljcs_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
+                 double **host_lj2, double **host_lj3, double **host_lj4,
+                 double **offset, double **epsilon, double *special_lj, const int inum,
+                 const int nall, const int max_nbors, const int maxspecial,
+                 const double cell_size, int &gpu_mode, FILE *screen,
+                 double **host_cut_ljsq, double **host_cut_coulsq,
+                 double *host_special_coul, const double qqrd2e) {
+  LJCSMF.clear();
+  gpu_mode=LJCSMF.device->gpu_mode();
+  double gpu_split=LJCSMF.device->particle_split();
+  int first_gpu=LJCSMF.device->first_device();
+  int last_gpu=LJCSMF.device->last_device();
+  int world_me=LJCSMF.device->world_me();
+  int gpu_rank=LJCSMF.device->gpu_rank();
+  int procs_per_gpu=LJCSMF.device->procs_per_gpu();
+
+  LJCSMF.device->init_message(screen,"lj/cut/coul/cut/soft",first_gpu,last_gpu);
+
+  bool message=false;
+  if (LJCSMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=LJCSMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3,
+                       host_lj4, offset, epsilon, special_lj, inum, nall, max_nbors,
+                       maxspecial, cell_size, gpu_split, screen, host_cut_ljsq,
+                       host_cut_coulsq, host_special_coul, qqrd2e);
+
+  LJCSMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=LJCSMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
+                         offset, epsilon, special_lj, inum, nall, max_nbors, maxspecial,
+                         cell_size, gpu_split, screen, host_cut_ljsq,
+                         host_cut_coulsq, host_special_coul, qqrd2e);
+
+    LJCSMF.device->gpu_barrier();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    LJCSMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+void ljcs_gpu_clear() {
+  LJCSMF.clear();
+}
+
+int** ljcs_gpu_compute_n(const int ago, const int inum_full,
+                        const int nall, double **host_x, int *host_type,
+                        double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        int **ilist, int **jnum, const double cpu_time,
+                        bool &success, double *host_q, double *boxlo,
+                        double *prd) {
+  return LJCSMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                       subhi, tag, nspecial, special, eflag, vflag, eatom,
+                       vatom, host_start, ilist, jnum, cpu_time, success,
+                       host_q, boxlo, prd);
+}
+
+void ljcs_gpu_compute(const int ago, const int inum_full, const int nall,
+                     double **host_x, int *host_type, int *ilist, int *numj,
+                     int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start,
+                     const double cpu_time, bool &success, double *host_q,
+                     const int nlocal, double *boxlo, double *prd) {
+  LJCSMF.compute(ago,inum_full,nall,host_x,host_type,ilist,numj,firstneigh,eflag,
+                vflag,eatom,vatom,host_start,cpu_time,success,host_q,
+                nlocal,boxlo,prd);
+}
+
+double ljcs_gpu_bytes() {
+  return LJCSMF.host_memory_usage();
+}
+
+
diff --git a/lib/gpu/lal_mdpd.cpp b/lib/gpu/lal_mdpd.cpp
new file mode 100644
index 0000000000..16cf926df8
--- /dev/null
+++ b/lib/gpu/lal_mdpd.cpp
@@ -0,0 +1,218 @@
+/***************************************************************************
+                                   mdpd.cpp
+                             -------------------
+                            Trung Dac Nguyen (U Chicago)
+
+  Class for acceleration of the mdpd pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : September 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "mdpd_cl.h"
+#elif defined(USE_CUDART)
+const char *mdpd=0;
+#else
+#include "mdpd_cubin.h"
+#endif
+
+#include "lal_mdpd.h"
+#include <cassert>
+namespace LAMMPS_AL {
+#define MDPDT MDPD<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> device;
+
+template <class numtyp, class acctyp>
+MDPDT::MDPD() : BaseDPD<numtyp,acctyp>(), _allocated(false) {
+}
+
+template <class numtyp, class acctyp>
+MDPDT::~MDPD() {
+  clear();
+}
+
+template <class numtyp, class acctyp>
+int MDPDT::bytes_per_atom(const int max_nbors) const {
+  return this->bytes_per_atom_atomic(max_nbors);
+}
+
+template <class numtyp, class acctyp>
+int MDPDT::init(const int ntypes,
+                double **host_cutsq, double **host_A_att, double **host_B_rep,
+                double **host_gamma, double **host_sigma,
+                double **host_cut, double **host_cut_r,
+                double *host_special_lj, const int nlocal, const int nall,
+                const int max_nbors, const int maxspecial,
+                const double cell_size,
+                const double gpu_split, FILE *_screen) {
+  const int max_shared_types=this->device->max_shared_types();
+
+  int onetype=0;
+  #ifdef USE_OPENCL
+  if (maxspecial==0)
+    for (int i=1; i<ntypes; i++)
+      for (int j=i; j<ntypes; j++)
+        if (host_cutsq[i][j]>0) {
+          if (onetype>0)
+            onetype=-1;
+          else if (onetype==0)
+            onetype=i*max_shared_types+j;
+        }
+  if (onetype<0) onetype=0;
+  #endif
+
+  int success;
+  int extra_fields = 4; // round up to accomodate quadruples of numtyp values
+                        // rho
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
+                            gpu_split,_screen,mdpd,"k_mdpd",onetype,extra_fields);
+  if (success!=0)
+    return success;
+
+  // If atom type constants fit in shared memory use fast kernel
+  int lj_types=ntypes;
+  shared_types=false;
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    shared_types=true;
+  }
+  _lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  coeff.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_A_att,host_B_rep,
+                         host_gamma,host_sigma);
+
+  coeff2.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,coeff2,host_write,host_cut,host_cut_r,
+                         host_cutsq);
+
+  UCL_H_Vec<numtyp> host_rsq(lj_types*lj_types,*(this->ucl_device),
+                             UCL_WRITE_ONLY);
+  cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq);
+
+  double special_sqrt[4];
+  special_sqrt[0] = sqrt(host_special_lj[0]);
+  special_sqrt[1] = sqrt(host_special_lj[1]);
+  special_sqrt[2] = sqrt(host_special_lj[2]);
+  special_sqrt[3] = sqrt(host_special_lj[3]);
+
+  UCL_H_Vec<double> dview;
+  sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
+  dview.view(host_special_lj,4,*(this->ucl_device));
+  ucl_copy(sp_lj,dview,false);
+  sp_sqrt.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
+  dview.view(special_sqrt,4,*(this->ucl_device));
+  ucl_copy(sp_sqrt,dview,false);
+
+  // allocate per-atom array Q
+
+  int ef_nall=nall;
+  if (ef_nall==0)
+    ef_nall=2000;
+
+  _allocated=true;
+  this->_max_bytes=coeff.row_bytes()+coeff2.row_bytes()+cutsq.row_bytes()+
+    sp_lj.row_bytes()+sp_sqrt.row_bytes();
+  return 0;
+}
+
+template <class numtyp, class acctyp>
+void MDPDT::clear() {
+  if (!_allocated)
+    return;
+  _allocated=false;
+
+  coeff.clear();
+  coeff2.clear();
+  cutsq.clear();
+  sp_lj.clear();
+  sp_sqrt.clear();
+  this->clear_atomic();
+}
+
+template <class numtyp, class acctyp>
+double MDPDT::host_memory_usage() const {
+  return this->host_memory_usage_atomic()+sizeof(MDPD<numtyp,acctyp>);
+}
+
+// ---------------------------------------------------------------------------
+// Calculate energies, forces, and torques
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int MDPDT::loop(const int eflag, const int vflag) {
+
+  int nall = this->atom->nall();
+
+  // signal that we need to transfer extra data from the host
+
+  this->atom->extra_data_unavail();
+
+  numtyp4 *pextra=reinterpret_cast<numtyp4*>(&(this->atom->extra[0]));
+
+  int n = 0;
+  int nstride = 1;
+  for (int i = 0; i < nall; i++) {
+    int idx = n+i*nstride;
+    numtyp4 v;
+    v.x = mdpd_rho[i];
+    v.y = 0;
+    v.z = 0;
+    v.w = 0;
+    pextra[idx] = v;
+  }
+  this->atom->add_extra_data();
+
+  // Compute the block size and grid size to keep all cores busy
+  const int BX=this->block_size();
+  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
+                               (BX/this->_threads_per_atom)));
+
+
+  int ainum=this->ans->inum();
+  int nbor_pitch=this->nbor->nbor_pitch();
+  this->time_pair.start();
+  if (shared_types) {
+    this->k_pair_sel->set_size(GX,BX);
+    this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &coeff2,
+                          &sp_lj, &sp_sqrt, &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                          &this->ans->force, &this->ans->engv, &eflag, &vflag,
+                          &ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt, &this->_seed,
+                          &this->_timestep, &this->_threads_per_atom);
+  } else {
+    this->k_pair.set_size(GX,BX);
+    this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &coeff2,
+                     &_lj_types, &sp_lj, &sp_sqrt, &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                     &this->ans->force, &this->ans->engv, &eflag, &vflag,
+                     &ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt, &this->_seed,
+                     &this->_timestep, &this->_threads_per_atom);
+  }
+
+  this->time_pair.stop();
+  return GX;
+}
+
+// ---------------------------------------------------------------------------
+// Get the extra data pointers from host
+// ---------------------------------------------------------------------------
+
+template <class numtyp, class acctyp>
+void MDPDT::get_extra_data(double *host_rho) {
+  mdpd_rho = host_rho;
+}
+
+template class MDPD<PRECISION,ACC_PRECISION>;
+}
diff --git a/lib/gpu/lal_mdpd.cu b/lib/gpu/lal_mdpd.cu
new file mode 100644
index 0000000000..1e0ca8f052
--- /dev/null
+++ b/lib/gpu/lal_mdpd.cu
@@ -0,0 +1,469 @@
+// **************************************************************************
+//                                   mdpd.cu
+//                             -------------------
+//                           Trung Dac Nguyen (ORNL)
+//
+//  Device code for acceleration of the mdpd pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : December 2023
+//    email                : ndactrung@gmail.com
+// ***************************************************************************
+
+#if defined(NV_KERNEL) || defined(USE_HIP)
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+_texture( pos_tex,float4);
+_texture( vel_tex,float4);
+#else
+_texture_2d( pos_tex,int4);
+_texture_2d( vel_tex,int4);
+#endif
+#else
+#define pos_tex x_
+#define vel_tex v_
+#endif
+
+#define EPSILON (numtyp)1.0e-10
+
+//#define _USE_UNIFORM_SARU_LCG
+//#define _USE_UNIFORM_SARU_TEA8
+//#define _USE_GAUSSIAN_SARU_LCG
+
+#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && !defined(_USE_GAUSSIAN_SARU_LCG)
+#define _USE_UNIFORM_SARU_LCG
+#endif
+
+// References:
+// 1. Y. Afshar, F. Schmid, A. Pishevar, S. Worley, Comput. Phys. Comm. 184 (2013), 1119–1128.
+// 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, Comput. Phys. Comm. 230 (2011), 7191-7201.
+// PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19
+
+#define LCGA 0x4beb5d59 /* Full period 32 bit LCG */
+#define LCGC 0x2600e1f7
+#define oWeylPeriod 0xda879add /* Prime period 3666320093 */
+#define oWeylOffset 0x8009d14b
+#define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */
+
+// specifically implemented for steps = 1; high = 1.0; low = -1.0
+// returns uniformly distributed random numbers u in [-1.0;1.0]
+// using the inherent LCG, then multiply u with sqrt(3) to "match"
+// with a normal random distribution.
+// Afshar et al. mutlplies u in [-0.5;0.5] with sqrt(12)
+// Curly brackets to make variables local to the scope.
+#ifdef _USE_UNIFORM_SARU_LCG
+#define SQRT3 (numtyp)1.7320508075688772935274463
+#define saru(seed1, seed2, seed, timestep, randnum) {                         \
+  unsigned int seed3 = seed + timestep;                                       \
+  seed3^=(seed1<<7)^(seed2>>6);                                               \
+  seed2+=(seed1>>4)^(seed3>>15);                                              \
+  seed1^=(seed2<<9)+(seed3<<8);                                               \
+  seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
+  seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
+  seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
+  seed2+=seed1*seed3;                                                         \
+  seed1+=seed3 ^ (seed2>>2);                                                  \
+  seed2^=((signed int)seed2)>>17;                                             \
+  unsigned int state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));         \
+  unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8);           \
+  state  = state + (wstate*(wstate^0xdddf97f5));                              \
+  wstate = 0xABCB96F7 + (wstate>>1);                                          \
+  state = LCGA*state + LCGC;                                                  \
+  wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod);   \
+  unsigned int v = (state ^ (state>>26)) + wstate;                            \
+  unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7);                      \
+  randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0);                        \
+}
+#endif
+
+// specifically implemented for steps = 1; high = 1.0; low = -1.0
+// returns uniformly distributed random numbers u in [-1.0;1.0] using TEA8
+// then multiply u with sqrt(3) to "match" with a normal random distribution
+// Afshar et al. mutlplies u in [-0.5;0.5] with sqrt(12)
+#ifdef _USE_UNIFORM_SARU_TEA8
+#define SQRT3 (numtyp)1.7320508075688772935274463
+#define k0 0xA341316C
+#define k1 0xC8013EA4
+#define k2 0xAD90777D
+#define k3 0x7E95761E
+#define delta 0x9e3779b9
+#define rounds 8
+#define saru(seed1, seed2, seed, timestep, randnum) {                         \
+  unsigned int seed3 = seed + timestep;                                       \
+  seed3^=(seed1<<7)^(seed2>>6);                                               \
+  seed2+=(seed1>>4)^(seed3>>15);                                              \
+  seed1^=(seed2<<9)+(seed3<<8);                                               \
+  seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
+  seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
+  seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
+  seed2+=seed1*seed3;                                                         \
+  seed1+=seed3 ^ (seed2>>2);                                                  \
+  seed2^=((signed int)seed2)>>17;                                             \
+  unsigned int state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));         \
+  unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8);           \
+  state  = state + (wstate*(wstate^0xdddf97f5));                              \
+  wstate = 0xABCB96F7 + (wstate>>1);                                          \
+  unsigned int sum = 0;                                                       \
+  for (int i=0; i < rounds; i++) {                                            \
+    sum += delta;                                                             \
+    state += ((wstate<<4) + k0)^(wstate + sum)^((wstate>>5) + k1);            \
+    wstate += ((state<<4) + k2)^(state + sum)^((state>>5) + k3);              \
+  }                                                                           \
+  unsigned int v = (state ^ (state>>26)) + wstate;                            \
+  unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7);                      \
+  randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0);                        \
+}
+#endif
+
+// specifically implemented for steps = 1; high = 1.0; low = -1.0
+// returns two uniformly distributed random numbers r1 and r2 in [-1.0;1.0],
+// and uses the polar method (Marsaglia's) to transform to a normal random value
+// This is used to compared with CPU DPD using RandMars::gaussian()
+#ifdef _USE_GAUSSIAN_SARU_LCG
+#define saru(seed1, seed2, seed, timestep, randnum) {                         \
+  unsigned int seed3 = seed + timestep;                                       \
+  seed3^=(seed1<<7)^(seed2>>6);                                               \
+  seed2+=(seed1>>4)^(seed3>>15);                                              \
+  seed1^=(seed2<<9)+(seed3<<8);                                               \
+  seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1));                               \
+  seed2+=0x72BE1579*((seed1<<4)  ^ (seed3>>16));                              \
+  seed1^=0x3F38A6ED*((seed3>>5)  ^ (((signed int)seed2)>>22));                \
+  seed2+=seed1*seed3;                                                         \
+  seed1+=seed3 ^ (seed2>>2);                                                  \
+  seed2^=((signed int)seed2)>>17;                                             \
+  unsigned int state=0x12345678;                                              \
+  unsigned int wstate=12345678;                                               \
+  state  = 0x79dedea3*(seed1^(((signed int)seed1)>>14));                      \
+  wstate = (state + seed2) ^ (((signed int)state)>>8);                        \
+  state  = state + (wstate*(wstate^0xdddf97f5));                              \
+  wstate = 0xABCB96F7 + (wstate>>1);                                          \
+  unsigned int v, s;                                                          \
+  numtyp r1, r2, rsq;                                                         \
+  while (1) {                                                                 \
+    state = LCGA*state + LCGC;                                                \
+    wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
+    v = (state ^ (state>>26)) + wstate;                                       \
+    s = (signed int)((v^(v>>20))*0x6957f5a7);                                 \
+    r1 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0;                                   \
+    state = LCGA*state + LCGC;                                                \
+    wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
+    v = (state ^ (state>>26)) + wstate;                                       \
+    s = (signed int)((v^(v>>20))*0x6957f5a7);                                 \
+    r2 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0;                                   \
+    rsq = r1 * r1 + r2 * r2;                                                  \
+    if (rsq < (numtyp)1.0) break;                                             \
+  }                                                                           \
+  numtyp fac = ucl_sqrt((numtyp)-2.0*log(rsq)/rsq);                           \
+  randnum = r2*fac;                                                           \
+}
+#endif
+
+#define MIN(A,B) ((A) < (B) ? (A) : (B))
+#define MAX(A,B) ((A) < (B) ? (B) : (A))
+
+// coeff.x = A_att, coeff.y = B_rep, coeff.z = gamma, coeff.w = sigma
+// coeff2.x = cut, coeff2.y = cut_r, coeff2.z = cutsq
+
+__kernel void k_mdpd(const __global numtyp4 *restrict x_,
+                    const __global numtyp4 *restrict extra,
+                    const __global numtyp4 *restrict coeff,
+                    const __global numtyp4 *restrict coeff2,
+                    const int lj_types,
+                    const __global numtyp *restrict sp_lj,
+                    const __global numtyp *restrict sp_sqrt,
+                    const __global int * dev_nbor,
+                    const __global int * dev_packed,
+                    __global acctyp3 *restrict ans,
+                    __global acctyp *restrict engv,
+                    const int eflag, const int vflag, const int inum,
+                    const int nbor_pitch,
+                    const __global numtyp4 *restrict v_,
+                    const __global numtyp *restrict cutsq,
+                    const numtyp dtinvsqrt, const int seed,
+                    const int timestep, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  int n_stride;
+  local_allocate_store_pair();
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp energy, virial[6];
+  if (EVFLAG) {
+    energy=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+
+  if (ii<inum) {
+    int i, numj, nbor, nbor_end;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int itype=ix.w;
+    numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
+    int itag=iv.w;
+
+    const numtyp rhoi = extra[i].x;
+
+    numtyp factor_dpd;
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+
+      int j=dev_packed[nbor];
+      factor_dpd = sp_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+      numtyp4 jv; fetch4(jv,j,vel_tex); //v_[j];
+      int jtag=jv.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      int mtype=itype*lj_types+jtype;
+      if (rsq<coeff2[mtype].z) {  // cutsq[itype][jtype]
+        numtyp r=ucl_sqrt(rsq);
+        if (r < EPSILON) continue;
+
+        numtyp rinv=ucl_recip(r);
+        numtyp delvx = iv.x - jv.x;
+        numtyp delvy = iv.y - jv.y;
+        numtyp delvz = iv.z - jv.z;
+        numtyp dot = delx*delvx + dely*delvy + delz*delvz;
+
+        numtyp A_attij = coeff[mtype].x;
+        numtyp B_repij = coeff[mtype].y;
+        numtyp gammaij = coeff[mtype].z;
+        numtyp sigmaij = coeff[mtype].w;
+        numtyp cutij =   coeff2[mtype].x;
+        numtyp cut_rij = coeff2[mtype].y;
+
+        numtyp wc = (numtyp)1.0 - r/cutij;
+        numtyp wc_r = (numtyp)1.0 - r/cut_rij;
+        wc_r = MAX(wc_r,(numtyp)0.0);
+        numtyp wr = wc;
+
+        const numtyp rhoj = extra[j].x;
+
+        unsigned int tag1=itag, tag2=jtag;
+        if (tag1 > tag2) {
+          tag1 = jtag; tag2 = itag;
+        }
+
+        numtyp randnum = (numtyp)0.0;
+        saru(tag1, tag2, seed, timestep, randnum);
+
+        // conservative force = A_att * wc + B_rep*(rhoi+rhoj)*wc_r
+        // drag force = -gamma * wr^2 * (delx dot delv) / r
+        // random force = sigma * wr * rnd * dtinvsqrt;
+
+        numtyp force = A_attij*wc + B_repij*(rhoi+rhoj)*wc_r;
+        force -= gammaij*wr*wr*dot*rinv;
+        force += sigmaij*wr*randnum*dtinvsqrt;
+        force *= factor_dpd*rinv;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (EVFLAG && eflag) {
+          // unshifted eng of conservative term:
+          // eng shifted to 0.0 at cutoff
+          numtyp e = (numtyp)0.5*A_attij*cutij * wr*wr + (numtyp)0.5*B_repij*cut_rij*(rhoi+rhoj)*wc_r*wc_r;
+          energy+=factor_dpd*e;
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+  } // if ii
+  store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
+                ans,engv);
+}
+
+__kernel void k_mdpd_fast(const __global numtyp4 *restrict x_,
+                          const __global numtyp4 *restrict extra,
+                          const __global numtyp4 *restrict coeff_in,
+                          const __global numtyp4 *restrict coeff2_in,
+                          const __global numtyp *restrict sp_lj_in,
+                          const __global numtyp *restrict sp_sqrt_in,
+                          const __global int * dev_nbor,
+                          const __global int * dev_packed,
+                          __global acctyp3 *restrict ans,
+                          __global acctyp *restrict engv,
+                          const int eflag, const int vflag, const int inum,
+                          const int nbor_pitch,
+                          const __global numtyp4 *restrict v_,
+                          const __global numtyp *restrict cutsq,
+                          const numtyp dtinvsqrt, const int seed,
+                          const int timestep, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  #ifndef ONETYPE
+  __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp4 coeff2[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp sp_lj[4];
+  if (tid<4) {
+    sp_lj[tid]=sp_lj_in[tid];
+  }
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
+    coeff[tid]=coeff_in[tid];
+    coeff2[tid]=coeff2_in[tid];
+  }
+  __syncthreads();
+  #else
+  const numtyp A_attij=coeff_in[ONETYPE].x;
+  const numtyp B_repij=coeff_in[ONETYPE].y;
+  const numtyp gammaij=coeff_in[ONETYPE].z;
+  const numtyp sigmaij=coeff_in[ONETYPE].w;
+  const numtyp cutij=coeff2_in[ONETYPE].x;
+  const numtyp cut_rij=coeff2_in[ONETYPE].y;
+  const numtyp cutsq_p=cutsq[ONETYPE];
+  #endif
+
+  int n_stride;
+  local_allocate_store_pair();
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp energy, virial[6];
+  if (EVFLAG) {
+    energy=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+
+  if (ii<inum) {
+    int i, numj, nbor, nbor_end;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    #ifndef ONETYPE
+    int iw=ix.w;
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
+    #endif
+    numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
+    int itag=iv.w;
+
+    const numtyp rhoi = extra[i].x;
+
+    #ifndef ONETYPE
+    numtyp factor_dpd;
+    #endif
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+
+      int j=dev_packed[nbor];
+      #ifndef ONETYPE
+      factor_dpd = sp_lj[sbmask(j)];
+      j &= NEIGHMASK;
+      #endif
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      #ifndef ONETYPE
+      int mtype=itype+jx.w;
+      const numtyp cutsq_p=cutsq[mtype];
+      #endif
+      numtyp4 jv; fetch4(jv,j,vel_tex); //v_[j];
+      int jtag=jv.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      if (rsq<cutsq_p) {
+        numtyp r=ucl_sqrt(rsq);
+        if (r < EPSILON) continue;
+
+        numtyp rinv=ucl_recip(r);
+        numtyp delvx = iv.x - jv.x;
+        numtyp delvy = iv.y - jv.y;
+        numtyp delvz = iv.z - jv.z;
+        numtyp dot = delx*delvx + dely*delvy + delz*delvz;
+
+        #ifndef ONETYPE
+        numtyp A_attij = coeff[mtype].x;
+        numtyp B_repij = coeff[mtype].y;
+        numtyp gammaij = coeff[mtype].z;
+        numtyp sigmaij = coeff[mtype].w;
+        numtyp cutij =   coeff2[mtype].x;
+        numtyp cut_rij = coeff2[mtype].y;
+        #endif
+
+        numtyp wc = (numtyp)1.0 - r/cutij;
+        numtyp wc_r = (numtyp)1.0 - r/cut_rij;
+        wc_r = MAX(wc_r,(numtyp)0.0);
+        numtyp wr = wc;
+
+        const numtyp rhoj = extra[j].x;
+
+        unsigned int tag1=itag, tag2=jtag;
+        if (tag1 > tag2) {
+          tag1 = jtag; tag2 = itag;
+        }
+
+        numtyp randnum = (numtyp)0.0;
+        saru(tag1, tag2, seed, timestep, randnum);
+
+        // conservative force = A_att * wc + B_rep*(rhoi+rhoj)*wc_r
+        // drag force = -gamma * wr^2 * (delx dot delv) / r
+        // random force = sigma * wr * rnd * dtinvsqrt;
+
+        numtyp force = A_attij*wc + B_repij*(rhoi+rhoj)*wc_r;
+        force -= gammaij*wr*wr*dot*rinv;
+        force += sigmaij*wr*randnum*dtinvsqrt;
+        #ifndef ONETYPE
+        force *= factor_dpd*rinv;
+        #else
+        force*=rinv;
+        #endif
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        if (EVFLAG && eflag) {
+          // unshifted eng of conservative term:
+          // eng shifted to 0.0 at cutoff
+          numtyp e = (numtyp)0.5*A_attij*cutij * wr*wr + (numtyp)0.5*B_repij*cut_rij*(rhoi+rhoj)*wc_r*wc_r;
+          #ifndef ONETYPE
+          energy+=factor_dpd*e;
+          #else
+          energy+=e;
+          #endif
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+
+    } // for nbor
+  } // if ii
+  store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
+                ans,engv);
+}
+
diff --git a/lib/gpu/lal_mdpd.h b/lib/gpu/lal_mdpd.h
new file mode 100644
index 0000000000..0e95185714
--- /dev/null
+++ b/lib/gpu/lal_mdpd.h
@@ -0,0 +1,88 @@
+/***************************************************************************
+                                 mdpd.h
+                             -------------------
+                            Trung Dac Nguyen (U Chicago)
+
+  Class for acceleration of the mdpd pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : December 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_MDPD_H
+#define LAL_MDPD_H
+
+#include "lal_base_dpd.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class MDPD : public BaseDPD<numtyp, acctyp> {
+ public:
+  MDPD();
+  ~MDPD();
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    *
+    * Returns:
+    * -  0 if successful
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **host_cutsq,
+           double **host_A_att, double **host_B_rep,
+           double **host_gamma, double **host_sigma,
+           double **host_cut, double **host_cut_r, double *host_special_lj,
+           const int nlocal, const int nall, const int max_nbors,
+           const int maxspecial, const double cell_size, const double gpu_split,
+           FILE *screen);
+
+  /// Clear all host and device data
+  /** \note This is called at the beginning of the init() routine **/
+  void clear();
+
+  /// Returns memory usage on device per atom
+  int bytes_per_atom(const int max_nbors) const;
+
+  /// Total host memory used by library for pair style
+  double host_memory_usage() const;
+
+  void get_extra_data(double *host_rho);
+
+  // --------------------------- TYPE DATA --------------------------
+
+  /// coeff.x = A_att, coeff.x = B_rep, coeff.z = gamma, coeff.w = sigma
+  UCL_D_Vec<numtyp4> coeff;
+  /// coeff2.x = cut, coeff2.y = cut_r, coeff2.z = cutsq
+  UCL_D_Vec<numtyp4> coeff2;
+
+  UCL_D_Vec<numtyp> cutsq;
+
+  /// Special LJ values
+  UCL_D_Vec<numtyp> sp_lj, sp_sqrt;
+
+  /// If atom type constants fit in shared memory, use fast kernels
+  bool shared_types;
+
+  /// Number of atom types
+  int _lj_types;
+
+  /// pointer to host data
+  double *mdpd_rho;
+
+ private:
+  bool _allocated;
+  int loop(const int eflag, const int vflag);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_mdpd_ext.cpp b/lib/gpu/lal_mdpd_ext.cpp
new file mode 100644
index 0000000000..def6adb1f6
--- /dev/null
+++ b/lib/gpu/lal_mdpd_ext.cpp
@@ -0,0 +1,133 @@
+/***************************************************************************
+                                 mdpd_ext.cpp
+                             -------------------
+                            Trung Dac Nguyen (U Chicago)
+
+  Functions for LAMMPS access to mdpd acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : December 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "lal_mdpd.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static MDPD<PRECISION,ACC_PRECISION> MDPDMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int mdpd_gpu_init(const int ntypes, double **cutsq,
+                  double **host_A_att, double **host_B_rep,
+                  double **host_gamma, double **host_sigma,
+                  double **host_cut, double **host_cut_r,
+                  double *special_lj, const int inum,
+                  const int nall, const int max_nbors,  const int maxspecial,
+                  const double cell_size, int &gpu_mode, FILE *screen) {
+  MDPDMF.clear();
+  gpu_mode=MDPDMF.device->gpu_mode();
+  double gpu_split=MDPDMF.device->particle_split();
+  int first_gpu=MDPDMF.device->first_device();
+  int last_gpu=MDPDMF.device->last_device();
+  int world_me=MDPDMF.device->world_me();
+  int gpu_rank=MDPDMF.device->gpu_rank();
+  int procs_per_gpu=MDPDMF.device->procs_per_gpu();
+
+  MDPDMF.device->init_message(screen,"mdpd",first_gpu,last_gpu);
+
+  bool message=false;
+  if (MDPDMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=MDPDMF.init(ntypes, cutsq, host_A_att, host_B_rep, host_gamma, host_sigma,
+                        host_cut, host_cut_r, special_lj, inum, nall, max_nbors,
+                        maxspecial, cell_size, gpu_split, screen);
+
+  MDPDMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=MDPDMF.init(ntypes, cutsq, host_A_att, host_B_rep, host_gamma, host_sigma,
+                          host_cut, host_cut_r, special_lj, inum, nall, max_nbors,
+                          maxspecial, cell_size, gpu_split, screen);
+
+    MDPDMF.device->serialize_init();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    MDPDMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+void mdpd_gpu_clear() {
+  MDPDMF.clear();
+}
+
+int ** mdpd_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                         double **host_x, int *host_type, double *sublo,
+                         double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag,
+                         const bool eatom, const bool vatom, int &host_start,
+                         int **ilist, int **jnum, const double cpu_time, bool &success,
+                         double **host_v, const double dtinvsqrt,
+                         const int seed, const int timestep,
+                         double *boxlo, double *prd) {
+  return MDPDMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                        subhi, tag, nspecial, special, eflag, vflag, eatom,
+                        vatom, host_start, ilist, jnum, cpu_time, success,
+                        host_v, dtinvsqrt, seed, timestep, boxlo, prd);
+}
+
+void mdpd_gpu_compute(const int ago, const int inum_full, const int nall,
+                     double **host_x, int *host_type, int *ilist, int *numj,
+                     int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start,
+                     const double cpu_time, bool &success, tagint *tag,
+                     double **host_v, const double dtinvsqrt,
+                     const int seed, const int timestep,
+                     const int nlocal, double *boxlo, double *prd) {
+  MDPDMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj,
+                 firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success,
+                 tag, host_v, dtinvsqrt, seed, timestep, nlocal, boxlo, prd);
+}
+
+void mdpd_gpu_get_extra_data(double *host_rho) {
+  MDPDMF.get_extra_data(host_rho);
+}
+
+double mdpd_gpu_bytes() {
+  return MDPDMF.host_memory_usage();
+}
+
+
diff --git a/lib/gpu/lal_sph_heatconduction.cpp b/lib/gpu/lal_sph_heatconduction.cpp
new file mode 100644
index 0000000000..e8e366e93a
--- /dev/null
+++ b/lib/gpu/lal_sph_heatconduction.cpp
@@ -0,0 +1,222 @@
+/***************************************************************************
+                                sph_heatconduction.cpp
+                             -------------------
+                            Trung Nguyen (U Chicago)
+
+  Class for acceleration of the sph_heatconduction pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : September 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "sph_heatconduction_cl.h"
+#elif defined(USE_CUDART)
+const char *sph_heatconduction=0;
+#else
+#include "sph_heatconduction_cubin.h"
+#endif
+
+#include "lal_sph_heatconduction.h"
+#include <cassert>
+namespace LAMMPS_AL {
+#define SPHHeatConductionT SPHHeatConduction<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> device;
+
+template <class numtyp, class acctyp>
+SPHHeatConductionT::SPHHeatConduction() : BaseSPH<numtyp,acctyp>(), _allocated(false) {
+  _max_dE_size = 0;
+}
+
+template <class numtyp, class acctyp>
+SPHHeatConductionT::~SPHHeatConduction() {
+  clear();
+}
+
+template <class numtyp, class acctyp>
+int SPHHeatConductionT::bytes_per_atom(const int max_nbors) const {
+  return this->bytes_per_atom_atomic(max_nbors);
+}
+
+template <class numtyp, class acctyp>
+int SPHHeatConductionT::init(const int ntypes,
+                 double **host_cutsq, double **host_cut,
+                 double **host_alpha, double* host_mass,
+                 const int dimension, double *host_special_lj,
+                 const int nlocal, const int nall,
+                 const int max_nbors, const int maxspecial,
+                 const double cell_size,
+                 const double gpu_split, FILE *_screen) {
+  const int max_shared_types=this->device->max_shared_types();
+
+  int onetype=0;
+  #ifdef USE_OPENCL
+  if (maxspecial==0)
+    for (int i=1; i<ntypes; i++)
+      for (int j=i; j<ntypes; j++)
+        if (host_cutsq[i][j]>0) {
+          if (onetype>0)
+            onetype=-1;
+          else if (onetype==0)
+            onetype=i*max_shared_types+j;
+        }
+  if (onetype<0) onetype=0;
+  #endif
+
+  int success;
+  int extra_fields = 4; // round up to accomodate quadruples of numtyp values
+                        // rho, esph
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
+                            gpu_split,_screen,sph_heatconduction,"k_sph_heatconduction",
+                            onetype,extra_fields);
+  if (success!=0)
+    return success;
+
+  // If atom type constants fit in shared memory use fast kernel
+  int lj_types=ntypes;
+  shared_types=false;
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    shared_types=true;
+  }
+  _lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  coeff.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_alpha,
+                         host_cut, host_cutsq);
+
+  UCL_H_Vec<numtyp> dview_mass(ntypes, *(this->ucl_device), UCL_WRITE_ONLY);
+  for (int i = 0; i < ntypes; i++)
+    dview_mass[i] = host_mass[i];
+  mass.alloc(ntypes,*(this->ucl_device), UCL_READ_ONLY);
+  ucl_copy(mass,dview_mass,false);
+
+  UCL_H_Vec<double> dview;
+  sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
+  dview.view(host_special_lj,4,*(this->ucl_device));
+  ucl_copy(sp_lj,dview,false);
+
+  // allocate per-atom array Q
+
+  int ef_nall=nall;
+  if (ef_nall==0)
+    ef_nall=2000;
+
+  _max_dE_size=static_cast<int>(static_cast<double>(ef_nall)*1.10);
+  dE.alloc(_max_dE_size,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE);
+
+  _dimension = dimension;
+
+  _allocated=true;
+  this->_max_bytes=coeff.row_bytes()+dE.row_bytes()+sp_lj.row_bytes();
+  return 0;
+}
+
+template <class numtyp, class acctyp>
+void SPHHeatConductionT::clear() {
+  if (!_allocated)
+    return;
+  _allocated=false;
+
+  coeff.clear();
+  mass.clear();
+  dE.clear();
+  sp_lj.clear();
+  this->clear_atomic();
+}
+
+template <class numtyp, class acctyp>
+double SPHHeatConductionT::host_memory_usage() const {
+  return this->host_memory_usage_atomic()+sizeof(SPHHeatConduction<numtyp,acctyp>);
+}
+
+template <class numtyp, class acctyp>
+void SPHHeatConductionT::update_dE(void **dE_ptr) {
+  *dE_ptr=dE.host.begin();
+  dE.update_host(_max_dE_size,false);
+}
+
+// ---------------------------------------------------------------------------
+// Calculate energies, forces, and torques
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int SPHHeatConductionT::loop(const int eflag, const int vflag) {
+
+  int nall = this->atom->nall();
+
+  // Resize dE array if necessary
+  if (nall > _max_dE_size) {
+    _max_dE_size=static_cast<int>(static_cast<double>(nall)*1.10);
+    dE.resize(_max_dE_size);
+  }
+
+  // signal that we need to transfer extra data from the host
+
+  this->atom->extra_data_unavail();
+
+  numtyp4 *pextra=reinterpret_cast<numtyp4*>(&(this->atom->extra[0]));
+
+  int n = 0;
+  int nstride = 1;
+  for (int i = 0; i < nall; i++) {
+    int idx = n+i*nstride;
+    numtyp4 v;
+    v.x = rho[i];
+    v.y = esph[i];
+    v.z = 0;
+    v.w = 0;
+    pextra[idx] = v;
+  }
+  this->atom->add_extra_data();
+
+  // Compute the block size and grid size to keep all cores busy
+  const int BX=this->block_size();
+  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
+                               (BX/this->_threads_per_atom)));
+
+
+  int ainum=this->ans->inum();
+  int nbor_pitch=this->nbor->nbor_pitch();
+  this->time_pair.start();
+  if (shared_types) {
+    this->k_pair_sel->set_size(GX,BX);
+    this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &mass, &sp_lj,
+                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                          &this->ans->force, &this->ans->engv, &dE, &eflag, &vflag,
+                          &ainum, &nbor_pitch, &this->atom->v, &_dimension, &this->_threads_per_atom);
+  } else {
+    this->k_pair.set_size(GX,BX);
+    this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &mass,
+                     &_lj_types, &sp_lj, &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                     &this->ans->force, &this->ans->engv, &dE, &eflag, &vflag,
+                     &ainum, &nbor_pitch, &this->atom->v, &_dimension, &this->_threads_per_atom);
+  }
+
+  this->time_pair.stop();
+  return GX;
+}
+
+// ---------------------------------------------------------------------------
+// Get the extra data pointers from host
+// ---------------------------------------------------------------------------
+
+template <class numtyp, class acctyp>
+void SPHHeatConductionT::get_extra_data(double *host_rho, double *host_esph) {
+  rho = host_rho;
+  esph = host_esph;
+}
+
+template class SPHHeatConduction<PRECISION,ACC_PRECISION>;
+}
diff --git a/lib/gpu/lal_sph_heatconduction.cu b/lib/gpu/lal_sph_heatconduction.cu
new file mode 100644
index 0000000000..e2ba40db0c
--- /dev/null
+++ b/lib/gpu/lal_sph_heatconduction.cu
@@ -0,0 +1,257 @@
+// **************************************************************************
+//                             sph_heatconduction.cu
+//                             ---------------------
+//                           Trung Dac Nguyen (U Chicago)
+//
+//  Device code for acceleration of the sph/heatconduction pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : September 2023
+//    email                : ndactrung@gmail.com
+// ***************************************************************************
+
+#if defined(NV_KERNEL) || defined(USE_HIP)
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+_texture( pos_tex,float4);
+_texture( vel_tex,float4);
+#else
+_texture_2d( pos_tex,int4);
+_texture_2d( vel_tex,int4);
+#endif
+#else
+#define pos_tex x_
+#define vel_tex v_
+#endif
+
+#if (SHUFFLE_AVAIL == 0)
+
+#define store_dE(dEacc, ii, inum, tid, t_per_atom, offset, dE)              \
+  if (t_per_atom>1) {                                                       \
+    simdsync();                                                             \
+    simd_reduce_add1(t_per_atom, red_acc, offset, tid, dEacc);              \
+  }                                                                         \
+  if (offset==0 && ii<inum) {                                               \
+    dE[ii]=dEacc;                                                           \
+  }
+#else
+#define store_drhoE(dEacc, ii, inum, tid, t_per_atom, offset, dE)           \
+  if (t_per_atom>1) {                                                       \
+    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
+      dEacc += shfl_down(dEacc, s, t_per_atom);                             \
+    }                                                                       \
+  }                                                                         \
+  if (offset==0 && ii<inum) {                                               \
+    dE[ii]=dEacc;                                                           \
+  }
+#endif
+
+/* ------------------------------------------------------------------------ */
+
+__kernel void k_sph_heatconduction(const __global numtyp4 *restrict x_,
+                       const __global numtyp4 *restrict extra,
+                       const __global numtyp4 *restrict coeff,
+                       const __global numtyp *restrict mass,
+                       const int lj_types,
+                       const __global numtyp *restrict sp_lj,
+                       const __global int * dev_nbor,
+                       const __global int * dev_packed,
+                       __global acctyp3 *restrict ans,
+                       __global acctyp *restrict engv,
+                       __global acctyp *restrict dE,
+                       const int eflag, const int vflag,
+                       const int inum, const int nbor_pitch,
+                       const __global numtyp4 *restrict v_,
+                       const int dimension, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  int n_stride;
+#if (SHUFFLE_AVAIL == 0)
+  local_allocate_store_pair();
+#endif
+
+  acctyp dEacc = (acctyp)0;
+
+  if (ii<inum) {
+    int i, numj, nbor, nbor_end;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int itype=ix.w;
+    numtyp mass_itype = mass[itype];
+
+    const numtyp4 extrai = extra[i];
+    numtyp rhoi = extrai.x;
+    numtyp esphi = extrai.y;
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+
+      int j=dev_packed[nbor];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      int mtype=itype*lj_types+jtype;
+      if (rsq<coeff[mtype].z) { // cutsq[itype][jtype]
+        numtyp mass_jtype = mass[jtype];
+        const numtyp coeffx=coeff[mtype].x;  // alpha[itype][jtype]
+        const numtyp coeffy=coeff[mtype].y;  // cut[itype][jtype]
+
+        const numtyp4 extraj = extra[j];
+        numtyp rhoj = extraj.x;
+        numtyp esphj = extraj.y;
+
+        numtyp h = coeffy; // cut[itype][jtype]
+        numtyp ih = ucl_recip(h); // (numtyp)1.0 / h;
+        numtyp ihsq = ih * ih;
+
+        numtyp wfd = h - ucl_sqrt(rsq);
+        if (dimension == 3) {
+          // Lucy Kernel, 3d
+          wfd = (numtyp)-25.066903536973515383 * wfd * wfd * ihsq * ihsq * ihsq * ih;
+        } else {
+          // Lucy Kernel, 2d
+          wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
+        }
+
+        // total thermal energy increment
+        numtyp D = coeffx; // alpha[itype][jtype]  diffusion coefficient
+        numtyp deltaE = (numtyp)2.0 * mass_itype * mass_jtype / (mass_itype + mass_jtype);
+        deltaE *= (rhoi + rhoj) / (rhoi * rhoj);
+        deltaE *= D * (esphi - esphj) * wfd;
+
+        // change in thermal energy, desph[i]
+        dEacc += deltaE;
+
+      }
+    } // for nbor
+  } // if ii
+
+  store_drhoE(dEacc,ii,inum,tid,t_per_atom,offset,dE);
+}
+
+__kernel void k_sph_heatconduction_fast(const __global numtyp4 *restrict x_,
+                            const __global numtyp4 *restrict extra,
+                            const __global numtyp4 *restrict coeff_in,
+                            const __global numtyp *restrict mass,
+                            const __global numtyp *restrict sp_lj_in,
+                            const __global int * dev_nbor,
+                            const __global int * dev_packed,
+                            __global acctyp3 *restrict ans,
+                            __global acctyp *restrict engv,
+                            __global acctyp *restrict dE,
+                            const int eflag, const int vflag,
+                            const int inum, const int nbor_pitch,
+                            const __global numtyp4 *restrict v_,
+                            const int dimension, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  #ifndef ONETYPE
+  __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
+    coeff[tid]=coeff_in[tid];
+  }
+  __syncthreads();
+  #else
+  const numtyp coeffx=coeff_in[ONETYPE].x;   // alpha[itype][jtype]
+  const numtyp coeffy=coeff_in[ONETYPE].y;   // cut[itype][jtype]
+  const numtyp cutsq_p=coeff_in[ONETYPE].z;  // cutsq[itype][jtype]
+  #endif
+
+  int n_stride;
+#if (SHUFFLE_AVAIL == 0)
+  local_allocate_store_pair();
+#endif
+
+  acctyp dEacc = (acctyp)0;
+
+  if (ii<inum) {
+    int i, numj, nbor, nbor_end;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int iw=ix.w;
+    numtyp mass_itype = mass[iw];
+    #ifndef ONETYPE
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
+    #endif
+
+    const numtyp4 extrai = extra[i];
+    numtyp rhoi = extrai.x;
+    numtyp esphi = extrai.y;
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+
+      int j=dev_packed[nbor];
+      #ifndef ONETYPE
+      j &= NEIGHMASK;
+      #endif
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype = jx.w;
+      #ifndef ONETYPE
+      int mtype=itype+jx.w;
+      const numtyp cutsq_p=coeff[mtype].z;
+      #endif
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      if (rsq<cutsq_p) {
+        numtyp mass_jtype = mass[jtype];
+        #ifndef ONETYPE
+        const numtyp coeffx=coeff[mtype].x;  // alpha[itype][jtype]
+        const numtyp coeffy=coeff[mtype].y;  // cut[itype][jtype]
+        #endif
+        const numtyp4 extraj = extra[j];
+        numtyp rhoj = extraj.x;
+        numtyp esphj = extraj.y;
+
+        numtyp h = coeffy; // cut[itype][jtype]
+        numtyp ih = ih = ucl_recip(h); // (numtyp)1.0 / h;
+        numtyp ihsq = ih * ih;
+
+        numtyp wfd = h - ucl_sqrt(rsq);
+        if (dimension == 3) {
+          // Lucy Kernel, 3d
+          wfd = (numtyp)-25.066903536973515383 * wfd * wfd * ihsq * ihsq * ihsq * ih;
+        } else {
+          // Lucy Kernel, 2d
+          wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
+        }
+
+        // total thermal energy increment
+        numtyp D = coeffx; // alpha[itype][jtype]  diffusion coefficient
+        numtyp deltaE = (numtyp)2.0 * mass_itype * mass_jtype / (mass_itype + mass_jtype);
+        deltaE *= (rhoi + rhoj) / (rhoi * rhoj);
+        deltaE *= D * (esphi - esphj) * wfd;
+
+        // change in thermal energy, desph[i]
+        dEacc += deltaE;
+
+      }
+    } // for nbor
+  } // if ii
+
+  store_drhoE(dEacc,ii,inum,tid,t_per_atom,offset,dE);
+}
+
diff --git a/lib/gpu/lal_sph_heatconduction.h b/lib/gpu/lal_sph_heatconduction.h
new file mode 100644
index 0000000000..cd7a46e3bd
--- /dev/null
+++ b/lib/gpu/lal_sph_heatconduction.h
@@ -0,0 +1,95 @@
+/***************************************************************************
+                             sph_heatconduction.h
+                             --------------------
+                            Trung Nguyen (U Chicago)
+
+  Class for acceleration of the sph heatconduction pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : December 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_SPH_HEATCONDUCTION_H
+#define LAL_SPH_HEATCONDUCTION_H
+
+#include "lal_base_sph.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class SPHHeatConduction : public BaseSPH<numtyp, acctyp> {
+ public:
+  SPHHeatConduction();
+  ~SPHHeatConduction();
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    *
+    * Returns:
+    * -  0 if successful
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **host_cutsq,
+           double** host_cut, double **host_alpha, double *host_mass,
+           const int dimension,  double *host_special_lj,
+           const int nlocal, const int nall, const int max_nbors,
+           const int maxspecial, const double cell_size,
+           const double gpu_split, FILE *screen);
+
+  /// Clear all host and device data
+  /** \note This is called at the beginning of the init() routine **/
+  void clear();
+
+  /// Returns memory usage on device per atom
+  int bytes_per_atom(const int max_nbors) const;
+
+  /// Total host memory used by library for pair style
+  double host_memory_usage() const;
+
+  void get_extra_data(double *host_rho, double *host_esph);
+
+  /// copy desph from device to host
+  void update_dE(void **dE_ptr);
+
+  // --------------------------- TYPE DATA --------------------------
+
+  /// coeff.x = alpha, coeff.y = cut, coeff.z = cutsq
+  UCL_D_Vec<numtyp4> coeff;
+
+  /// per-type coeffs
+  UCL_D_Vec<numtyp> mass;
+
+  /// Special LJ values
+  UCL_D_Vec<numtyp> sp_lj;
+
+  /// If atom type constants fit in shared memory, use fast kernels
+  bool shared_types;
+
+  /// Number of atom types
+  int _lj_types;
+
+  /// Per-atom arrays
+  UCL_Vector<acctyp,acctyp> dE;
+  int _max_dE_size;
+
+  int _dimension;
+
+  /// pointer to host data
+  double *rho, *esph, *cv;
+
+ private:
+  bool _allocated;
+  int loop(const int eflag, const int vflag);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_sph_heatconduction_ext.cpp b/lib/gpu/lal_sph_heatconduction_ext.cpp
new file mode 100644
index 0000000000..645480154c
--- /dev/null
+++ b/lib/gpu/lal_sph_heatconduction_ext.cpp
@@ -0,0 +1,129 @@
+/***************************************************************************
+                             sph_heatconduction_ext.cpp
+                             --------------------------
+                            Trung Dac Nguyen (U Chicago)
+
+  Functions for LAMMPS access to sph/heatconduction acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : December 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "lal_sph_heatconduction.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static SPHHeatConduction<PRECISION,ACC_PRECISION> SPHHeatConductionMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int sph_heatconduction_gpu_init(const int ntypes, double **cutsq, double** host_cut,
+                    double **host_alpha, double* host_mass, const int dimension,
+                    double *special_lj, const int inum, const int nall,
+                    const int max_nbors,  const int maxspecial,
+                    const double cell_size, int &gpu_mode, FILE *screen) {
+  SPHHeatConductionMF.clear();
+  gpu_mode=SPHHeatConductionMF.device->gpu_mode();
+  double gpu_split=SPHHeatConductionMF.device->particle_split();
+  int first_gpu=SPHHeatConductionMF.device->first_device();
+  int last_gpu=SPHHeatConductionMF.device->last_device();
+  int world_me=SPHHeatConductionMF.device->world_me();
+  int gpu_rank=SPHHeatConductionMF.device->gpu_rank();
+  int procs_per_gpu=SPHHeatConductionMF.device->procs_per_gpu();
+
+  SPHHeatConductionMF.device->init_message(screen,"sph_heatconduction",first_gpu,last_gpu);
+
+  bool message=false;
+  if (SPHHeatConductionMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=SPHHeatConductionMF.init(ntypes, cutsq, host_cut, host_alpha, host_mass,
+                         dimension, special_lj, inum, nall, max_nbors,  maxspecial,
+                         cell_size, gpu_split, screen);
+
+  SPHHeatConductionMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=SPHHeatConductionMF.init(ntypes, cutsq, host_cut, host_alpha, host_mass,
+                           dimension, special_lj, inum, nall, max_nbors, maxspecial,
+                           cell_size, gpu_split, screen);
+
+    SPHHeatConductionMF.device->serialize_init();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    SPHHeatConductionMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+void sph_heatconduction_gpu_clear() {
+  SPHHeatConductionMF.clear();
+}
+
+int ** sph_heatconduction_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                            double **host_x, int *host_type, double *sublo,
+                            double *subhi, tagint *host_tag, int **nspecial,
+                            tagint **special, const bool eflag, const bool vflag,
+                            const bool eatom, const bool vatom, int &host_start,
+                            int **ilist, int **jnum, const double cpu_time, bool &success,
+                            double **host_v) {
+  return SPHHeatConductionMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                         subhi, host_tag, nspecial, special, eflag, vflag,
+                         eatom, vatom, host_start, ilist, jnum, cpu_time, success,
+                         host_v);
+}
+
+void sph_heatconduction_gpu_compute(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, int *ilist, int *numj,
+                        int **firstneigh, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, tagint *host_tag,
+                        double **host_v) {
+  SPHHeatConductionMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj,
+                  firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success,
+                  host_tag, host_v);
+}
+
+void sph_heatconduction_gpu_get_extra_data(double *host_rho, double *host_esph) {
+  SPHHeatConductionMF.get_extra_data(host_rho, host_esph);
+}
+
+void sph_heatconduction_gpu_update_dE(void **dE_ptr) {
+  SPHHeatConductionMF.update_dE(dE_ptr);
+}
+
+double sph_heatconduction_gpu_bytes() {
+  return SPHHeatConductionMF.host_memory_usage();
+}
diff --git a/lib/gpu/lal_sph_lj.cpp b/lib/gpu/lal_sph_lj.cpp
new file mode 100644
index 0000000000..66c2a5c302
--- /dev/null
+++ b/lib/gpu/lal_sph_lj.cpp
@@ -0,0 +1,222 @@
+/***************************************************************************
+                                   sph_lj.cpp
+                             -------------------
+                            Trung Nguyen (U Chicago)
+
+  Class for acceleration of the sph_lj pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : September 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "sph_lj_cl.h"
+#elif defined(USE_CUDART)
+const char *sph_lj=0;
+#else
+#include "sph_lj_cubin.h"
+#endif
+
+#include "lal_sph_lj.h"
+#include <cassert>
+namespace LAMMPS_AL {
+#define SPHLJT SPHLJ<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> device;
+
+template <class numtyp, class acctyp>
+SPHLJT::SPHLJ() : BaseSPH<numtyp,acctyp>(), _allocated(false) {
+  _max_drhoE_size = 0;
+}
+
+template <class numtyp, class acctyp>
+SPHLJT::~SPHLJ() {
+  clear();
+}
+
+template <class numtyp, class acctyp>
+int SPHLJT::bytes_per_atom(const int max_nbors) const {
+  return this->bytes_per_atom_atomic(max_nbors);
+}
+
+template <class numtyp, class acctyp>
+int SPHLJT::init(const int ntypes,
+                 double **host_cutsq, double **host_cut,
+                 double **host_viscosity, double* host_mass,
+                 const int dimension, double *host_special_lj,
+                 const int nlocal, const int nall,
+                 const int max_nbors, const int maxspecial,
+                 const double cell_size,
+                 const double gpu_split, FILE *_screen) {
+  const int max_shared_types=this->device->max_shared_types();
+
+  int onetype=0;
+  #ifdef USE_OPENCL
+  if (maxspecial==0)
+    for (int i=1; i<ntypes; i++)
+      for (int j=i; j<ntypes; j++)
+        if (host_cutsq[i][j]>0) {
+          if (onetype>0)
+            onetype=-1;
+          else if (onetype==0)
+            onetype=i*max_shared_types+j;
+        }
+  if (onetype<0) onetype=0;
+  #endif
+
+  int success;
+  int extra_fields = 4; // round up to accomodate quadruples of numtyp values
+                        // rho, cv
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
+                            gpu_split,_screen,sph_lj,"k_sph_lj",onetype,extra_fields);
+  if (success!=0)
+    return success;
+
+  // If atom type constants fit in shared memory use fast kernel
+  int lj_types=ntypes;
+  shared_types=false;
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    shared_types=true;
+  }
+  _lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  coeff.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_viscosity,
+                         host_cut, host_cutsq);
+
+  UCL_H_Vec<numtyp> dview_mass(ntypes, *(this->ucl_device), UCL_WRITE_ONLY);
+  for (int i = 0; i < ntypes; i++)
+    dview_mass[i] = host_mass[i];
+  mass.alloc(ntypes,*(this->ucl_device), UCL_READ_ONLY);
+  ucl_copy(mass,dview_mass,false);
+
+  UCL_H_Vec<double> dview;
+  sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
+  dview.view(host_special_lj,4,*(this->ucl_device));
+  ucl_copy(sp_lj,dview,false);
+
+  // allocate per-atom array Q
+
+  int ef_nall=nall;
+  if (ef_nall==0)
+    ef_nall=2000;
+
+  _max_drhoE_size=static_cast<int>(static_cast<double>(ef_nall)*1.10);
+  drhoE.alloc(_max_drhoE_size*2,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE);
+
+  _dimension = dimension;
+
+  _allocated=true;
+  this->_max_bytes=coeff.row_bytes()+drhoE.row_bytes()+sp_lj.row_bytes();
+  return 0;
+}
+
+template <class numtyp, class acctyp>
+void SPHLJT::clear() {
+  if (!_allocated)
+    return;
+  _allocated=false;
+
+  coeff.clear();
+  mass.clear();
+  drhoE.clear();
+  sp_lj.clear();
+  this->clear_atomic();
+}
+
+template <class numtyp, class acctyp>
+double SPHLJT::host_memory_usage() const {
+  return this->host_memory_usage_atomic()+sizeof(SPHLJ<numtyp,acctyp>);
+}
+
+template <class numtyp, class acctyp>
+void SPHLJT::update_drhoE(void **drhoE_ptr) {
+  *drhoE_ptr=drhoE.host.begin();
+  drhoE.update_host(_max_drhoE_size*2,false);
+}
+
+// ---------------------------------------------------------------------------
+// Calculate energies, forces, and torques
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int SPHLJT::loop(const int eflag, const int vflag) {
+
+  int nall = this->atom->nall();
+
+  // Resize drhoE array if necessary
+  if (nall > _max_drhoE_size) {
+    _max_drhoE_size=static_cast<int>(static_cast<double>(nall)*1.10);
+    drhoE.resize(_max_drhoE_size*2);
+  }
+
+  // signal that we need to transfer extra data from the host
+
+  this->atom->extra_data_unavail();
+
+  numtyp4 *pextra=reinterpret_cast<numtyp4*>(&(this->atom->extra[0]));
+
+  int n = 0;
+  int nstride = 1;
+  for (int i = 0; i < nall; i++) {
+    int idx = n+i*nstride;
+    numtyp4 v;
+    v.x = rho[i];
+    v.y = esph[i];
+    v.z = cv[i];
+    v.w = 0;
+    pextra[idx] = v;
+  }
+  this->atom->add_extra_data();
+
+  // Compute the block size and grid size to keep all cores busy
+  const int BX=this->block_size();
+  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
+                               (BX/this->_threads_per_atom)));
+
+
+  int ainum=this->ans->inum();
+  int nbor_pitch=this->nbor->nbor_pitch();
+  this->time_pair.start();
+  if (shared_types) {
+    this->k_pair_sel->set_size(GX,BX);
+    this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &mass, &sp_lj,
+                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                          &this->ans->force, &this->ans->engv, &drhoE, &eflag, &vflag,
+                          &ainum, &nbor_pitch, &this->atom->v, &_dimension, &this->_threads_per_atom);
+  } else {
+    this->k_pair.set_size(GX,BX);
+    this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &mass,
+                     &_lj_types, &sp_lj, &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                     &this->ans->force, &this->ans->engv, &drhoE, &eflag, &vflag,
+                     &ainum, &nbor_pitch, &this->atom->v, &_dimension, &this->_threads_per_atom);
+  }
+
+  this->time_pair.stop();
+  return GX;
+}
+
+// ---------------------------------------------------------------------------
+// Get the extra data pointers from host
+// ---------------------------------------------------------------------------
+
+template <class numtyp, class acctyp>
+void SPHLJT::get_extra_data(double *host_rho, double *host_esph, double *host_cv) {
+  rho = host_rho;
+  esph = host_esph;
+  cv = host_cv;
+}
+
+template class SPHLJ<PRECISION,ACC_PRECISION>;
+}
diff --git a/lib/gpu/lal_sph_lj.cu b/lib/gpu/lal_sph_lj.cu
new file mode 100644
index 0000000000..f376dfc533
--- /dev/null
+++ b/lib/gpu/lal_sph_lj.cu
@@ -0,0 +1,426 @@
+// **************************************************************************
+//                                 sph_lj.cu
+//                             -------------------
+//                           Trung Dac Nguyen (U Chicago)
+//
+//  Device code for acceleration of the sph/lj pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : September 2023
+//    email                : ndactrung@gmail.com
+// ***************************************************************************
+
+#if defined(NV_KERNEL) || defined(USE_HIP)
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+_texture( pos_tex,float4);
+_texture( vel_tex,float4);
+#else
+_texture_2d( pos_tex,int4);
+_texture_2d( vel_tex,int4);
+#endif
+#else
+#define pos_tex x_
+#define vel_tex v_
+#endif
+
+#if (SHUFFLE_AVAIL == 0)
+
+#define store_drhoE(drhoEacc, ii, inum, tid, t_per_atom, offset, drhoE)      \
+  if (t_per_atom>1) {                                                        \
+    simdsync();                                                              \
+    simd_reduce_add2(t_per_atom, red_acc, offset, tid,                       \
+                     drhoEacc.x, drhoEacc.y);                                \
+  }                                                                          \
+  if (offset==0 && ii<inum) {                                                \
+    drhoE[ii]=drhoEacc;                                                      \
+  }
+#else
+#define store_drhoE(drhoEacc, ii, inum, tid, t_per_atom, offset, drhoE)     \
+  if (t_per_atom>1) {                                                       \
+    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
+      drhoEacc.x += shfl_down(drhoEacc.x, s, t_per_atom);                   \
+      drhoEacc.y += shfl_down(drhoEacc.y, s, t_per_atom);                   \
+    }                                                                       \
+  }                                                                         \
+  if (offset==0 && ii<inum) {                                               \
+    drhoE[ii]=drhoEacc;                                                     \
+  }
+#endif
+
+/* ------------------------------------------------------------------------ */
+/* Lennard-Jones EOS,
+   Francis H. Ree
+   "Analytic representation of thermodynamic data for the Lennard‐Jones fluid",
+   Journal of Chemical Physics 73 pp. 5401-5403 (1980)
+   return p = pc[0], c = pc[1]
+*/
+
+ucl_inline void LJEOS2(const numtyp rho, const numtyp e, const numtyp cv, numtyp pc[2])
+{
+  numtyp T = e/cv;
+  numtyp beta = ucl_recip(T); // (numtyp)1.0 / T;
+  numtyp beta_sqrt = ucl_sqrt(beta);
+  numtyp x = rho * ucl_sqrt(beta_sqrt);
+
+  numtyp xsq = x * x;
+  numtyp xpow3 = xsq * x;
+  numtyp xpow4 = xsq * xsq;
+
+  /* differential of Helmholtz free energy w.r.t. x */
+  numtyp diff_A_NkT = (numtyp)3.629 + (numtyp)7.264*x -
+    beta*((numtyp)3.492 - (numtyp)18.698*x + (numtyp)35.505*xsq - (numtyp)31.816*xpow3 +
+    (numtyp)11.195*xpow4) - beta_sqrt*((numtyp)5.369 + (numtyp)13.16*x +
+    (numtyp)18.525*xsq - (numtyp)17.076*xpow3 + (numtyp)9.32*xpow4) +
+    (numtyp)10.4925*xsq + (numtyp)11.46*xpow3 + (numtyp)2.176*xpow4*xpow4*x;
+
+  /* differential of Helmholtz free energy w.r.t. x^2 */
+  numtyp d2A_dx2 = (numtyp)7.264 + (numtyp)20.985*x +
+     beta*((numtyp)18.698 - (numtyp)71.01*x + (numtyp)95.448*xsq - (numtyp)44.78*xpow3) -
+     beta_sqrt*((numtyp)13.16 + (numtyp)37.05*x - (numtyp)51.228*xsq + (numtyp)37.28*xpow3) +
+     (numtyp)34.38*xsq + (numtyp)19.584*xpow4*xpow4;
+
+  // p = rho k T * (1 + rho * d(A/(NkT))/drho)
+  // dx/drho = rho/x
+  pc[0] = rho * T * ((numtyp)1.0 + diff_A_NkT * x); // pressure
+  numtyp csq = T * ((numtyp)1.0 + (numtyp)2.0 * diff_A_NkT * x + d2A_dx2 * x * x); // soundspeed squared
+  if (csq > (numtyp)0.0) {
+    pc[1] = ucl_sqrt(csq); // soundspeed
+  } else {
+    pc[1] = (numtyp)0.0;
+  }
+}
+
+
+__kernel void k_sph_lj(const __global numtyp4 *restrict x_,
+                       const __global numtyp4 *restrict extra,
+                       const __global numtyp4 *restrict coeff,
+                       const __global numtyp *restrict mass,
+                       const int lj_types,
+                       const __global numtyp *restrict sp_lj,
+                       const __global int * dev_nbor,
+                       const __global int * dev_packed,
+                       __global acctyp3 *restrict ans,
+                       __global acctyp *restrict engv,
+                       __global acctyp2 *restrict drhoE,
+                       const int eflag, const int vflag,
+                       const int inum, const int nbor_pitch,
+                       const __global numtyp4 *restrict v_,
+                       const int dimension, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  int n_stride;
+  local_allocate_store_pair();
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp energy, virial[6];
+  if (EVFLAG) {
+    energy=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+  acctyp2 drhoEacc;
+  drhoEacc.x = drhoEacc.x = (acctyp)0;
+
+  if (ii<inum) {
+    int i, numj, nbor, nbor_end;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int itype=ix.w;
+    numtyp mass_itype = mass[itype];
+    numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
+
+    const numtyp4 extrai = extra[i];
+    numtyp rhoi = extrai.x;
+    numtyp esphi = extrai.y;
+    numtyp cvi = extrai.z;
+
+    // compute pressure of particle i with LJ EOS
+    numtyp fci[2];
+    LJEOS2(rhoi, esphi, cvi, fci);
+    numtyp fi = fci[0];
+    numtyp ci = fci[1];
+    fi /= (rhoi * rhoi);
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+
+      int j=dev_packed[nbor];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+      numtyp4 jv; fetch4(jv,j,vel_tex); //v_[j];
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      int mtype=itype*lj_types+jtype;
+      if (rsq<coeff[mtype].z) { // cutsq[itype][jtype]
+        numtyp mass_jtype = mass[jtype];
+        const numtyp coeffx=coeff[mtype].x;  // viscosity[itype][jtype]
+        const numtyp coeffy=coeff[mtype].y;  // cut[itype][jtype]
+
+        const numtyp4 extraj = extra[j];
+        numtyp rhoj = extraj.x;
+        numtyp esphj = extraj.y;
+        numtyp cvj = extraj.z;
+
+        numtyp h = coeffy; // cut[itype][jtype]
+        numtyp ih = ucl_recip(h); // (numtyp)1.0 / h;
+        numtyp ihsq = ih * ih;
+        numtyp ihcub = ihsq * ih;
+
+        numtyp wfd = h - ucl_sqrt(rsq);
+        if (dimension == 3) {
+          // Lucy Kernel, 3d
+          wfd = (numtyp)-25.066903536973515383 * wfd * wfd * ihsq * ihsq * ihsq * ih;
+        } else {
+          // Lucy Kernel, 2d
+          wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
+        }
+
+        // function call to LJ EOS
+        numtyp fcj[2];
+        LJEOS2(rhoj, esphj, cvj, fcj);
+        numtyp fj = fcj[0];
+        numtyp cj = fcj[1];
+        fj /= (rhoj * rhoj);
+
+        // apply long-range correction to model a LJ fluid with cutoff
+        // this implies that the modelled LJ fluid has cutoff == SPH cutoff
+        numtyp lrc = (numtyp)-11.1701 * (ihcub * ihcub * ihcub - (numtyp)1.5 * ihcub);
+        fi += lrc;
+        fj += lrc;
+
+        // dot product of velocity delta and distance vector
+        numtyp delvx = iv.x - jv.x;
+        numtyp delvy = iv.y - jv.y;
+        numtyp delvz = iv.z - jv.z;
+        numtyp delVdotDelR = delx*delvx + dely*delvy + delz*delvz;
+
+        // artificial viscosity (Monaghan 1992)
+        numtyp fvisc = (numtyp)0;
+        if (delVdotDelR < (numtyp)0) {
+          numtyp mu = h * delVdotDelR / (rsq + (numtyp)0.01 * h * h);
+          fvisc = -coeffx * (ci + cj) * mu / (rhoi + rhoj); // viscosity[itype][jtype]
+        }
+
+        // total pair force & thermal energy increment
+        numtyp force = -mass_itype * mass_jtype * (fi + fj + fvisc) * wfd;
+        numtyp deltaE = (numtyp)-0.5 * force * delVdotDelR;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        // and change in density, drho[i]
+        drhoEacc.x += mass_jtype * delVdotDelR * wfd;
+
+        // change in thermal energy, desph[i]
+        drhoEacc.y += deltaE;
+
+        if (EVFLAG && eflag) {
+          numtyp e = (numtyp)0;
+          energy+=e;
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+    } // for nbor
+  } // if ii
+  store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
+                ans,engv);
+  store_drhoE(drhoEacc,ii,inum,tid,t_per_atom,offset,drhoE);
+}
+
+__kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
+                            const __global numtyp4 *restrict extra,
+                            const __global numtyp4 *restrict coeff_in,
+                            const __global numtyp *restrict mass,
+                            const __global numtyp *restrict sp_lj_in,
+                            const __global int * dev_nbor,
+                            const __global int * dev_packed,
+                            __global acctyp3 *restrict ans,
+                            __global acctyp *restrict engv,
+                            __global acctyp2 *restrict drhoE,
+                            const int eflag, const int vflag,
+                            const int inum, const int nbor_pitch,
+                            const __global numtyp4 *restrict v_,
+                            const int dimension, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  #ifndef ONETYPE
+  __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
+    coeff[tid]=coeff_in[tid];
+  }
+  __syncthreads();
+  #else
+  const numtyp coeffx=coeff_in[ONETYPE].x;   // viscosity[itype][jtype]
+  const numtyp coeffy=coeff_in[ONETYPE].y;   // cut[itype][jtype]
+  const numtyp cutsq_p=coeff_in[ONETYPE].z;  // cutsq[itype][jtype]
+  #endif
+
+  int n_stride;
+  local_allocate_store_pair();
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp energy, virial[6];
+  if (EVFLAG) {
+    energy=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+  acctyp2 drhoEacc;
+  drhoEacc.x = drhoEacc.x = (acctyp)0;
+
+  if (ii<inum) {
+    int i, numj, nbor, nbor_end;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int iw=ix.w;
+    numtyp mass_itype = mass[iw];
+    #ifndef ONETYPE
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
+    #endif
+    numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
+
+    const numtyp4 extrai = extra[i];
+    numtyp rhoi = extrai.x;
+    numtyp esphi = extrai.y;
+    numtyp cvi = extrai.z;
+
+    // compute pressure of particle i with LJ EOS
+    numtyp fci[2];
+    LJEOS2(rhoi, esphi, cvi, fci);
+    numtyp fi = fci[0];
+    numtyp ci = fci[1];
+    fi /= (rhoi * rhoi);
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+
+      int j=dev_packed[nbor];
+      #ifndef ONETYPE
+      j &= NEIGHMASK;
+      #endif
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype = jx.w;
+      #ifndef ONETYPE
+      int mtype=itype+jx.w;
+      const numtyp cutsq_p=coeff[mtype].z; // cutsq[itype][jtype];
+      #endif
+      numtyp4 jv; fetch4(jv,j,vel_tex); //v_[j];
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      if (rsq<cutsq_p) {
+        numtyp mass_jtype = mass[jtype];
+        #ifndef ONETYPE
+        const numtyp coeffx=coeff[mtype].x;  // viscosity[itype][jtype]
+        const numtyp coeffy=coeff[mtype].y;  // cut[itype][jtype]
+        #endif
+        const numtyp4 extraj = extra[j];
+        numtyp rhoj = extraj.x;
+        numtyp esphj = extraj.y;
+        numtyp cvj = extraj.z;
+
+        numtyp h = coeffy; // cut[itype][jtype]
+        numtyp ih = ucl_recip(h); // (numtyp)1.0 / h;
+        numtyp ihsq = ih * ih;
+        numtyp ihcub = ihsq * ih;
+
+        numtyp wfd = h - ucl_sqrt(rsq);
+        if (dimension == 3) {
+          // Lucy Kernel, 3d
+          wfd = (numtyp)-25.066903536973515383 * wfd * wfd * ihsq * ihsq * ihsq * ih;
+        } else {
+          // Lucy Kernel, 2d
+          wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
+        }
+
+        // function call to LJ EOS
+        numtyp fcj[2];
+        LJEOS2(rhoj, esphj, cvj, fcj);
+        numtyp fj = fcj[0];
+        numtyp cj = fcj[1];
+        fj /= (rhoj * rhoj);
+
+        // apply long-range correction to model a LJ fluid with cutoff
+        // this implies that the modelled LJ fluid has cutoff == SPH cutoff
+        numtyp lrc = (numtyp)-11.1701 * (ihcub * ihcub * ihcub - (numtyp)1.5 * ihcub);
+        fi += lrc;
+        fj += lrc;
+
+        // dot product of velocity delta and distance vector
+        numtyp delvx = iv.x - jv.x;
+        numtyp delvy = iv.y - jv.y;
+        numtyp delvz = iv.z - jv.z;
+        numtyp delVdotDelR = delx*delvx + dely*delvy + delz*delvz;
+
+        // artificial viscosity (Monaghan 1992)
+        numtyp fvisc = (numtyp)0;
+        if (delVdotDelR < (numtyp)0) {
+          numtyp mu = h * delVdotDelR / (rsq + (numtyp)0.01 * h * h);
+          fvisc = -coeffx * (ci + cj) * mu / (rhoi + rhoj); // viscosity[itype][jtype]
+        }
+
+        // total pair force & thermal energy increment
+        numtyp force = -mass_itype * mass_jtype * (fi + fj + fvisc) * wfd;
+        numtyp deltaE = (numtyp)-0.5 * force * delVdotDelR;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        // and change in density, drho[i]
+        drhoEacc.x += mass_jtype * delVdotDelR * wfd;
+
+        // change in thermal energy, desph[i]
+        drhoEacc.y += deltaE;
+
+        if (EVFLAG && eflag) {
+          numtyp e = (numtyp)0;
+          energy+=e;
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+
+      }
+    } // for nbor
+  } // if ii
+
+  store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv);
+  store_drhoE(drhoEacc,ii,inum,tid,t_per_atom,offset,drhoE);
+}
+
diff --git a/lib/gpu/lal_sph_lj.h b/lib/gpu/lal_sph_lj.h
new file mode 100644
index 0000000000..e79c2ba265
--- /dev/null
+++ b/lib/gpu/lal_sph_lj.h
@@ -0,0 +1,96 @@
+/***************************************************************************
+                                 sph_lj.h
+                             -------------------
+                            Trung Nguyen (U Chicago)
+
+  Class for acceleration of the sph lj pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : December 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_SPH_LJ_H
+#define LAL_SPH_LJ_H
+
+#include "lal_base_sph.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class SPHLJ : public BaseSPH<numtyp, acctyp> {
+ public:
+  SPHLJ();
+  ~SPHLJ();
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    *
+    * Returns:
+    * -  0 if successful
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **host_cutsq,
+           double** host_cut, double **host_viscosity, double *host_mass,
+           const int dimension,
+           double *host_special_lj, const int nlocal, const int nall, const int max_nbors,
+           const int maxspecial, const double cell_size, const double gpu_split,
+           FILE *screen);
+
+  /// Clear all host and device data
+  /** \note This is called at the beginning of the init() routine **/
+  void clear();
+
+  /// Returns memory usage on device per atom
+  int bytes_per_atom(const int max_nbors) const;
+
+  /// Total host memory used by library for pair style
+  double host_memory_usage() const;
+
+  void get_extra_data(double *host_rho, double *host_esph,
+                      double *host_cv);
+
+  /// copy drho and desph from device to host
+  void update_drhoE(void **drhoE_ptr);
+
+  // --------------------------- TYPE DATA --------------------------
+
+  /// coeff.x = viscosity, coeff.y = cut, coeff.z = cutsq
+  UCL_D_Vec<numtyp4> coeff;
+
+  /// per-type coeffs
+  UCL_D_Vec<numtyp> mass;
+
+  /// Special LJ values
+  UCL_D_Vec<numtyp> sp_lj;
+
+  /// If atom type constants fit in shared memory, use fast kernels
+  bool shared_types;
+
+  /// Number of atom types
+  int _lj_types;
+
+  /// Per-atom arrays
+  UCL_Vector<acctyp,acctyp> drhoE;
+  int _max_drhoE_size;
+
+  int _dimension;
+
+  /// pointer to host data
+  double *rho, *esph, *cv;
+
+ private:
+  bool _allocated;
+  int loop(const int eflag, const int vflag);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_sph_lj_ext.cpp b/lib/gpu/lal_sph_lj_ext.cpp
new file mode 100644
index 0000000000..ba88dc4b19
--- /dev/null
+++ b/lib/gpu/lal_sph_lj_ext.cpp
@@ -0,0 +1,129 @@
+/***************************************************************************
+                                 sph_lj_ext.cpp
+                             -------------------
+                            Trung Dac Nguyen (U Chicago)
+
+  Functions for LAMMPS access to sph/lj acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : December 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "lal_sph_lj.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static SPHLJ<PRECISION,ACC_PRECISION> SPHLJMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int sph_lj_gpu_init(const int ntypes, double **cutsq, double** host_cut,
+                    double **host_viscosity, double* host_mass, const int dimension,
+                    double *special_lj, const int inum, const int nall,
+                    const int max_nbors,  const int maxspecial,
+                    const double cell_size, int &gpu_mode, FILE *screen) {
+  SPHLJMF.clear();
+  gpu_mode=SPHLJMF.device->gpu_mode();
+  double gpu_split=SPHLJMF.device->particle_split();
+  int first_gpu=SPHLJMF.device->first_device();
+  int last_gpu=SPHLJMF.device->last_device();
+  int world_me=SPHLJMF.device->world_me();
+  int gpu_rank=SPHLJMF.device->gpu_rank();
+  int procs_per_gpu=SPHLJMF.device->procs_per_gpu();
+
+  SPHLJMF.device->init_message(screen,"sph_lj",first_gpu,last_gpu);
+
+  bool message=false;
+  if (SPHLJMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=SPHLJMF.init(ntypes, cutsq, host_cut, host_viscosity, host_mass,
+                         dimension, special_lj, inum, nall, max_nbors,  maxspecial,
+                         cell_size, gpu_split, screen);
+
+  SPHLJMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=SPHLJMF.init(ntypes, cutsq, host_cut, host_viscosity, host_mass,
+                           dimension, special_lj, inum, nall, max_nbors, maxspecial,
+                           cell_size, gpu_split, screen);
+
+    SPHLJMF.device->serialize_init();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    SPHLJMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+void sph_lj_gpu_clear() {
+  SPHLJMF.clear();
+}
+
+int ** sph_lj_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                            double **host_x, int *host_type, double *sublo,
+                            double *subhi, tagint *host_tag, int **nspecial,
+                            tagint **special, const bool eflag, const bool vflag,
+                            const bool eatom, const bool vatom, int &host_start,
+                            int **ilist, int **jnum, const double cpu_time, bool &success,
+                            double **host_v) {
+  return SPHLJMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                         subhi, host_tag, nspecial, special, eflag, vflag,
+                         eatom, vatom, host_start, ilist, jnum, cpu_time, success,
+                         host_v);
+}
+
+void sph_lj_gpu_compute(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, int *ilist, int *numj,
+                        int **firstneigh, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, tagint *host_tag,
+                        double **host_v) {
+  SPHLJMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj,
+                  firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success,
+                  host_tag, host_v);
+}
+
+void sph_lj_gpu_get_extra_data(double *host_rho, double *host_esph, double *host_cv) {
+  SPHLJMF.get_extra_data(host_rho, host_esph, host_cv);
+}
+
+void sph_lj_gpu_update_drhoE(void **drhoE_ptr) {
+  SPHLJMF.update_drhoE(drhoE_ptr);
+}
+
+double sph_lj_gpu_bytes() {
+  return SPHLJMF.host_memory_usage();
+}
diff --git a/lib/gpu/lal_sph_taitwater.cpp b/lib/gpu/lal_sph_taitwater.cpp
new file mode 100644
index 0000000000..7a584d435e
--- /dev/null
+++ b/lib/gpu/lal_sph_taitwater.cpp
@@ -0,0 +1,225 @@
+/***************************************************************************
+                              sph_taitwater.cpp
+                             -------------------
+                            Trung Dac Nguyen (U Chicago)
+
+  Class for acceleration of the sph/taitwater pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : December 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "sph_taitwater_cl.h"
+#elif defined(USE_CUDART)
+const char *sph_taitwater=0;
+#else
+#include "sph_taitwater_cubin.h"
+#endif
+
+#include "lal_sph_taitwater.h"
+#include <cassert>
+namespace LAMMPS_AL {
+#define SPHTaitwaterT SPHTaitwater<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> device;
+
+template <class numtyp, class acctyp>
+SPHTaitwaterT::SPHTaitwater() : BaseSPH<numtyp,acctyp>(), _allocated(false) {
+  _max_drhoE_size = 0;
+}
+
+template <class numtyp, class acctyp>
+SPHTaitwaterT::~SPHTaitwater() {
+  clear();
+}
+
+template <class numtyp, class acctyp>
+int SPHTaitwaterT::bytes_per_atom(const int max_nbors) const {
+  return this->bytes_per_atom_atomic(max_nbors);
+}
+
+template <class numtyp, class acctyp>
+int SPHTaitwaterT::init(const int ntypes, double **host_cutsq,
+                        double **host_cut, double **host_viscosity,
+                        double* host_mass, double* host_rho0,
+                        double* host_soundspeed, double* host_B, const int dimension,
+                        double *host_special_lj, const int nlocal, const int nall,
+                        const int max_nbors, const int maxspecial,
+                        const double cell_size,
+                        const double gpu_split, FILE *_screen) {
+  const int max_shared_types=this->device->max_shared_types();
+
+  int onetype=0;
+  #ifdef USE_OPENCL
+  if (maxspecial==0)
+    for (int i=1; i<ntypes; i++)
+      for (int j=i; j<ntypes; j++)
+        if (host_cutsq[i][j]>0) {
+          if (onetype>0)
+            onetype=-1;
+          else if (onetype==0)
+            onetype=i*max_shared_types+j;
+        }
+  if (onetype<0) onetype=0;
+  #endif
+
+  int success;
+  int extra_fields = 4; // round up to accomodate quadruples of numtyp values
+                        // rho
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
+                            gpu_split,_screen,sph_taitwater,"k_sph_taitwater",
+                            onetype,extra_fields);
+  if (success!=0)
+    return success;
+
+  // If atom type constants fit in shared memory use fast kernel
+  int lj_types=ntypes;
+  shared_types=false;
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    shared_types=true;
+  }
+  _lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  coeff.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_viscosity,
+                         host_cut, host_cutsq);
+
+  UCL_H_Vec<numtyp4> dview_coeff2(ntypes, *(this->ucl_device), UCL_WRITE_ONLY);
+  for (int i = 0; i < ntypes; i++) {
+    dview_coeff2[i].x = host_mass[i];
+    dview_coeff2[i].y = host_rho0[i];
+    dview_coeff2[i].z = host_soundspeed[i];
+    dview_coeff2[i].w = host_B[i];
+  }
+  coeff2.alloc(ntypes,*(this->ucl_device), UCL_READ_ONLY);
+  ucl_copy(coeff2,dview_coeff2,false);
+
+  UCL_H_Vec<double> dview;
+  sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
+  dview.view(host_special_lj,4,*(this->ucl_device));
+  ucl_copy(sp_lj,dview,false);
+
+  // allocate per-atom array Q
+
+  int ef_nall=nall;
+  if (ef_nall==0)
+    ef_nall=2000;
+
+  _max_drhoE_size=static_cast<int>(static_cast<double>(ef_nall)*1.10);
+  drhoE.alloc(_max_drhoE_size*2,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE);
+
+  _dimension = dimension;
+
+  _allocated=true;
+  this->_max_bytes=coeff.row_bytes()+coeff2.row_bytes()+drhoE.row_bytes()+sp_lj.row_bytes();
+  return 0;
+}
+
+template <class numtyp, class acctyp>
+void SPHTaitwaterT::clear() {
+  if (!_allocated)
+    return;
+  _allocated=false;
+
+  coeff.clear();
+  coeff2.clear();
+  drhoE.clear();
+  sp_lj.clear();
+  this->clear_atomic();
+}
+
+template <class numtyp, class acctyp>
+double SPHTaitwaterT::host_memory_usage() const {
+  return this->host_memory_usage_atomic()+sizeof(SPHTaitwater<numtyp,acctyp>);
+}
+
+template <class numtyp, class acctyp>
+void SPHTaitwaterT::update_drhoE(void **drhoE_ptr) {
+  *drhoE_ptr=drhoE.host.begin();
+  drhoE.update_host(_max_drhoE_size*2,false);
+}
+
+// ---------------------------------------------------------------------------
+// Calculate energies, forces, and torques
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int SPHTaitwaterT::loop(const int eflag, const int vflag) {
+
+  int nall = this->atom->nall();
+
+  // Resize drhoE array if necessary
+  if (nall > _max_drhoE_size) {
+    _max_drhoE_size=static_cast<int>(static_cast<double>(nall)*1.10);
+    drhoE.resize(_max_drhoE_size*2);
+  }
+
+  // signal that we need to transfer extra data from the host
+
+  this->atom->extra_data_unavail();
+
+  numtyp4 *pextra=reinterpret_cast<numtyp4*>(&(this->atom->extra[0]));
+
+  int n = 0;
+  int nstride = 1;
+  for (int i = 0; i < nall; i++) {
+    int idx = n+i*nstride;
+    numtyp4 v;
+    v.x = rho[i];
+    v.y = 0;
+    v.z = 0;
+    v.w = 0;
+    pextra[idx] = v;
+  }
+  this->atom->add_extra_data();
+
+  // Compute the block size and grid size to keep all cores busy
+  const int BX=this->block_size();
+  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
+                               (BX/this->_threads_per_atom)));
+
+
+  int ainum=this->ans->inum();
+  int nbor_pitch=this->nbor->nbor_pitch();
+  this->time_pair.start();
+  if (shared_types) {
+    this->k_pair_sel->set_size(GX,BX);
+    this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &coeff2, &sp_lj,
+                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                          &this->ans->force, &this->ans->engv, &drhoE, &eflag, &vflag,
+                          &ainum, &nbor_pitch, &this->atom->v, &_dimension, &this->_threads_per_atom);
+  } else {
+    this->k_pair.set_size(GX,BX);
+    this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &coeff2,
+                     &_lj_types, &sp_lj, &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                     &this->ans->force, &this->ans->engv, &drhoE, &eflag, &vflag,
+                     &ainum, &nbor_pitch, &this->atom->v, &_dimension, &this->_threads_per_atom);
+  }
+
+  this->time_pair.stop();
+  return GX;
+}
+
+// ---------------------------------------------------------------------------
+// Get the extra data pointers from host
+// ---------------------------------------------------------------------------
+
+template <class numtyp, class acctyp>
+void SPHTaitwaterT::get_extra_data(double *host_rho) {
+  rho = host_rho;
+}
+
+template class SPHTaitwater<PRECISION,ACC_PRECISION>;
+}
diff --git a/lib/gpu/lal_sph_taitwater.cu b/lib/gpu/lal_sph_taitwater.cu
new file mode 100644
index 0000000000..9424d58996
--- /dev/null
+++ b/lib/gpu/lal_sph_taitwater.cu
@@ -0,0 +1,377 @@
+// **************************************************************************
+//                              sph_taitwater.cu
+//                             -------------------
+//                           Trung Dac Nguyen (U Chicago)
+//
+//  Device code for acceleration of the sph/taitwater pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : September 2023
+//    email                : ndactrung@gmail.com
+// ***************************************************************************
+
+#if defined(NV_KERNEL) || defined(USE_HIP)
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+_texture( pos_tex,float4);
+_texture( vel_tex,float4);
+#else
+_texture_2d( pos_tex,int4);
+_texture_2d( vel_tex,int4);
+#endif
+#else
+#define pos_tex x_
+#define vel_tex v_
+#endif
+
+#if (SHUFFLE_AVAIL == 0)
+
+#define store_drhoE(drhoEacc, ii, inum, tid, t_per_atom, offset, drhoE)      \
+  if (t_per_atom>1) {                                                        \
+    simdsync();                                                              \
+    simd_reduce_add2(t_per_atom, red_acc, offset, tid,                       \
+                     drhoEacc.x, drhoEacc.y);                                \
+  }                                                                          \
+  if (offset==0 && ii<inum) {                                                \
+    drhoE[ii]=drhoEacc;                                                      \
+  }
+#else
+#define store_drhoE(drhoEacc, ii, inum, tid, t_per_atom, offset, drhoE)     \
+  if (t_per_atom>1) {                                                       \
+    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
+      drhoEacc.x += shfl_down(drhoEacc.x, s, t_per_atom);                   \
+      drhoEacc.y += shfl_down(drhoEacc.y, s, t_per_atom);                   \
+    }                                                                       \
+  }                                                                         \
+  if (offset==0 && ii<inum) {                                               \
+    drhoE[ii]=drhoEacc;                                                     \
+  }
+#endif
+
+__kernel void k_sph_taitwater(const __global numtyp4 *restrict x_,
+                              const __global numtyp4 *restrict extra,
+                              const __global numtyp4 *restrict coeff,
+                              const __global numtyp4 *restrict coeff2,
+                              const int lj_types,
+                              const __global numtyp *restrict sp_lj,
+                              const __global int * dev_nbor,
+                              const __global int * dev_packed,
+                              __global acctyp3 *restrict ans,
+                              __global acctyp *restrict engv,
+                              __global acctyp2 *restrict drhoE,
+                              const int eflag, const int vflag,
+                              const int inum, const int nbor_pitch,
+                              const __global numtyp4 *restrict v_,
+                              const int dimension, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  int n_stride;
+  local_allocate_store_pair();
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp energy, virial[6];
+  if (EVFLAG) {
+    energy=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+  acctyp2 drhoEacc;
+  drhoEacc.x = drhoEacc.x = (acctyp)0;
+
+  if (ii<inum) {
+    int i, numj, nbor, nbor_end;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int itype=ix.w;
+    numtyp mass_itype = coeff2[itype].x;
+    numtyp rho0_itype = coeff2[itype].y;
+    numtyp soundspeed_itype = coeff2[itype].z;
+    numtyp B_itype = coeff2[itype].w;
+    numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
+
+    const numtyp4 extrai = extra[i];
+    numtyp rhoi = extrai.x;
+
+    // compute pressure of atom i with Tait EOS
+    numtyp tmp = rhoi / rho0_itype;
+    numtyp fi = tmp * tmp * tmp;
+    fi = B_itype * (fi * fi * tmp - (numtyp)1.0);
+    fi /= (rhoi * rhoi);
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+
+      int j=dev_packed[nbor];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+      numtyp4 jv; fetch4(jv,j,vel_tex); //v_[j];
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      int mtype=itype*lj_types+jtype;
+      if (rsq<coeff[mtype].z) { // cutsq[itype][jtype]
+        const numtyp coeffx=coeff[mtype].x;  // viscosity[itype][jtype]
+        const numtyp coeffy=coeff[mtype].y;  // cut[itype][jtype]
+
+        numtyp mass_jtype = coeff2[jtype].x;
+        numtyp rho0_jtype = coeff2[jtype].y;
+        numtyp soundspeed_jtype = coeff2[jtype].z;
+        numtyp B_jtype = coeff2[jtype].w;
+
+        const numtyp4 extraj = extra[j];
+        numtyp rhoj = extraj.x;
+
+        numtyp h = coeffy; // cut[itype][jtype]
+        numtyp ih = ucl_recip(h); // (numtyp)1.0 / h;
+        numtyp ihsq = ih * ih;
+
+        numtyp wfd = h - ucl_sqrt(rsq);
+        if (dimension == 3) {
+          // Lucy Kernel, 3d
+          wfd = (numtyp)-25.066903536973515383 * wfd * wfd * ihsq * ihsq * ihsq * ih;
+        } else {
+          // Lucy Kernel, 2d
+          wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
+        }
+
+        // compute pressure  of atom j with Tait EOS
+
+        numtyp tmp = rhoj / rho0_jtype;
+        numtyp fj = tmp * tmp * tmp;
+        fj = B_jtype * (fj * fj * tmp - (numtyp)1.0);
+        fj /= (rhoj * rhoj);
+
+        // dot product of velocity delta and distance vector
+        numtyp delvx = iv.x - jv.x;
+        numtyp delvy = iv.y - jv.y;
+        numtyp delvz = iv.z - jv.z;
+        numtyp delVdotDelR = delx*delvx + dely*delvy + delz*delvz;
+
+        // artificial viscosity (Monaghan 1992)
+        numtyp fvisc = (numtyp)0;
+        if (delVdotDelR < (numtyp)0) {
+          numtyp mu = h * delVdotDelR / (rsq + (numtyp)0.01 * h * h);
+          fvisc = -coeffx * (soundspeed_itype
+              + soundspeed_jtype) * mu / (rhoi + rhoj);
+        }
+
+        // total pair force & thermal energy increment
+        numtyp force = -mass_itype * mass_jtype * (fi + fj + fvisc) * wfd;
+        numtyp deltaE = (numtyp)-0.5 * force * delVdotDelR;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        // and change in density, drho[i]
+        drhoEacc.x += mass_jtype* delVdotDelR * wfd;
+
+        // change in thermal energy, desph[i]
+        drhoEacc.y += deltaE;
+
+        if (EVFLAG && eflag) {
+          numtyp e = (numtyp)0;
+          energy+=e;
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+    } // for nbor
+  } // if ii
+  store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
+                ans,engv);
+  store_drhoE(drhoEacc,ii,inum,tid,t_per_atom,offset,drhoE);
+}
+
+__kernel void k_sph_taitwater_fast(const __global numtyp4 *restrict x_,
+                                   const __global numtyp4 *restrict extra,
+                                   const __global numtyp4 *restrict coeff_in,
+                                   const __global numtyp4 *restrict coeff2_in,
+                                   const __global numtyp *restrict sp_lj_in,
+                                   const __global int * dev_nbor,
+                                   const __global int * dev_packed,
+                                   __global acctyp3 *restrict ans,
+                                   __global acctyp *restrict engv,
+                                   __global acctyp2 *restrict drhoE,
+                                   const int eflag, const int vflag,
+                                   const int inum, const int nbor_pitch,
+                                   const __global numtyp4 *restrict v_,
+                                   const int dimension, const int t_per_atom) {
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  #ifndef ONETYPE
+  __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
+  __local numtyp4 coeff2[MAX_SHARED_TYPES];
+  if (tid<MAX_SHARED_TYPES) {
+    coeff2[tid] = coeff2_in[tid];
+  }
+  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
+    coeff[tid]=coeff_in[tid];
+  }
+  __syncthreads();
+  #else
+  const numtyp coeffx=coeff_in[ONETYPE].x;   // viscosity[itype][jtype]
+  const numtyp coeffy=coeff_in[ONETYPE].y;   // cut[itype][jtype]
+  const numtyp cutsq_p=coeff_in[ONETYPE].z;  // cutsq[itype][jtype]
+  #endif
+
+  int n_stride;
+  local_allocate_store_pair();
+
+  acctyp3 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp energy, virial[6];
+  if (EVFLAG) {
+    energy=(acctyp)0;
+    for (int i=0; i<6; i++) virial[i]=(acctyp)0;
+  }
+  acctyp2 drhoEacc;
+  drhoEacc.x = drhoEacc.x = (acctyp)0;
+
+  if (ii<inum) {
+    int i, numj, nbor, nbor_end;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int iw=ix.w;
+    numtyp mass_itype = coeff2[iw].x;
+    numtyp rho0_itype = coeff2[iw].y;
+    numtyp soundspeed_itype = coeff2[iw].z;
+    numtyp B_itype = coeff2[iw].w;
+    #ifndef ONETYPE
+    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);
+    #endif
+    numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
+
+    const numtyp4 extrai = extra[i];
+    numtyp rhoi = extrai.x;
+
+    // compute pressure of atom i with Tait EOS
+    numtyp tmp = rhoi / rho0_itype;
+    numtyp fi = tmp * tmp * tmp;
+    fi = B_itype * (fi * fi * tmp - (numtyp)1.0);
+    fi /= (rhoi * rhoi);
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+      ucl_prefetch(dev_packed+nbor+n_stride);
+
+      int j=dev_packed[nbor];
+      #ifndef ONETYPE
+      j &= NEIGHMASK;
+      #endif
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+      #ifndef ONETYPE
+      int mtype=itype+jx.w;
+      const numtyp cutsq_p=coeff[mtype].z;
+      #endif
+      numtyp4 jv; fetch4(jv,j,vel_tex); //v_[j];
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      if (rsq<cutsq_p) {
+        #ifndef ONETYPE
+        const numtyp coeffx=coeff[mtype].x;  // viscosity[itype][jtype]
+        const numtyp coeffy=coeff[mtype].y;  // cut[itype][jtype]
+        #endif
+
+        numtyp mass_jtype = coeff2[jtype].x;
+        numtyp rho0_jtype = coeff2[jtype].y;
+        numtyp soundspeed_jtype = coeff2[jtype].z;
+        numtyp B_jtype = coeff2[jtype].w;
+
+        const numtyp4 extraj = extra[j];
+        numtyp rhoj = extraj.x;
+
+        numtyp h = coeffy; // cut[itype][jtype]
+        numtyp ih = ucl_recip(h); // (numtyp)1.0 / h;
+        numtyp ihsq = ih * ih;
+
+        numtyp wfd = h - ucl_sqrt(rsq);
+        if (dimension == 3) {
+          // Lucy Kernel, 3d
+          wfd = (numtyp)-25.066903536973515383 * wfd * wfd * ihsq * ihsq * ihsq * ih;
+        } else {
+          // Lucy Kernel, 2d
+          wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
+        }
+
+        // compute pressure  of atom j with Tait EOS
+        numtyp tmp = rhoj / rho0_jtype;
+        numtyp fj = tmp * tmp * tmp;
+        fj = B_jtype * (fj * fj * tmp - (numtyp)1.0);
+        fj /= (rhoj * rhoj);
+
+        // dot product of velocity delta and distance vector
+        numtyp delvx = iv.x - jv.x;
+        numtyp delvy = iv.y - jv.y;
+        numtyp delvz = iv.z - jv.z;
+        numtyp delVdotDelR = delx*delvx + dely*delvy + delz*delvz;
+
+        // artificial viscosity (Monaghan 1992)
+        numtyp fvisc = (numtyp)0;
+        if (delVdotDelR < (numtyp)0) {
+          numtyp mu = h * delVdotDelR / (rsq + (numtyp)0.01 * h * h);
+          fvisc = -coeffx * (soundspeed_itype
+              + soundspeed_jtype) * mu / (rhoi + rhoj);
+        }
+
+        // total pair force & thermal energy increment
+        numtyp force = -mass_itype * mass_jtype * (fi + fj + fvisc) * wfd;
+        numtyp deltaE = (numtyp)-0.5 * force * delVdotDelR;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+
+        // and change in density
+        drhoEacc.x += mass_jtype * delVdotDelR * wfd;
+
+        // change in thermal energy
+        drhoEacc.y += deltaE;
+
+        if (EVFLAG && eflag) {
+          numtyp e = (numtyp)0;
+          energy+=e;
+        }
+        if (EVFLAG && vflag) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+    } // for nbor
+  } // if ii
+
+  store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv);
+  store_drhoE(drhoEacc,ii,inum,tid,t_per_atom,offset,drhoE);
+}
+
diff --git a/lib/gpu/lal_sph_taitwater.h b/lib/gpu/lal_sph_taitwater.h
new file mode 100644
index 0000000000..f3edbe5c27
--- /dev/null
+++ b/lib/gpu/lal_sph_taitwater.h
@@ -0,0 +1,96 @@
+/***************************************************************************
+                              sph_taitwater.h
+                             -------------------
+                            Trung Dac Nguyen (U Chicago)
+
+  Class for acceleration of the sph/taitwater pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : December 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_SPH_TAITWATER_H
+#define LAL_SPH_TAITWATER_H
+
+#include "lal_base_sph.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class SPHTaitwater : public BaseSPH<numtyp, acctyp> {
+ public:
+  SPHTaitwater();
+  ~SPHTaitwater();
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    *
+    * Returns:
+    * -  0 if successful
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **host_cutsq,
+           double** host_cut, double **host_viscosity, double *host_mass,
+           double* host_rho0, double* host_soundspeed, double* host_B,
+           const int dimension, double *host_special_lj,
+           const int nlocal, const int nall, const int max_nbors,
+           const int maxspecial, const double cell_size,
+           const double gpu_split, FILE *screen);
+
+  /// Clear all host and device data
+  /** \note This is called at the beginning of the init() routine **/
+  void clear();
+
+  /// Returns memory usage on device per atom
+  int bytes_per_atom(const int max_nbors) const;
+
+  /// Total host memory used by library for pair style
+  double host_memory_usage() const;
+
+  void get_extra_data(double *host_rho);
+
+  /// copy drho and desph from device to host
+  void update_drhoE(void **drhoE_ptr);
+
+  // --------------------------- TYPE DATA --------------------------
+
+  /// per-pair coeffs: coeff.x = viscosity, coeff.y = cut, coeff.z = cutsq
+  UCL_D_Vec<numtyp4> coeff;
+
+  /// per-type coeffs
+  UCL_D_Vec<numtyp4> coeff2;
+
+  /// Special LJ values
+  UCL_D_Vec<numtyp> sp_lj;
+
+  /// If atom type constants fit in shared memory, use fast kernels
+  bool shared_types;
+
+  /// Number of atom types
+  int _lj_types;
+
+  /// Per-atom arrays
+  UCL_Vector<acctyp,acctyp> drhoE;
+  int _max_drhoE_size;
+
+  int _dimension;
+
+  /// pointer to host data
+  double *rho;
+
+ private:
+  bool _allocated;
+  int loop(const int eflag, const int vflag);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_sph_taitwater_ext.cpp b/lib/gpu/lal_sph_taitwater_ext.cpp
new file mode 100644
index 0000000000..8372132213
--- /dev/null
+++ b/lib/gpu/lal_sph_taitwater_ext.cpp
@@ -0,0 +1,133 @@
+/***************************************************************************
+                             sph_taitwater_ext.cpp
+                             -------------------
+                            Trung Dac Nguyen (U Chicago)
+
+  Functions for LAMMPS access to sph taitwater acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : December 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "lal_sph_taitwater.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static SPHTaitwater<PRECISION,ACC_PRECISION> SPHTaitwaterMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int sph_taitwater_gpu_init(const int ntypes, double **cutsq, double** host_cut,
+                           double **host_viscosity, double* host_mass,
+                           double* host_rho0, double* host_soundspeed, double* host_B,
+                           const int dimension, double *special_lj,
+                           const int inum, const int nall,
+                           const int max_nbors,  const int maxspecial,
+                           const double cell_size, int &gpu_mode, FILE *screen) {
+  SPHTaitwaterMF.clear();
+  gpu_mode=SPHTaitwaterMF.device->gpu_mode();
+  double gpu_split=SPHTaitwaterMF.device->particle_split();
+  int first_gpu=SPHTaitwaterMF.device->first_device();
+  int last_gpu=SPHTaitwaterMF.device->last_device();
+  int world_me=SPHTaitwaterMF.device->world_me();
+  int gpu_rank=SPHTaitwaterMF.device->gpu_rank();
+  int procs_per_gpu=SPHTaitwaterMF.device->procs_per_gpu();
+
+  SPHTaitwaterMF.device->init_message(screen,"sph_taitwater",first_gpu,last_gpu);
+
+  bool message=false;
+  if (SPHTaitwaterMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=SPHTaitwaterMF.init(ntypes, cutsq, host_cut, host_viscosity, host_mass,
+                                host_rho0, host_soundspeed, host_B, dimension,
+                                special_lj, inum, nall, max_nbors,  maxspecial,
+                                cell_size, gpu_split, screen);
+
+  SPHTaitwaterMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=SPHTaitwaterMF.init(ntypes, cutsq, host_cut, host_viscosity, host_mass,
+                                  host_rho0, host_soundspeed, host_B, dimension,
+                                  special_lj, inum, nall, max_nbors, maxspecial,
+                                  cell_size, gpu_split, screen);
+
+    SPHTaitwaterMF.device->serialize_init();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    SPHTaitwaterMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+void sph_taitwater_gpu_clear() {
+  SPHTaitwaterMF.clear();
+}
+
+int ** sph_taitwater_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                         double **host_x, int *host_type, double *sublo,
+                         double *subhi, tagint *host_tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag,
+                         const bool eatom, const bool vatom, int &host_start,
+                         int **ilist, int **jnum, const double cpu_time, bool &success,
+                         double **host_v) {
+  return SPHTaitwaterMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                         subhi, host_tag, nspecial, special, eflag, vflag, eatom,
+                         vatom, host_start, ilist, jnum, cpu_time, success,
+                         host_v);
+}
+
+void sph_taitwater_gpu_compute(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, int *ilist, int *numj,
+                        int **firstneigh, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, tagint *host_tag,
+                        double **host_v) {
+  SPHTaitwaterMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj,
+                  firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success,
+                  host_tag, host_v);
+}
+
+void sph_taitwater_gpu_get_extra_data(double *host_rho) {
+  SPHTaitwaterMF.get_extra_data(host_rho);
+}
+
+void sph_taitwater_gpu_update_drhoE(void **drhoE_ptr) {
+  SPHTaitwaterMF.update_drhoE(drhoE_ptr);
+}
+
+double sph_taitwater_gpu_bytes() {
+  return SPHTaitwaterMF.host_memory_usage();
+}
diff --git a/lib/kokkos/core/src/Kokkos_Printf.hpp b/lib/kokkos/core/src/Kokkos_Printf.hpp
index 39f95825c3..af20221a5a 100644
--- a/lib/kokkos/core/src/Kokkos_Printf.hpp
+++ b/lib/kokkos/core/src/Kokkos_Printf.hpp
@@ -31,7 +31,7 @@ namespace Kokkos {
 // backends. The GPU backends always return 1 and NVHPC only compiles if we
 // don't ask for the return value.
 template <typename... Args>
-KOKKOS_FUNCTION void printf(const char* format, Args... args) {
+KOKKOS_FORCEINLINE_FUNCTION void printf(const char* format, Args... args) {
 #ifdef KOKKOS_ENABLE_SYCL
   // Some compilers warn if "args" is empty and format is not a string literal
   if constexpr (sizeof...(Args) == 0)
diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp
index 03f5fff395..4586406e16 100644
--- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp
+++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Instance.hpp
@@ -219,8 +219,6 @@ KOKKOS_DEPRECATED void OpenMP::partition_master(F const& f, int num_partitions,
     Exec::validate_partition_impl(prev_instance->m_pool_size, num_partitions,
                                   partition_size);
 
-    OpenMP::memory_space space;
-
 #pragma omp parallel num_threads(num_partitions)
     {
       Exec thread_local_instance(partition_size);
diff --git a/lib/mdi/Install.py b/lib/mdi/Install.py
index 316313ded8..c455f2d064 100644
--- a/lib/mdi/Install.py
+++ b/lib/mdi/Install.py
@@ -32,7 +32,7 @@ make lib-mdi args="-m mpi" # build MDI lib with same settings as in the mpi Make
 
 # settings
 
-version = "1.4.16"
+version = "1.4.26"
 url = "https://github.com/MolSSI-MDI/MDI_Library/archive/v%s.tar.gz" % version
 
 # known checksums for different MDI versions. used to validate the download.
@@ -41,6 +41,7 @@ checksums = { \
               '1.4.12' : '7a222353ae8e03961d5365e6cd48baee', \
               '1.4.14' : '7a059bb12535360fdcb7de8402f9a0fc', \
               '1.4.16' : '407db44e2d79447ab5c1233af1965f65', \
+              '1.4.26' : '3124bb85259471e2a53a891f04bf697a', \
               }
 
 # print error message or help
diff --git a/lib/pace/Install.py b/lib/pace/Install.py
index 8d31852e44..fcd9497937 100644
--- a/lib/pace/Install.py
+++ b/lib/pace/Install.py
@@ -18,11 +18,11 @@ from install_helpers import fullpath, geturl, checkmd5sum, getfallback
 # settings
 
 thisdir = fullpath('.')
-version ='v.2023.10.04'
+version ='v.2023.11.25.fix'
 
 # known checksums for different PACE versions. used to validate the download.
 checksums = { \
-    'v.2023.10.04': '70ff79f4e59af175e55d24f3243ad1ff'
+    'v.2023.11.25.fix': 'b45de9a633f42ed65422567e3ce56f9f'
 }
 
 parser = ArgumentParser(prog='Install.py', description="LAMMPS library build wrapper script")
diff --git a/lib/pace/Makefile b/lib/pace/Makefile
index 5a1588ef93..a7ac753d28 100644
--- a/lib/pace/Makefile
+++ b/lib/pace/Makefile
@@ -21,7 +21,7 @@ OBJ =   $(SRC:.cpp=.o)
 
 
 # ------ SETTINGS ------
-CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE -I$(YAML_CPP_INC) -I$(WIGNER_CPP_INC) -I$(CNPY_CPP_INC) -DEXTRA_C_PROJECTIONS
+CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE -I$(YAML_CPP_INC) -I$(WIGNER_CPP_INC) -I$(CNPY_CPP_INC) -DEXTRA_C_PROJECTIONS -DCOMPUTE_B_GRAD
 
 ARCHIVE =	ar
 ARCHFLAG =	-rc
diff --git a/lib/pace/Makefile.lammps b/lib/pace/Makefile.lammps
index 6411e49a07..e221918056 100644
--- a/lib/pace/Makefile.lammps
+++ b/lib/pace/Makefile.lammps
@@ -1,3 +1,3 @@
-pace_SYSINC =-I../../lib/pace/src/ML-PACE -I../../lib/pace/src/yaml-cpp/include -I../../lib/pace/src/wigner-cpp/include -DEXTRA_C_PROJECTIONS
+pace_SYSINC =-I../../lib/pace/src/ML-PACE -I../../lib/pace/src/yaml-cpp/include -I../../lib/pace/src/wigner-cpp/include -DEXTRA_C_PROJECTIONS -DCOMPUTE_B_GRAD
 pace_SYSLIB = -L../../lib/pace/ -lpace -L../../lib/pace/src/yaml-cpp/ -lyaml-cpp
 pace_SYSPATH =
diff --git a/potentials/CBNOH.aip.water.2dm b/potentials/CBNOH.aip.water.2dm
new file mode 100755
index 0000000000..83205c354f
--- /dev/null
+++ b/potentials/CBNOH.aip.water.2dm
@@ -0,0 +1,58 @@
+# DATE: 2023-12-20 UNITS: metal CONTRIBUTOR: Wengen Ouyang w.g.ouyang@gmail.com
+# CITATION: Z. Feng, ..., and W. Ouyang, J. Phys. Chem. C 127(18), 8704 (2023).
+# CITATION: Z. Feng, ..., and W. Ouyang, Langmuir 39(50), 18198-18207 (2023).
+# Anisotropic Potential (AIP) for water/graphene and water/hBN heterojunctions
+# The parameters below are fitted against the PBE + MBD-NL (graphene/water) and SCAN (hBN/water) DFT reference data.
+
+# The parameters for bilayer graphene/graphene, graphene/hBN and hBN/hBN junctions are taken from
+# CITATION: Ouyang, Mandelli, Urbakh, Hod, Nano Letters 18, 6009-6016 (2018).
+#
+# -------------------Repulsion Potential ------------------++++++++++++++ Vdw Potential ++++++++++++++++************
+#    beta(A)    alpha    delta(A)  epsilon(meV)  C(meV)        d         sR      reff(A)   C6(meV*A^6)    S    rcut
+#
+# For water-graphene
+C Ow  5.453696  6.181724  1.250255   3.349092     0.687806  9.057065   1.232495   2.775772  100226.555031  1.0  2.0
+C Hw  2.553809  9.686644  1.964892  41.776171   -16.300128  9.015685   0.744155   2.415456    7409.128564  1.0  2.0
+Ow C  5.453696  6.181724  1.250255   3.349092     0.687806  9.057065   1.232495   2.775772  100226.555031  1.0  1.2
+Hw C  2.553809  9.686644  1.964892  41.776171   -16.300128  9.015685   0.744155   2.415456    7409.128564  1.0  1.2
+
+# For water-hBN
+N Ow  3.530598  16.377816  1.285374  1.717537    1.339337  24.797794   0.771411   3.928357   33589.850651  1.0  2.0
+N Hw  4.029390   5.360546  0.950352 15.945549   -1.486701  10.797276   1.352684   2.293775   41247.181447  1.0  2.0
+B Ow  3.907514   7.842519  2.380078 32.122737    1.190485  17.482482   0.788174   2.368217  139539.370785  1.0  2.0
+B Hw  3.804966   2.356248  1.114761  9.193309   -5.922514   9.000572   1.334703   1.746122   43796.489158  1.0  2.0
+Ow N  3.530598  16.377816  1.285374  1.717537    1.339337  24.797794   0.771411   3.928357   33589.850651  1.0  1.2
+Hw N  4.029390   5.360546  0.950352 15.945549   -1.486701  10.797276   1.352684   2.293775   41247.181447  1.0  1.2
+Ow B  3.907514   7.842519  2.380078 32.122737    1.190485  17.482482   0.788174   2.368217  139539.370785  1.0  1.2
+Hw B  3.804966   2.356248  1.114761  9.193309   -5.922514   9.000572   1.334703   1.746122   43796.489158  1.0  1.2
+
+# For graphene and hydrocarbons
+C C   3.205843  7.511126  1.235334  1.528338E-5  37.530428  15.499947  0.7954443  3.681440  25.714535E3   1.0   2.0
+H H   3.974540   6.53799  1.080633  0.6700556    0.8333833  15.022371  0.7490632  2.767223  1.6159581E3   1.0   1.2
+C H   2.642950  12.91410  1.020257  0.9750012    25.340996  15.222927  0.8115998  3.887324  5.6874617E3   1.0   1.5
+H C   2.642950  12.91410  1.020257  0.9750012    25.340996  15.222927  0.8115998  3.887324  5.6874617E3   1.0   1.5
+
+# For hBN
+B B   3.143737  9.825139  1.936405   2.7848400   14.495957   15.199263  0.7834022  3.682950  49.498013E3   1.0  2.0
+N N   3.443196  7.084490  1.747349   2.9139991   46.508553   15.020370  0.8008370  3.551843  14.810151E3   1.0  2.0
+B N   3.295257  7.224311  2.872667   1.3715032    0.4347152  14.594578  0.8044028  3.765728  24.669996E3   1.0  2.0
+B H   2.718657  9.214551  3.273063  14.015714    14.760509   15.084752  0.7768383  3.640866  7.9642467E3   1.0  1.5
+N B   3.295257  7.224311  2.872667   1.3715032    0.4347152  14.594578  0.8044028  3.765728  24.669996E3   1.0  2.0
+H B   2.718657  9.214551  3.273063  14.015714    14.760509   15.084752  0.7768383  3.640866  7.9642467E3   1.0  1.5
+
+# For graphene-hBN
+C B  3.303662  10.54415   2.926741  16.719972     0.3571734  15.305254  0.7001581  3.097327  30.162869E3   1.0  2.0
+C N  3.253564   8.825921  1.059550  18.344740    21.913573   15.000000  0.7234983  3.013117  19.063095E3   1.0  2.0
+B C  3.303662  10.54415   2.926741  16.719972     0.3571734  15.305254  0.7001581  3.097327  30.162869E3   1.0  2.0
+N C  3.253564   8.825921  1.059550  18.344740    21.913573   15.000000  0.7234983  3.013117  19.063095E3   1.0  2.0
+
+# The AIPs for other elements are turned off
+H Ow  5.453696   6.181724  1.250255  0.000000   0.000000    9.057065   1.232495   2.775772      0.000000   1.0  1.2
+H Hw  5.453696   6.181724  1.250255  0.000000   0.000000    9.057065   1.232495   2.775772      0.000000   1.0  1.2
+Ow H  5.453696   6.181724  1.250255  0.000000   0.000000    9.057065   1.232495   2.775772      0.000000   1.0  1.2
+Hw H  5.453696   6.181724  1.250255  0.000000   0.000000    9.057065   1.232495   2.775772      0.000000   1.0  1.2
+
+Ow Ow 5.453696   6.181724  1.250255  0.000000   0.000000    9.057065   1.232495   2.775772      0.000000   1.0  1.2
+Hw Hw 5.453696   6.181724  1.250255  0.000000   0.000000    9.057065   1.232495   2.775772      0.000000   1.0  1.2
+Ow Hw 5.453696   6.181724  1.250255  0.000000   0.000000    9.057065   1.232495   2.775772      0.000000   1.0  1.2
+Hw Ow 5.453696   6.181724  1.250255  0.000000   0.000000    9.057065   1.232495   2.775772      0.000000   1.0  1.2
diff --git a/potentials/COH.aip.water.2dm b/potentials/COH.aip.water.2dm
deleted file mode 100644
index 5325399abe..0000000000
--- a/potentials/COH.aip.water.2dm
+++ /dev/null
@@ -1,28 +0,0 @@
-# DATE: 2022-12-02 UNITS: metal CONTRIBUTOR: Wengen Ouyang w.g.ouyang@gmail.com CITATION: Z. Feng, ..., and W. Ouyang, J. Phys. Chem. C 127, 8704 (2023).
-# Anisotropic Interfacial Potential (AIP) parameters for water/graphene heterojunctions
-# The parameters below are fitted against the PBE + MBD-NL DFT reference data from 2.5 A to 15 A.
-#
-# ----------------- Repulsion Potential ------------------++++++++++++++ Vdw Potential ++++++++++++++++************
-#    beta(A)    alpha    delta(A)  epsilon(meV)  C(meV)        d         sR      reff(A)   C6(meV*A^6)    S    rcut
-# For graphene and hydrocarbons
-C C  3.205843  7.511126  1.235334  1.528338E-5  37.530428  15.499947  0.7954443  3.681440  25.714535E3   1.0   2.0
-H H  3.974540   6.53799  1.080633  0.6700556    0.8333833  15.022371  0.7490632  2.767223  1.6159581E3   1.0   1.2
-C H  2.642950  12.91410  1.020257  0.9750012    25.340996  15.222927  0.8115998  3.887324  5.6874617E3   1.0   1.5
-H C  2.642950  12.91410  1.020257  0.9750012    25.340996  15.222927  0.8115998  3.887324  5.6874617E3   1.0   1.5
-
-# For water-graphene
-C Ow  5.45369612 6.18172364 1.25025450  3.34909245   0.68780636 9.05706482 1.23249498 2.77577173 100226.55503127   1.0   2.0
-C Hw  2.55380862 9.68664390 1.96489198 41.77617053 -16.30012807 9.01568534 0.74415463 2.41545571   7409.12856378   1.0   2.0
-Ow C  5.45369612 6.18172364 1.25025450  3.34909245   0.68780636 9.05706482 1.23249498 2.77577173 100226.55503127   1.0   1.2
-Hw C  2.55380862 9.68664390 1.96489198 41.77617053 -16.30012807 9.01568534 0.74415463 2.41545571   7409.12856378   1.0   1.2
-
-# # The ILPs for other systems are set to zero
-H Ow  5.45369612 6.18172364 1.25025450 0.00000000 0.00000000 9.05706482 1.23249498 2.77577173 0.00000000    1.0     1.2
-H Hw  5.45369612 6.18172364 1.25025450 0.00000000 0.00000000 9.05706482 1.23249498 2.77577173 0.00000000    1.0     1.2
-Ow H  5.45369612 6.18172364 1.25025450 0.00000000 0.00000000 9.05706482 1.23249498 2.77577173 0.00000000    1.0     1.2
-Hw H  5.45369612 6.18172364 1.25025450 0.00000000 0.00000000 9.05706482 1.23249498 2.77577173 0.00000000    1.0     1.2
-
-Ow Ow 5.45369612 6.18172364 1.25025450 0.00000000 0.00000000 9.05706482 1.23249498 2.77577173 0.00000000    1.0     1.2
-Hw Hw 5.45369612 6.18172364 1.25025450 0.00000000 0.00000000 9.05706482 1.23249498 2.77577173 0.00000000    1.0     1.2
-Ow Hw 5.45369612 6.18172364 1.25025450 0.00000000 0.00000000 9.05706482 1.23249498 2.77577173 0.00000000    1.0     1.2
-Hw Ow 5.45369612 6.18172364 1.25025450 0.00000000 0.00000000 9.05706482 1.23249498 2.77577173 0.00000000    1.0     1.2
diff --git a/potentials/PSiO.nb3b.screened b/potentials/PSiO.nb3b.screened
new file mode 100644
index 0000000000..445da13e7a
--- /dev/null
+++ b/potentials/PSiO.nb3b.screened
@@ -0,0 +1,41 @@
+# DATE: 2023-10-30 UNITS: metal CONTRIBUTOR: Federica Lodesani <federica.lodesani@unimore.it>  CITATION: Bertani, Menziani, Pedone (2021). Physical Review Materials, 5(4), 045602
+#
+# Multiple entries can be added to this file, LAMMPS only reads the ones it needs
+#
+# These entries are in LAMMPS "metal" units:
+#
+# Format of a single entry (one or more lines):
+#   element 1 (central atom), element 2, element 3,
+#   k_theta, theta_0, rho, cutoff
+#
+# (ijj)-->determines cutoff for i-j distance
+# (jik)-->determines k, theta_0 for i-j-k angle term
+#
+# i  j  k   K     theta0   rho  cutoff
+O   P   P    32.5  109.47   1.0   3.3
+O   P   Si   60.0  109.47   1.0   0.000
+O   P   O   0.000  0.000    1.0   0.000
+O   Si  P    60.0  109.47   1.0   0.000
+O   Si  Si   12.5  109.47   1.0   3.3
+O   Si  O   0.000  0.000    1.0   0.000
+O   O   P   0.000  0.000    1.0   0.000
+O   O   Si  0.000  0.000    1.0   0.000
+O   O   O   0.000  0.000    1.0   0.000
+P   P   P   0.000  0.000    1.0   0.000
+P   P   Si  0.000  0.000    1.0   0.000
+P   P   O   0.000  0.000    1.0   0.000
+P   Si  P   0.000  0.000    1.0   0.000
+P   Si  Si  0.000  0.000    1.0   0.000
+P   Si  O   0.000  0.000    1.0   0.000
+P   O   P   0.000  0.000    1.0   0.000
+P   O   Si  0.000  0.000    1.0   0.000
+P   O   O   0.000  0.000    1.0   0.000
+Si  P   P   0.000  0.000    1.0   0.000
+Si  P   Si  0.000  0.000    1.0   0.000
+Si  P   O   0.000  0.000    1.0   0.000
+Si  Si  P   0.000  0.000    1.0   0.000
+Si  Si  Si  0.000  0.000    1.0   0.000
+Si  Si  O   0.000  0.000    1.0   0.000
+Si  O   P   0.000  0.000    1.0   0.000
+Si  O   Si  0.000  0.000    1.0   0.000
+Si  O   O   0.000  0.000    1.0   0.000
diff --git a/potentials/TMD.ILP b/potentials/TMD.ILP
new file mode 100644
index 0000000000..18563199c9
--- /dev/null
+++ b/potentials/TMD.ILP
@@ -0,0 +1,25 @@
+# DATE: 2021-12-02 UNITS: metal CONTRIBUTOR: Wengen Ouyang w.g.ouyang@gmail.com
+# CITATION: W. Ouyang, et al., J. Chem. Theory Comput. 17, 7237 (2021).
+# CITATION: W.  Jiang, et al., J. Phys. Chem. A, 127, 46, 9820–9830 (2023).
+# Interlayer Potential (ILP) for bilayer and bulk Group-VI Transition Metal Dichalcogenides.
+# The parameters below are fitted against the HSE + MBD-NL DFT reference data.
+#
+# -------------------- Repulsion Potential -------------------++++++++++++++++ Vdw Potential ++++++++++++++++*********
+#       beta(A)     alpha    delta(A)  epsilon(meV)  C(meV)         d         sR     reff(A)    C6(meV*A^6)    S   rcut
+Mo Mo  5.579450   9.377662   2.027222  144.151775   97.978570   89.437597  2.059031  5.122055  491850.316195  1.0  4.0
+W   W  5.530854   6.624992   1.983208    0.271792  140.174059  107.392585  1.356333  4.437591  691850.243962  1.0  4.0
+S   S  3.161402   8.093263   1.953140    4.586764  118.065466   58.809416  0.215367  4.299600  148811.243409  1.0  4.0
+Se Se  3.938627  10.515924   2.415783    3.012583   22.400612  116.864517  0.151121  5.884241  112506.195626  1.0  4.0
+Mo  W  5.412298   8.647128   2.108665   51.177950  184.342860  201.281256  2.547743  2.492287   99996.913401  1.0  4.0
+Mo  S  3.627152  19.971375   7.585031   76.101931    3.317496   45.720328  0.947470  4.410425  150597.857716  1.0  4.0
+Mo Se  6.196447   4.844134  14.362005    7.407221    0.058823   27.156223  0.976771  3.979186  786029.840651  1.0  4.0
+W   S  3.680136  11.163004  32.254117  110.019679   79.381335  138.340438  0.900750  8.875776  250600.809034  1.0  4.0
+W  Se  3.559392  20.638856   1.202717   20.478669  197.422484   10.005271  1.052738  3.815817  288321.561114  1.0  4.0
+S  Se  2.820092   7.491151   1.933323  141.532559  293.127817   90.470904  0.390492  4.170885  117688.987069  1.0  4.0
+# Symmetric Atom Pair
+W  Mo  5.412298   8.647128   2.108665   51.177950  184.342860  201.281256  2.547743  2.492287   99996.913401  1.0  4.0
+S  Mo  3.627152  19.971375   7.585031   76.101931    3.317496   45.720328  0.947470  4.410425  150597.857716  1.0  4.0
+Se Mo  6.196447   4.844134  14.362005    7.407221    0.058823   27.156223  0.976771  3.979186  786029.840651  1.0  4.0
+S   W  3.680136  11.163004  32.254117  110.019679   79.381335  138.340438  0.900750  8.875776  250600.809034  1.0  4.0
+Se  W  3.559392  20.638856   1.202717   20.478669  197.422484   10.005271  1.052738  3.815817  288321.561114  1.0  4.0
+Se  S  2.820092   7.491151   1.933323  141.532559  293.127817   90.470904  0.390492  4.170885  117688.987069  1.0  4.0
diff --git a/python/lammps/core.py b/python/lammps/core.py
index 1ff123760b..f4ba7d398b 100644
--- a/python/lammps/core.py
+++ b/python/lammps/core.py
@@ -282,6 +282,8 @@ class lammps(object):
     self.lib.lammps_config_accelerator.argtypes = [c_char_p, c_char_p, c_char_p]
 
     self.lib.lammps_set_variable.argtypes = [c_void_p, c_char_p, c_char_p]
+    self.lib.lammps_set_string_variable.argtypes = [c_void_p, c_char_p, c_char_p]
+    self.lib.lammps_set_internal_variable.argtypes = [c_void_p, c_char_p, c_double]
 
     self.lib.lammps_has_style.argtypes = [c_void_p, c_char_p, c_char_p]
 
@@ -1252,6 +1254,8 @@ class lammps(object):
   def set_variable(self,name,value):
     """Set a new value for a LAMMPS string style variable
 
+    .. deprecated:: TBD
+
     This is a wrapper around the :cpp:func:`lammps_set_variable`
     function of the C-library interface.
 
@@ -1271,6 +1275,52 @@ class lammps(object):
 
   # -------------------------------------------------------------------------
 
+  def set_string_variable(self,name,value):
+    """Set a new value for a LAMMPS string style variable
+
+    .. versionadded:: TBD
+
+    This is a wrapper around the :cpp:func:`lammps_set_string_variable`
+    function of the C-library interface.
+
+    :param name: name of the variable
+    :type name: string
+    :param value: new variable value
+    :type value: any. will be converted to a string
+    :return: either 0 on success or -1 on failure
+    :rtype: int
+    """
+    if name: name = name.encode()
+    else: return -1
+    if value: value = str(value).encode()
+    else: return -1
+    with ExceptionCheck(self):
+      return self.lib.lammps_set_string_variable(self.lmp,name,value)
+
+  # -------------------------------------------------------------------------
+
+  def set_internal_variable(self,name,value):
+    """Set a new value for a LAMMPS internal style variable
+
+    .. versionadded:: TBD
+
+    This is a wrapper around the :cpp:func:`lammps_set_internal_variable`
+    function of the C-library interface.
+
+    :param name: name of the variable
+    :type name: string
+    :param value: new variable value
+    :type value: float or compatible. will be converted to float
+    :return: either 0 on success or -1 on failure
+    :rtype: int
+    """
+    if name: name = name.encode()
+    else: return -1
+    with ExceptionCheck(self):
+      return self.lib.lammps_set_internal_variable(self.lmp,name,value)
+
+  # -------------------------------------------------------------------------
+
   # return vector of atom properties gathered across procs
   # 3 variants to match src/library.cpp
   # name = atom property recognized by LAMMPS in atom->extract()
diff --git a/src/.gitignore b/src/.gitignore
index 3ee771e139..1e4c5b9ddb 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -100,6 +100,8 @@
 /lepton_utils.cpp
 /lepton_utils.h
 
+/compute_pace.cpp
+/compute_pace.h
 /pair_pace.cpp
 /pair_pace.h
 /pair_pace_extrapolation.cpp
@@ -346,6 +348,8 @@
 /compute_nbond_atom.h
 /fix_nve_bpm_sphere.cpp
 /fix_nve_bpm_sphere.h
+/fix_update_special_bonds.cpp
+/fix_update_special_bonds.h
 /pair_bpm_spring.cpp
 /pair_bpm_spring.h
 
@@ -629,8 +633,14 @@
 /compute_pressure_grem.h
 /compute_ptm_atom.cpp
 /compute_ptm_atom.h
+/compute_rattlers_atom.cpp
+/compute_rattlers_atom.h
+/compute_reaxff_atom.cpp
+/compute_reaxff_atom.h
 /compute_rigid_local.cpp
 /compute_rigid_local.h
+/compute_slcsa_atom.cpp
+/compute_slcsa_atom.h
 /compute_smd_triangle_vertices.cpp
 /compute_smd_triangle_vertices.h
 /compute_spec_atom.cpp
@@ -912,6 +922,8 @@
 /fix_nvt_sllod_eff.h
 /fix_nve_tri.cpp
 /fix_nve_tri.h
+/fix_nonaffine_displacement.cpp
+/fix_nonaffine_displacement.h
 /fix_oneway.cpp
 /fix_oneway.h
 /fix_orient_bcc.cpp
diff --git a/src/ADIOS/reader_adios.cpp b/src/ADIOS/reader_adios.cpp
index ec87bc26cc..0fe82dd49f 100644
--- a/src/ADIOS/reader_adios.cpp
+++ b/src/ADIOS/reader_adios.cpp
@@ -30,7 +30,7 @@
 
 using namespace LAMMPS_NS;
 
-#define SMALL 1.0e-6
+static constexpr double SMALL = 1.0e-6;
 
 namespace LAMMPS_NS {
 class ReadADIOSInternal {
diff --git a/src/AMOEBA/amoeba_convolution.cpp b/src/AMOEBA/amoeba_convolution.cpp
index ae3dbf16c4..262ce3a9c3 100644
--- a/src/AMOEBA/amoeba_convolution.cpp
+++ b/src/AMOEBA/amoeba_convolution.cpp
@@ -47,13 +47,8 @@ enum{MPOLE_GRID,POLAR_GRID,POLAR_GRIDC,DISP_GRID,INDUCE_GRID,INDUCE_GRIDC};
 //#define SCALE 1
 #define SCALE 0
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 /* ----------------------------------------------------------------------
    partition an FFT grid across processors
diff --git a/src/AMOEBA/amoeba_file.cpp b/src/AMOEBA/amoeba_file.cpp
index 6bf961cdab..a48af86ffe 100644
--- a/src/AMOEBA/amoeba_file.cpp
+++ b/src/AMOEBA/amoeba_file.cpp
@@ -25,23 +25,23 @@
 
 using namespace LAMMPS_NS;
 
-enum{UNKNOWN,FFIELD,LITERATURE,ATOMTYPE,VDWL,VDWLPAIR,BSTRETCH,SBEND,ABEND,
-     PAULI,DISPERSION,UB,OUTPLANE,TORSION,PITORSION,ATOMMULT,
-     QPENETRATION,DIPPOLAR,QTRANSFER,END_OF_FILE};
-enum{ALLINGER,BUFFERED_14_7};
-enum{ARITHMETIC,GEOMETRIC,CUBIC_MEAN,R_MIN,SIGMA,DIAMETER,HARMONIC,HHG,W_H};
-enum{MUTUAL,OPT,TCG,DIRECT};
-enum{NOFRAME,ZONLY,ZTHENX,BISECTOR,ZBISECT,THREEFOLD};
-enum{GEAR,ASPC,LSQR};
+enum { UNKNOWN, FFIELD, LITERATURE, ATOMTYPE, VDWL, VDWLPAIR, BSTRETCH, SBEND, ABEND,
+  PAULI, DISPERSION, UB, OUTPLANE, TORSION, PITORSION, ATOMMULT, QPENETRATION, DIPPOLAR,
+  QTRANSFER, END_OF_FILE };
+enum { ALLINGER, BUFFERED_14_7 };
+enum { ARITHMETIC, GEOMETRIC, CUBIC_MEAN, R_MIN, SIGMA, DIAMETER, HARMONIC, HHG, W_H };
+enum { MUTUAL, OPT, TCG, DIRECT };
+enum { NOFRAME, ZONLY, ZTHENX, BISECTOR, ZBISECT, THREEFOLD };
+enum { GEAR, ASPC, LSQR };
 
-#define MAXLINE 65536              // crazy big for TORSION-TORSION section
-#define MAX_TYPE_PER_GROUP 6       // max types per AMOEBA group
-#define MAX_FRAME_PER_TYPE 32      // max multipole frames for any AMOEBA type
+static constexpr int MAXLINE = 65536;             // crazy big for TORSION-TORSION section
+static constexpr int MAX_TYPE_PER_GROUP = 6;     // max types per AMOEBA group
+static constexpr int MAX_FRAME_PER_TYPE = 32;    // max multipole frames for any AMOEBA type
 
-#define DELTA_TYPE_CLASS 32
-#define DELTA_VDWL_PAIR 16
+static constexpr int DELTA_TYPE_CLASS = 32;
+static constexpr int DELTA_VDWL_PAIR = 16;
 
-#define BOHR 0.52917721067         // Bohr in Angstroms
+static constexpr double BOHR = 0.52917721067;    // Bohr in Angstroms
 
 // methods to read, parse, and store info from force field file
 
@@ -79,7 +79,7 @@ void PairAmoeba::read_prmfile(char *filename)
 
   int me = comm->me;
   FILE *fptr;
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
 
   if (me == 0) {
     fptr = utils::open_potential(filename, lmp, nullptr);
@@ -179,8 +179,7 @@ void PairAmoeba::read_prmfile(char *filename)
       for (int i = 1; i <= n_amtype; i++) nmultiframe[i] = 0;
     }
 
-    char next[MAXLINE];
-    next[0] = '\0';
+    char next[MAXLINE] = {'\0'};
     bool has_next = false;
     int n;
     while (true) {
@@ -381,7 +380,7 @@ void PairAmoeba::read_keyfile(char *filename)
 
   int me = comm->me;
   FILE *fptr;
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
   if (me == 0) {
     fptr = utils::open_potential(filename, lmp, nullptr);
     if (fptr == nullptr)
diff --git a/src/AMOEBA/amoeba_induce.cpp b/src/AMOEBA/amoeba_induce.cpp
index 6017b775ca..278a00316d 100644
--- a/src/AMOEBA/amoeba_induce.cpp
+++ b/src/AMOEBA/amoeba_induce.cpp
@@ -41,7 +41,7 @@ enum{GEAR,ASPC,LSQR};
 enum{BUILD,APPLY};
 enum{GORDON1,GORDON2};
 
-#define DEBYE 4.80321    // conversion factor from q-Angs (real units) to Debye
+static constexpr double DEBYE = 4.80321;    // conversion factor from q-Angs (real units) to Debye
 
 /* ----------------------------------------------------------------------
    induce = induced dipole moments via pre-conditioned CG solver
diff --git a/src/AMOEBA/amoeba_multipole.cpp b/src/AMOEBA/amoeba_multipole.cpp
index d0ae03401a..81e7763fe7 100644
--- a/src/AMOEBA/amoeba_multipole.cpp
+++ b/src/AMOEBA/amoeba_multipole.cpp
@@ -35,11 +35,11 @@ enum{FIELD,ZRSD,TORQUE,UFLD};                          // reverse comm
 enum{VDWL,REPULSE,QFER,DISP,MPOLE,POLAR,USOLV,DISP_LONG,MPOLE_LONG,POLAR_LONG};
 
 #ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
+static constexpr FFT_SCALAR ZEROF = 0.0f;
+static constexpr FFT_SCALAR ONEF =  1.0f;
 #else
-#define ZEROF 0.0
-#define ONEF  1.0
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 #endif
 
 /* ----------------------------------------------------------------------
diff --git a/src/AMOEBA/angle_amoeba.cpp b/src/AMOEBA/angle_amoeba.cpp
index e6ee7b579e..54fc3e9f9a 100644
--- a/src/AMOEBA/angle_amoeba.cpp
+++ b/src/AMOEBA/angle_amoeba.cpp
@@ -30,7 +30,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/AMOEBA/fix_amoeba_bitorsion.cpp b/src/AMOEBA/fix_amoeba_bitorsion.cpp
index cb8c62819d..f814f4109a 100644
--- a/src/AMOEBA/fix_amoeba_bitorsion.cpp
+++ b/src/AMOEBA/fix_amoeba_bitorsion.cpp
@@ -32,10 +32,10 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
-#define BITORSIONMAX 6   // max # of BiTorsion terms stored by one atom
-#define LISTDELTA 10000
-#define LB_FACTOR 1.5
-#define MAXLINE 1024
+static constexpr int BITORSIONMAX = 6;   // max # of BiTorsion terms stored by one atom
+static constexpr int LISTDELTA = 10000;
+static constexpr double LB_FACTOR = 1.5;
+static constexpr int MAXLINE = 1024;
 
 // spline weighting factors
 
@@ -724,7 +724,7 @@ double FixAmoebaBiTorsion::compute_scalar()
 
 void FixAmoebaBiTorsion::read_grid_data(char *bitorsion_file)
 {
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
   char *eof;
 
   FILE *fp = nullptr;
diff --git a/src/AMOEBA/fix_amoeba_pitorsion.cpp b/src/AMOEBA/fix_amoeba_pitorsion.cpp
index 445845c075..33af4a3c31 100644
--- a/src/AMOEBA/fix_amoeba_pitorsion.cpp
+++ b/src/AMOEBA/fix_amoeba_pitorsion.cpp
@@ -32,9 +32,9 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
-#define PITORSIONMAX 6   // max # of PiTorsion terms stored by one atom
-#define LISTDELTA 8196
-#define LB_FACTOR 1.5
+static constexpr int PITORSIONMAX = 6;   // max # of PiTorsion terms stored by one atom
+static constexpr int LISTDELTA = 8196;
+static constexpr double LB_FACTOR = 1.5;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/AMOEBA/improper_amoeba.cpp b/src/AMOEBA/improper_amoeba.cpp
index 32c31b0af9..46c2585d1f 100644
--- a/src/AMOEBA/improper_amoeba.cpp
+++ b/src/AMOEBA/improper_amoeba.cpp
@@ -28,8 +28,8 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/AMOEBA/pair_amoeba.cpp b/src/AMOEBA/pair_amoeba.cpp
index cad9e2b628..4cb5c39b61 100644
--- a/src/AMOEBA/pair_amoeba.cpp
+++ b/src/AMOEBA/pair_amoeba.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    https://www.lammps.org/ Sandia National Laboratories
@@ -39,17 +38,19 @@ using namespace LAMMPS_NS;
 
 using MathSpecial::powint;
 
-enum{INDUCE,RSD,SETUP_AMOEBA,SETUP_HIPPO,KMPOLE,AMGROUP,PVAL};  // forward comm
-enum{FIELD,ZRSD,TORQUE,UFLD};                                   // reverse comm
-enum{ARITHMETIC,GEOMETRIC,CUBIC_MEAN,R_MIN,SIGMA,DIAMETER,HARMONIC,HHG,W_H};
-enum{HAL,REPULSE,QFER,DISP,MPOLE,POLAR,USOLV,DISP_LONG,MPOLE_LONG,POLAR_LONG};
-enum{MPOLE_GRID,POLAR_GRID,POLAR_GRIDC,DISP_GRID,INDUCE_GRID,INDUCE_GRIDC};
-enum{MUTUAL,OPT,TCG,DIRECT};
-enum{GEAR,ASPC,LSQR};
+enum { INDUCE, RSD, SETUP_AMOEBA, SETUP_HIPPO, KMPOLE, AMGROUP, PVAL };    // forward comm
+enum { FIELD, ZRSD, TORQUE, UFLD };                                        // reverse comm
+enum { ARITHMETIC, GEOMETRIC, CUBIC_MEAN, R_MIN, SIGMA, DIAMETER, HARMONIC, HHG, W_H };
+enum { HAL, REPULSE, QFER, DISP, MPOLE, POLAR, USOLV, DISP_LONG, MPOLE_LONG, POLAR_LONG };
+enum { MPOLE_GRID, POLAR_GRID, POLAR_GRIDC, DISP_GRID, INDUCE_GRID, INDUCE_GRIDC };
+enum { MUTUAL, OPT, TCG, DIRECT };
+enum { GEAR, ASPC, LSQR };
 
-#define DELTASTACK 16
+static constexpr int DELTASTACK = 16;
 #define DEBUG_AMOEBA 0
 
+// clang-format off
+
 /* ---------------------------------------------------------------------- */
 
 PairAmoeba::PairAmoeba(LAMMPS *lmp) : Pair(lmp)
@@ -429,7 +430,7 @@ void PairAmoeba::compute(int eflag, int vflag)
   // output FF settings to screen and logfile
   //   delay until here because RMS force accuracy is computed based on rpole
 
-  if (first_flag_compute && (comm->me == 0)) print_settings();
+  if (first_flag_compute) print_settings();
   first_flag_compute = 0;
 
   if (amoeba) pbc_xred();
@@ -827,28 +828,36 @@ void PairAmoeba::init_style()
 
   // check if all custom atom arrays were set via fix property/atom
 
-  int flag,cols;
+  // clang-format on
+  const char *names[6] = {"amtype", "amgroup", "redID", "xyzaxis", "polaxe", "pval"};
+  const int flag_check[6] = {0, 0, 1, 1, 0, 1};     // correct type (0 int, 1 dbl)
+  const int cols_check[6] = {0, 0, 0, 3, 0, 0};     // xyzaxis 3 cols, all others 0
+  const int ghost_check[6] = {1, 1, 1, 0, 0, 1};    // which types need ghost; TO-DO: check
+  int flag, cols, ghost, index[6];
 
-  index_amtype = atom->find_custom("amtype",flag,cols);
-  if (index_amtype < 0 || flag || cols)
-    error->all(FLERR,"Pair {} amtype is not defined", mystyle);
-  index_amgroup = atom->find_custom("amgroup",flag,cols);
-  if (index_amgroup < 0 || flag || cols)
-    error->all(FLERR,"Pair {} amgroup is not defined", mystyle);
+  // clang-format off
 
-  index_redID = atom->find_custom("redID",flag,cols);
-  if (index_redID < 0 || !flag || cols)
-    error->all(FLERR,"Pair {} redID is not defined", mystyle);
-  index_xyzaxis = atom->find_custom("xyzaxis",flag,cols);
-  if (index_xyzaxis < 0 || !flag || cols == 0)
-    error->all(FLERR,"Pair {} xyzaxis is not defined", mystyle);
+  for (int i = 0; i < 6; i++) {
+    if (ghost_check[i]) {
+      index[i] = atom->find_custom_ghost(names[i], flag, cols, ghost);
+    } else {
+      index[i] = atom->find_custom(names[i], flag, cols);
+    }
+    std::string err = "";
+    if (index[i] < 0) err = "was not defined";
+    else if (flag_check[i] != flag) err = "has the wrong type";
+    else if (cols_check[i] != cols) err = "has the wrong number of columns";
+    else if (ghost_check[i] && !ghost) err = "must be set by fix property/atom with ghost yes";
+    if (err != "")
+      error->all(FLERR,"Pair {} per-atom variable {} {}", mystyle, names[i], err);
+  }
 
-  index_polaxe = atom->find_custom("polaxe",flag,cols);
-  if (index_polaxe < 0 || flag || cols)
-    error->all(FLERR,"Pair {} polaxe is not defined", mystyle);
-  index_pval = atom->find_custom("pval",flag,cols);
-  if (index_pval < 0 || !flag || cols)
-    error->all(FLERR,"Pair {} pval is not defined", mystyle);
+  index_amtype  = index[0];
+  index_amgroup = index[1];
+  index_redID   = index[2];
+  index_xyzaxis = index[3];
+  index_polaxe  = index[4];
+  index_pval    = index[5];
 
   // -------------------------------------------------------------------
   // one-time initializations
@@ -1069,79 +1078,86 @@ void PairAmoeba::init_style()
 void PairAmoeba::print_settings()
 {
   std::string mesg = utils::uppercase(mystyle) + " force field settings\n";
-
-  if (amoeba) {
-    choose(HAL);
-    mesg += fmt::format("  hal: cut {} taper {} vscale {} {} {} {}\n", sqrt(off2),sqrt(cut2),
-                        special_hal[1],special_hal[2],special_hal[3],special_hal[4]);
-  } else {
-    choose(REPULSE);
-    mesg += fmt::format("  repulsion: cut {} taper {} rscale {} {} {} {}\n", sqrt(off2),sqrt(cut2),
-                        special_repel[1],special_repel[2],special_repel[3],special_repel[4]);
-
-    choose(QFER);
-    mesg += fmt::format("  qxfer: cut {} taper {} mscale {} {} {} {}\n", sqrt(off2),sqrt(cut2),
-                        special_mpole[1],special_mpole[2],special_mpole[3],special_mpole[4]);
-
-    if (use_dewald) {
-      choose(DISP_LONG);
-      mesg += fmt::format("  dispersion: cut {} aewald {} bsorder {} FFT {} {} {} "
-                          "dspscale {} {} {} {}\n", sqrt(off2),aewald,bsdorder,ndfft1,ndfft2,ndfft3,
-                          special_disp[1],special_disp[2],special_disp[3],special_disp[4]);
-    } else {
-      choose(DISP);
-      mesg += fmt::format("  dispersion: cut {} aewald {} dspscale {} {} {} {}\n",
-                          sqrt(off2),aewald,special_disp[1],
-                          special_disp[2],special_disp[3],special_disp[4]);
-    }
-  }
+  double estimated_mpole_accuracy = 0.0;
 
   if (use_ewald) {
     choose(MPOLE_LONG);
-    double estimated_accuracy = final_accuracy_mpole();
-    mesg += fmt::format("  multipole: cut {} aewald {} bsorder {} FFT {} {} {}; "
-                        "estimated absolute RMS force accuracy = {:.8g}; "
-                        "estimated relative RMS force accuracy = {:.8g}; "
-                        "mscale {} {} {} {}\n",
-                        sqrt(off2),aewald,bseorder,nefft1,nefft2,nefft3,
-                        estimated_accuracy,estimated_accuracy/two_charge_force,
-                        special_mpole[1],special_mpole[2],special_mpole[3],special_mpole[4]);
-  } else {
-    choose(MPOLE);
-    mesg += fmt::format("  multipole: cut {} aewald {} mscale {} {} {} {}\n", sqrt(off2),aewald,
-                        special_mpole[1],special_mpole[2],special_mpole[3],special_mpole[4]);
+    estimated_mpole_accuracy = final_accuracy_mpole();
   }
 
-  if (use_ewald) {
-    choose(POLAR_LONG);
-    mesg += fmt::format("  polar: cut {} aewald {} bsorder {} FFT {} {} {}\n",
-                        sqrt(off2),aewald,bsporder,nefft1,nefft2,nefft3);
-    mesg += fmt::format("         pscale {} {} {} {} piscale {} {} {} {} "
-                        "wscale {} {} {} {} d/u scale {} {}\n",
-                        special_polar_pscale[1],special_polar_pscale[2],
-                        special_polar_pscale[3],special_polar_pscale[4],
-                        special_polar_piscale[1],special_polar_piscale[2],
-                        special_polar_piscale[3],special_polar_piscale[4],
-                        special_polar_wscale[1],special_polar_wscale[2],
-                        special_polar_wscale[3],special_polar_wscale[4],
-                        polar_dscale,polar_uscale);
-  } else {
-    choose(POLAR);
-    mesg += fmt::format("  polar: cut {} aewald {}\n",sqrt(off2),aewald);
-    mesg += fmt::format("         pscale {} {} {} {} piscale {} {} {} {} "
-                        "wscale {} {} {} {} d/u scale {} {}\n",
-                        special_polar_pscale[1],special_polar_pscale[2],
-                        special_polar_pscale[3],special_polar_pscale[4],
-                        special_polar_piscale[1],special_polar_piscale[2],
-                        special_polar_piscale[3],special_polar_piscale[4],
-                        special_polar_wscale[1],special_polar_wscale[2],
-                        special_polar_wscale[3],special_polar_wscale[4],
-                        polar_dscale,polar_uscale);
-  }
+  if (comm->me == 0) {
+    if (amoeba) {
+      choose(HAL);
+      mesg += fmt::format("  hal: cut {} taper {} vscale {} {} {} {}\n", sqrt(off2),sqrt(cut2),
+                          special_hal[1],special_hal[2],special_hal[3],special_hal[4]);
+    } else {
+      choose(REPULSE);
+      mesg += fmt::format("  repulsion: cut {} taper {} rscale {} {} {} {}\n", sqrt(off2),sqrt(cut2),
+                          special_repel[1],special_repel[2],special_repel[3],special_repel[4]);
 
-  choose(USOLV);
-  mesg += fmt::format("  precondition: cut {}\n",sqrt(off2));
-  utils::logmesg(lmp, mesg);
+      choose(QFER);
+      mesg += fmt::format("  qxfer: cut {} taper {} mscale {} {} {} {}\n", sqrt(off2),sqrt(cut2),
+                          special_mpole[1],special_mpole[2],special_mpole[3],special_mpole[4]);
+
+      if (use_dewald) {
+        choose(DISP_LONG);
+        mesg += fmt::format("  dispersion: cut {} aewald {} bsorder {} FFT {} {} {} "
+                            "dspscale {} {} {} {}\n", sqrt(off2),aewald,bsdorder,ndfft1,ndfft2,ndfft3,
+                            special_disp[1],special_disp[2],special_disp[3],special_disp[4]);
+      } else {
+        choose(DISP);
+        mesg += fmt::format("  dispersion: cut {} aewald {} dspscale {} {} {} {}\n",
+                            sqrt(off2),aewald,special_disp[1],
+                            special_disp[2],special_disp[3],special_disp[4]);
+      }
+    }
+
+    if (use_ewald) {
+      choose(MPOLE_LONG);
+      mesg += fmt::format("  multipole: cut {} aewald {} bsorder {} FFT {} {} {}\n"
+                          "             estimated absolute RMS force accuracy = {:.8g}\n"
+                          "             estimated relative RMS force accuracy = {:.8g}\n"
+                          "             mscale {} {} {} {}\n",
+                          sqrt(off2),aewald,bseorder,nefft1,nefft2,nefft3,
+                          estimated_mpole_accuracy,estimated_mpole_accuracy/two_charge_force,
+                          special_mpole[1],special_mpole[2],special_mpole[3],special_mpole[4]);
+    } else {
+      choose(MPOLE);
+      mesg += fmt::format("  multipole: cut {} aewald {} mscale {} {} {} {}\n", sqrt(off2),aewald,
+                          special_mpole[1],special_mpole[2],special_mpole[3],special_mpole[4]);
+    }
+
+    if (use_ewald) {
+      choose(POLAR_LONG);
+      mesg += fmt::format("  polar: cut {} aewald {} bsorder {} FFT {} {} {}\n",
+                          sqrt(off2),aewald,bsporder,nefft1,nefft2,nefft3);
+      mesg += fmt::format("         pscale {} {} {} {} piscale {} {} {} {} "
+                          "wscale {} {} {} {} d/u scale {} {}\n",
+                          special_polar_pscale[1],special_polar_pscale[2],
+                          special_polar_pscale[3],special_polar_pscale[4],
+                          special_polar_piscale[1],special_polar_piscale[2],
+                          special_polar_piscale[3],special_polar_piscale[4],
+                          special_polar_wscale[1],special_polar_wscale[2],
+                          special_polar_wscale[3],special_polar_wscale[4],
+                          polar_dscale,polar_uscale);
+    } else {
+      choose(POLAR);
+      mesg += fmt::format("  polar: cut {} aewald {}\n",sqrt(off2),aewald);
+      mesg += fmt::format("         pscale {} {} {} {} piscale {} {} {} {} "
+                          "wscale {} {} {} {} d/u scale {} {}\n",
+                          special_polar_pscale[1],special_polar_pscale[2],
+                          special_polar_pscale[3],special_polar_pscale[4],
+                          special_polar_piscale[1],special_polar_piscale[2],
+                          special_polar_piscale[3],special_polar_piscale[4],
+                          special_polar_wscale[1],special_polar_wscale[2],
+                          special_polar_wscale[3],special_polar_wscale[4],
+                          polar_dscale,polar_uscale);
+    }
+
+    choose(USOLV);
+    mesg += fmt::format("  precondition: cut {}\n",sqrt(off2));
+    utils::logmesg(lmp, mesg);
+  }
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/ASPHERE/compute_temp_asphere.cpp b/src/ASPHERE/compute_temp_asphere.cpp
index 707938b2b9..7d5d6adce5 100644
--- a/src/ASPHERE/compute_temp_asphere.cpp
+++ b/src/ASPHERE/compute_temp_asphere.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 
 enum{ROTATE,ALL};
 
-#define INERTIA 0.2          // moment of inertia prefactor for ellipsoid
+static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ASPHERE/fix_nve_asphere.cpp b/src/ASPHERE/fix_nve_asphere.cpp
index f4080493c8..a5655b875c 100644
--- a/src/ASPHERE/fix_nve_asphere.cpp
+++ b/src/ASPHERE/fix_nve_asphere.cpp
@@ -26,7 +26,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define INERTIA 0.2          // moment of inertia prefactor for ellipsoid
+static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ASPHERE/fix_nve_line.cpp b/src/ASPHERE/fix_nve_line.cpp
index d7adf0a963..4d7a691bdd 100644
--- a/src/ASPHERE/fix_nve_line.cpp
+++ b/src/ASPHERE/fix_nve_line.cpp
@@ -23,7 +23,7 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
-#define INERTIA (1.0/12.0)     // moment of inertia prefactor for line segment
+static constexpr double INERTIA = (1.0/12.0);     // moment of inertia prefactor for line segment
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ASPHERE/pair_line_lj.cpp b/src/ASPHERE/pair_line_lj.cpp
index 2d6dc557f3..3bfc10758c 100644
--- a/src/ASPHERE/pair_line_lj.cpp
+++ b/src/ASPHERE/pair_line_lj.cpp
@@ -25,7 +25,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ASPHERE/pair_tri_lj.cpp b/src/ASPHERE/pair_tri_lj.cpp
index 4fe999f039..b9cb2528ca 100644
--- a/src/ASPHERE/pair_tri_lj.cpp
+++ b/src/ASPHERE/pair_tri_lj.cpp
@@ -26,7 +26,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 20
+static constexpr int DELTA = 20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/BOCS/fix_bocs.cpp b/src/BOCS/fix_bocs.cpp
index 4918f8d879..33f4f072e7 100644
--- a/src/BOCS/fix_bocs.cpp
+++ b/src/BOCS/fix_bocs.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    https://www.lammps.org/, Sandia National Laboratories
@@ -42,31 +41,30 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 
 static const char cite_user_bocs_package[] =
-  "BOCS package: doi:10.1021/acs.jpcb.7b09993\n\n"
-  "@Article{Dunn2018,\n"
-  " author = {N. J. H. Dunn and K. M. Lebold and M. R. {DeLyser} and\n"
-  "    J. F. Rudzinski and W. G. Noid},\n"
-  " title = {{BOCS}: Bottom-Up Open-Source Coarse-Graining Software},\n"
-  " journal = {J.~Phys.\\ Chem.~B},\n"
-  " year =    2018,\n"
-  " volume =  122,\n"
-  " number =  13,\n"
-  " pages =   {3363--3377}\n"
-  "}\n\n";
+    "BOCS package: doi:10.1021/acs.jpcb.7b09993\n\n"
+    "@Article{Dunn2018,\n"
+    " author = {N. J. H. Dunn and K. M. Lebold and M. R. {DeLyser} and\n"
+    "    J. F. Rudzinski and W. G. Noid},\n"
+    " title = {{BOCS}: Bottom-Up Open-Source Coarse-Graining Software},\n"
+    " journal = {J.~Phys.\\ Chem.~B},\n"
+    " year =    2018,\n"
+    " volume =  122,\n"
+    " number =  13,\n"
+    " pages =   {3363--3377}\n"
+    "}\n\n";
 
+static constexpr double DELTAFLIP = 0.1;
+static constexpr double TILTMAX = 1.5;
+static constexpr int NUM_INPUT_DATA_COLUMNS = 2;    // columns in the pressure correction file
 
-#define DELTAFLIP 0.1
-#define TILTMAX 1.5
-
-enum{NOBIAS,BIAS};
-enum{NONE,XYZ,XY,YZ,XZ};
-enum{ISO,ANISO,TRICLINIC};
-
-const int NUM_INPUT_DATA_COLUMNS = 2;     // columns in the pressure correction file
+enum { NOBIAS, BIAS };
+enum { NONE, XYZ, XY, YZ, XZ };
+enum { ISO, ANISO, TRICLINIC };
 
 /* ----------------------------------------------------------------------
    NVT,NPH,NPT integrators for improved Nose-Hoover equations of motion
  ---------------------------------------------------------------------- */
+// clang-format off
 
 FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
     Fix(lmp, narg, arg), id_dilate(nullptr), irregular(nullptr), id_temp(nullptr),
@@ -75,7 +73,7 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
 {
   if (lmp->citeme) lmp->citeme->add(cite_user_bocs_package);
 
-  if (narg < 4) error->all(FLERR,"Illegal fix bocs command");
+  if (narg < 4) utils::missing_cmd_args(FLERR,"fix bocs",error);
 
   restart_global = 1;
   dynamic_group_allow = 1;
@@ -102,8 +100,6 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
   omega_mass_flag = 0;
   etap_mass_flag = 0;
   flipflag = 1;
-  dipole_flag = 0;
-  dlm_flag = 0;
 
   tcomputeflag = 0;
   pcomputeflag = 0;
@@ -266,13 +262,6 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
   if (p_flag[2] && domain->zperiodic == 0)
     error->all(FLERR,"Cannot use fix bocs on a non-periodic dimension");
 
-  if (dipole_flag) {
-    if (!atom->sphere_flag)
-      error->all(FLERR,"Using update dipole flag requires atom style sphere");
-    if (!atom->mu_flag)
-      error->all(FLERR,"Using update dipole flag requires atom attribute mu");
-  }
-
   if ((tstat_flag && t_period <= 0.0) ||
       (p_flag[0] && p_period[0] <= 0.0) ||
       (p_flag[1] && p_period[1] <= 0.0) ||
@@ -616,8 +605,8 @@ int FixBocs::read_F_table( char *filename, int p_basis_type )
     // Data file lines hold two floating point numbers.
     // Line length we allocate should be long enough without being too long.
     // 128 seems safe for a line we expect to be < 30 chars.
-    const int MAX_F_TABLE_LINE_LENGTH = 128;
-    char line[MAX_F_TABLE_LINE_LENGTH];
+    constexpr int MAX_F_TABLE_LINE_LENGTH = 128;
+    char line[MAX_F_TABLE_LINE_LENGTH] = {'\0'};
     std::vector<std::string> inputLines;
     while (fgets(line, MAX_F_TABLE_LINE_LENGTH, fpi)) {
       inputLines.emplace_back(line);
@@ -649,17 +638,13 @@ int FixBocs::read_F_table( char *filename, int p_basis_type )
     for (int i = 0; i < (int)inputLines.size(); ++i) {
       lineNum++;  // count each line processed now so lineNum messages can be 1-based
       test_sscanf = sscanf(inputLines.at(i).c_str()," %f , %f ",&f1, &f2);
-      if (test_sscanf == 2)
-      {
+      if (test_sscanf == 2) {
         data[VOLUME][i] = (double)f1;
         data[PRESSURE_CORRECTION][i] = (double)f2;
-        if (i == 1)
-        {
+        if (i == 1) {
           // second entry is used to compute the validation interval used below
           stdVolumeInterval = data[VOLUME][i] - data[VOLUME][i-1];
-        }
-        else if (i > 1)
-        {
+        } else if (i > 1) {
           // after second entry, all intervals are validated
           currVolumeInterval = data[VOLUME][i] - data[VOLUME][i-1];
           if (fabs(currVolumeInterval - stdVolumeInterval) > volumeIntervalTolerance) {
@@ -673,17 +658,14 @@ int FixBocs::read_F_table( char *filename, int p_basis_type )
           }
           // no concluding else is intentional: i = 0, first line, no interval to validate
         }
-      }
-      else
-      {
+      } else {
         if (comm->me == 0)
           error->warning(FLERR,"Bad input format: did not find 2 comma separated numeric"
                          " values in line {} of file {}\nWARNING:\tline: {}",
                          lineNum, filename, inputLines.at(i));
         badInput = true;
       }
-      if (badInput)
-      {
+      if (badInput) {
         numBadVolumeIntervals++;
       }
     }
@@ -700,18 +682,13 @@ int FixBocs::read_F_table( char *filename, int p_basis_type )
     error->warning(FLERR,"Bad volume / pressure-correction data: {}\nSee details above", filename);
   }
 
-  if (p_basis_type == BASIS_LINEAR_SPLINE)
-  {
+  if (p_basis_type == BASIS_LINEAR_SPLINE) {
     spline_length = numEntries;
     numEntries = build_linear_splines(data);
-  }
-  else if (p_basis_type == BASIS_CUBIC_SPLINE)
-  {
+  } else if (p_basis_type == BASIS_CUBIC_SPLINE) {
     spline_length = numEntries;
     numEntries = build_cubic_splines(data);
-  }
-  else
-  {
+  } else {
     error->all(FLERR,"ERROR: invalid p_basis_type value of {} in read_F_table", p_basis_type);
   }
 
@@ -724,8 +701,7 @@ int FixBocs::build_linear_splines(double **data) {
   splines[VOLUME] = (double *) calloc(spline_length,sizeof(double));
   splines[PRESSURE_CORRECTION] = (double *) calloc(spline_length,sizeof(double));
 
-  for (int i = 0; i < spline_length; ++i)
-  {
+  for (int i = 0; i < spline_length; ++i) {
     splines[VOLUME][i] = data[VOLUME][i];
     splines[PRESSURE_CORRECTION][i] = data[PRESSURE_CORRECTION][i];
   }
@@ -758,18 +734,15 @@ int FixBocs::build_cubic_splines(double **data)
   memory->create(mu, n, "mu");
   memory->create(z, n, "z");
 
-  for (int i=0; i<n; i++)
-  {
+  for (int i=0; i<n; i++) {
     a[i] = data[1][i];
     b[i] = 0.0;
     d[i] = 0.0;
-    if (i<(n-1))
-    {
+    if (i<(n-1)) {
       h[i] = (data[0][i+1] - data[0][i]);
     }
     double alpha_i;
-    if (i>1 && i<(n-1))
-    {
+    if (i>1 && i<(n-1)) {
       alpha_i = (3.0 / h[i]) * ( data[1][i+1] - data[1][i]) - (3.0 / h[i-1] )
                                                               * ( data[1][i] - data[1][i-1] );
       alpha[i-1] = alpha_i;
@@ -779,8 +752,7 @@ int FixBocs::build_cubic_splines(double **data)
   mu[0] = 0.0;
   z[0] = 0.0;
 
-  for (int i=1; i<n-1; i++)
-  {
+  for (int i=1; i<n-1; i++) {
     l[i] = 2*(data[0][i+1] - data[0][i-1]) - h[i-1] * mu[i-1];
     mu[i] = h[i]/l[i];
     z[i] = (alpha[i] - h[i-1] * z[i-1]) / l[i];
@@ -797,19 +769,15 @@ int FixBocs::build_cubic_splines(double **data)
   c[n] = 0.0;
   d[n] = 0.0;
 
-  for (int j=n-1; j>=0; j--)
-  {
+  for (int j=n-1; j>=0; j--) {
     c[j] = z[j] - mu[j]*c[j+1];
-
     b[j] = (a[j+1]-a[j])/h[j] - h[j]*(c[j+1] + 2.0 * c[j])/3.0;
-
     d[j] = (c[j+1]-c[j])/(3.0 * h[j]);
   }
 
   int numSplines = n - 1;
   memory->create(splines, NUM_CUBIC_SPLINE_COLUMNS, numSplines, "splines");
-  for (int idx = 0; idx < numSplines; ++idx)
-  {
+  for (int idx = 0; idx < numSplines; ++idx) {
     splines[0][idx] = data[0][idx];
     splines[1][idx] = a[idx];
     splines[2][idx] = b[idx];
diff --git a/src/BOCS/fix_bocs.h b/src/BOCS/fix_bocs.h
index 71fbc273d8..af221fedb3 100644
--- a/src/BOCS/fix_bocs.h
+++ b/src/BOCS/fix_bocs.h
@@ -129,8 +129,6 @@ class FixBocs : public Fix {
   int eta_mass_flag;      // 1 if eta_mass updated, 0 if not.
   int omega_mass_flag;    // 1 if omega_mass updated, 0 if not.
   int etap_mass_flag;     // 1 if etap_mass updated, 0 if not.
-  int dipole_flag;        // 1 if dipole is updated, 0 if not.
-  int dlm_flag;           // 1 if using the DLM rotational integrator, 0 if not
 
   int scaleyz;     // 1 if yz scaled with lz
   int scalexz;     // 1 if xz scaled with lz
diff --git a/src/BODY/body_nparticle.cpp b/src/BODY/body_nparticle.cpp
index 62e6ee802a..7a1dbaa0f0 100644
--- a/src/BODY/body_nparticle.cpp
+++ b/src/BODY/body_nparticle.cpp
@@ -26,7 +26,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-7
+static constexpr double EPSILON = 1.0e-7;
 enum{SPHERE,LINE,TRI};           // also in DumpImage
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/BODY/body_rounded_polygon.cpp b/src/BODY/body_rounded_polygon.cpp
index 2fb2a991f1..6a94f68a8e 100644
--- a/src/BODY/body_rounded_polygon.cpp
+++ b/src/BODY/body_rounded_polygon.cpp
@@ -32,7 +32,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-7
+static constexpr double EPSILON = 1.0e-7;
 enum{SPHERE,LINE};           // also in DumpImage
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/BODY/body_rounded_polyhedron.cpp b/src/BODY/body_rounded_polyhedron.cpp
index 1d11644618..bcc3495644 100644
--- a/src/BODY/body_rounded_polyhedron.cpp
+++ b/src/BODY/body_rounded_polyhedron.cpp
@@ -31,7 +31,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-7
+static constexpr double EPSILON = 1.0e-7;
 #define MAX_FACE_SIZE 4  // maximum number of vertices per face (for now)
 
 enum{SPHERE,LINE};       // also in DumpImage
diff --git a/src/BODY/compute_body_local.cpp b/src/BODY/compute_body_local.cpp
index 42afa13514..4a4a36bfae 100644
--- a/src/BODY/compute_body_local.cpp
+++ b/src/BODY/compute_body_local.cpp
@@ -25,7 +25,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 enum{ID,TYPE,INDEX};
 
diff --git a/src/BODY/fix_wall_body_polygon.cpp b/src/BODY/fix_wall_body_polygon.cpp
index 6f0622cbf6..0920145eb2 100644
--- a/src/BODY/fix_wall_body_polygon.cpp
+++ b/src/BODY/fix_wall_body_polygon.cpp
@@ -43,9 +43,9 @@ enum {INVALID=0,NONE=1,VERTEX=2};
 enum {FAR=0,XLO,XHI,YLO,YHI};
 
 //#define _POLYGON_DEBUG
-#define DELTA 10000
-#define EPSILON 1e-2    // dimensionless threshold (dot products, end point checks, contact checks)
-#define BIG 1.0e20
+static constexpr int DELTA = 10000;
+static constexpr double EPSILON = 1e-2;    // dimensionless threshold (dot products, end point checks, contact checks)
+static constexpr double BIG = 1.0e20;
 #define MAX_CONTACTS 4  // maximum number of contacts for 2D models
 #define EFF_CONTACTS 2  // effective contacts for 2D models
 
diff --git a/src/BODY/fix_wall_body_polyhedron.cpp b/src/BODY/fix_wall_body_polyhedron.cpp
index 4b28f6af72..035e814692 100644
--- a/src/BODY/fix_wall_body_polyhedron.cpp
+++ b/src/BODY/fix_wall_body_polyhedron.cpp
@@ -43,9 +43,9 @@ enum {INVALID=0,NONE=1,VERTEX=2};
 enum {FAR=0,XLO,XHI,YLO,YHI,ZLO,ZHI};
 
 //#define _POLYHEDRON_DEBUG
-#define DELTA 10000
-#define EPSILON 1e-3    // dimensionless threshold (dot products, end point checks)
-#define BIG 1.0e20
+static constexpr int DELTA = 10000;
+static constexpr double EPSILON = 1e-3;    // dimensionless threshold (dot products, end point checks)
+static constexpr double BIG = 1.0e20;
 #define MAX_CONTACTS 4  // maximum number of contacts for 2D models
 #define EFF_CONTACTS 2  // effective contacts for 2D models
 
diff --git a/src/BODY/pair_body_nparticle.cpp b/src/BODY/pair_body_nparticle.cpp
index 9e1e640015..29ee06dbef 100644
--- a/src/BODY/pair_body_nparticle.cpp
+++ b/src/BODY/pair_body_nparticle.cpp
@@ -29,7 +29,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/BODY/pair_body_rounded_polygon.cpp b/src/BODY/pair_body_rounded_polygon.cpp
index 24f38a6a0a..4b5a3c4f64 100644
--- a/src/BODY/pair_body_rounded_polygon.cpp
+++ b/src/BODY/pair_body_rounded_polygon.cpp
@@ -39,8 +39,8 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
-#define EPSILON 1e-3    // dimensionless threshold (dot products, end point checks, contact checks)
+static constexpr int DELTA = 10000;
+static constexpr double EPSILON = 1e-3;    // dimensionless threshold (dot products, end point checks, contact checks)
 #define MAX_CONTACTS 4  // maximum number of contacts for 2D models
 #define EFF_CONTACTS 2  // effective contacts for 2D models
 
diff --git a/src/BODY/pair_body_rounded_polyhedron.cpp b/src/BODY/pair_body_rounded_polyhedron.cpp
index e261ff14d3..9eaed320fe 100644
--- a/src/BODY/pair_body_rounded_polyhedron.cpp
+++ b/src/BODY/pair_body_rounded_polyhedron.cpp
@@ -43,8 +43,8 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define DELTA 10000
-#define EPSILON 1e-3     // dimensionless threshold (dot products, end point checks, contact checks)
+static constexpr int DELTA = 10000;
+static constexpr double EPSILON = 1e-3;     // dimensionless threshold (dot products, end point checks, contact checks)
 #define MAX_FACE_SIZE 4  // maximum number of vertices per face (same as BodyRoundedPolyhedron)
 #define MAX_CONTACTS 32  // for 3D models (including duplicated counts)
 
diff --git a/src/BPM/atom_vec_bpm_sphere.cpp b/src/BPM/atom_vec_bpm_sphere.cpp
index 2820a74c8a..37a36f4a59 100644
--- a/src/BPM/atom_vec_bpm_sphere.cpp
+++ b/src/BPM/atom_vec_bpm_sphere.cpp
@@ -35,7 +35,6 @@ AtomVecBPMSphere::AtomVecBPMSphere(LAMMPS *_lmp) : AtomVec(_lmp)
   radvary = 0;
 
   atom->molecule_flag = 1;
-  atom->sphere_flag = 1;
   atom->radius_flag = atom->rmass_flag = atom->omega_flag = atom->torque_flag = atom->quat_flag = 1;
 
   // strings with peratom variables to include in each AtomVec method
diff --git a/src/BPM/bond_bpm.cpp b/src/BPM/bond_bpm.cpp
index 3ebeed3f1d..b484df7fab 100644
--- a/src/BPM/bond_bpm.cpp
+++ b/src/BPM/bond_bpm.cpp
@@ -224,7 +224,7 @@ void BondBPM::settings(int narg, char **arg)
 
       ifix = modify->get_fix_by_id(id_fix_prop_atom);
       if (!ifix)
-        ifix = modify->add_fix(fmt::format("{} all property/atom {} {} {} ghost yes",
+        ifix = modify->add_fix(fmt::format("{} all property/atom d_{} d_{} d_{} ghost yes",
                                            id_fix_prop_atom, x_ref_id, y_ref_id, z_ref_id));
 
       int type_flag;
diff --git a/src/BPM/bond_bpm_rotational.cpp b/src/BPM/bond_bpm_rotational.cpp
index ffb0d9521d..f2bfa3d770 100644
--- a/src/BPM/bond_bpm_rotational.cpp
+++ b/src/BPM/bond_bpm_rotational.cpp
@@ -28,7 +28,7 @@
 #include <cmath>
 #include <cstring>
 
-#define EPSILON 1e-10
+static constexpr double EPSILON = 1e-10;
 
 using namespace LAMMPS_NS;
 using MathConst::MY_SQRT2;
@@ -645,7 +645,7 @@ void BondBPMRotational::init_style()
 {
   BondBPM::init_style();
 
-  if (!atom->quat_flag || !atom->sphere_flag)
+  if (!atom->quat_flag || !atom->radius_flag || !atom->omega_flag)
     error->all(FLERR, "Bond bpm/rotational requires atom style bpm/sphere");
   if (comm->ghost_velocity == 0)
     error->all(FLERR, "Bond bpm/rotational requires ghost atoms store velocity");
diff --git a/src/BPM/bond_bpm_spring.cpp b/src/BPM/bond_bpm_spring.cpp
index 37b79f93fb..28d17b408a 100644
--- a/src/BPM/bond_bpm_spring.cpp
+++ b/src/BPM/bond_bpm_spring.cpp
@@ -26,7 +26,7 @@
 #include <cmath>
 #include <cstring>
 
-#define EPSILON 1e-10
+static constexpr double EPSILON = 1e-10;
 
 using namespace LAMMPS_NS;
 
diff --git a/src/BPM/fix_nve_bpm_sphere.cpp b/src/BPM/fix_nve_bpm_sphere.cpp
index ceab07822e..1766f49c5c 100644
--- a/src/BPM/fix_nve_bpm_sphere.cpp
+++ b/src/BPM/fix_nve_bpm_sphere.cpp
@@ -28,7 +28,7 @@ using namespace MathExtra;
 
 FixNVEBPMSphere::FixNVEBPMSphere(LAMMPS *_lmp, int narg, char **arg) : FixNVE(_lmp, narg, arg)
 {
-  if (narg < 3) error->all(FLERR, "Illegal fix nve/bpm/sphere command");
+  if (narg < 3) utils::missing_cmd_args(FLERR, "fix nve/bpm/sphere", error);
 
   time_integrate = 1;
 
@@ -45,14 +45,14 @@ FixNVEBPMSphere::FixNVEBPMSphere(LAMMPS *_lmp, int narg, char **arg) : FixNVE(_l
         error->all(FLERR, "Fix nve/bpm/sphere disc requires 2d simulation");
       iarg++;
     } else
-      error->all(FLERR, "Illegal fix nve/bpm/sphere command");
+      error->all(FLERR, "Illegal fix nve/bpm/sphere keyword {}", arg[iarg]);
   }
 
   inv_inertia = 1.0 / inertia;
 
   // error checks
 
-  if (!atom->quat_flag || !atom->sphere_flag)
+  if (!atom->quat_flag || !atom->radius_flag || !atom->omega_flag)
     error->all(FLERR, "Fix nve/bpm/sphere requires atom style bpm/sphere");
 }
 
diff --git a/src/fix_update_special_bonds.cpp b/src/BPM/fix_update_special_bonds.cpp
similarity index 73%
rename from src/fix_update_special_bonds.cpp
rename to src/BPM/fix_update_special_bonds.cpp
index 159b2a1170..5452366090 100644
--- a/src/fix_update_special_bonds.cpp
+++ b/src/BPM/fix_update_special_bonds.cpp
@@ -20,6 +20,7 @@
 #include "force.h"
 #include "modify.h"
 #include "neigh_list.h"
+#include "neighbor.h"
 #include "pair.h"
 
 #include <utility>
@@ -27,7 +28,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
@@ -61,7 +62,8 @@ void FixUpdateSpecialBonds::setup(int /*vflag*/)
   // Require atoms know about all of their bonds and if they break
   if (force->newton_bond) error->all(FLERR, "Fix update/special/bonds requires Newton bond off");
 
-  if (!atom->avec->bonds_allow) error->all(FLERR, "Fix update/special/bonds requires atom bonds");
+  if (!atom->avec->bonds_allow)
+    error->all(FLERR, "Fix update/special/bonds requires an atom style supporting bonds");
 
   // special lj must be 0 1 1 to censor pair forces between bonded particles
   // special coulomb must be 1 1 1 to ensure all pairs are included in the
@@ -72,9 +74,6 @@ void FixUpdateSpecialBonds::setup(int /*vflag*/)
       force->special_coul[3] != 1.0)
     error->all(FLERR, "Fix update/special/bonds requires special Coulomb weights = 1,1,1");
   // Implies neighbor->special_flag = [X, 2, 1, 1]
-
-  if (utils::strmatch(force->pair_style, "^hybrid"))
-    error->all(FLERR, "Cannot use fix update/special/bonds with hybrid pair styles");
 }
 
 /* ----------------------------------------------------------------------
@@ -158,69 +157,86 @@ void FixUpdateSpecialBonds::pre_force(int /*vflag*/)
   int i1, i2, j, jj, jnum;
   int *jlist, *numneigh, **firstneigh;
   tagint tag1, tag2;
+  NeighList *list;
 
   int nlocal = atom->nlocal;
-
   tagint *tag = atom->tag;
-  NeighList *list = force->pair->list;    // may need to be generalized for pair hybrid*
-  numneigh = list->numneigh;
-  firstneigh = list->firstneigh;
 
   // In theory could communicate a list of broken bonds to neighboring processors here
   // to remove restriction that users use Newton bond off
 
-  for (auto const &it : new_broken_pairs) {
-    tag1 = it.first;
-    tag2 = it.second;
-    i1 = atom->map(tag1);
-    i2 = atom->map(tag2);
+  for (int ilist = 0; ilist < neighbor->nlist; ilist++) {
+    list = neighbor->lists[ilist];
 
-    // Loop through atoms of owned atoms i j
-    if (i1 < nlocal) {
-      jlist = firstneigh[i1];
-      jnum = numneigh[i1];
-      for (jj = 0; jj < jnum; jj++) {
-        j = jlist[jj];
-        j &= SPECIALMASK;    // Clear special bond bits
-        if (tag[j] == tag2) jlist[jj] = j;
+    // Skip copied lists, will update original
+    if (list->copy) continue;
+
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+
+    for (auto const &it : new_broken_pairs) {
+      tag1 = it.first;
+      tag2 = it.second;
+      i1 = atom->map(tag1);
+      i2 = atom->map(tag2);
+
+      // Loop through atoms of owned atoms i j
+      if (i1 < nlocal) {
+        jlist = firstneigh[i1];
+        jnum = numneigh[i1];
+        for (jj = 0; jj < jnum; jj++) {
+          j = jlist[jj];
+          j &= SPECIALMASK;    // Clear special bond bits
+          if (tag[j] == tag2) jlist[jj] = j;
+        }
       }
-    }
 
-    if (i2 < nlocal) {
-      jlist = firstneigh[i2];
-      jnum = numneigh[i2];
-      for (jj = 0; jj < jnum; jj++) {
-        j = jlist[jj];
-        j &= SPECIALMASK;    // Clear special bond bits
-        if (tag[j] == tag1) jlist[jj] = j;
+      if (i2 < nlocal) {
+        jlist = firstneigh[i2];
+        jnum = numneigh[i2];
+        for (jj = 0; jj < jnum; jj++) {
+          j = jlist[jj];
+          j &= SPECIALMASK;    // Clear special bond bits
+          if (tag[j] == tag1) jlist[jj] = j;
+        }
       }
     }
   }
 
-  for (auto const &it : new_created_pairs) {
-    tag1 = it.first;
-    tag2 = it.second;
-    i1 = atom->map(tag1);
-    i2 = atom->map(tag2);
+  for (int ilist = 0; ilist < neighbor->nlist; ilist++) {
+    list = neighbor->lists[ilist];
 
-    // Loop through atoms of owned atoms i j and update SB bits
-    if (i1 < nlocal) {
-      jlist = firstneigh[i1];
-      jnum = numneigh[i1];
-      for (jj = 0; jj < jnum; jj++) {
-        j = jlist[jj];
-        if (((j >> SBBITS) & 3) != 0) continue;               // Skip bonded pairs
-        if (tag[j] == tag2) jlist[jj] = j ^ (1 << SBBITS);    // Add 1-2 special bond bits
+    // Skip copied lists, will update original
+    if (list->copy) continue;
+
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+
+    for (auto const &it : new_created_pairs) {
+      tag1 = it.first;
+      tag2 = it.second;
+      i1 = atom->map(tag1);
+      i2 = atom->map(tag2);
+
+      // Loop through atoms of owned atoms i j and update SB bits
+      if (i1 < nlocal) {
+        jlist = firstneigh[i1];
+        jnum = numneigh[i1];
+        for (jj = 0; jj < jnum; jj++) {
+          j = jlist[jj];
+          if (((j >> SBBITS) & 3) != 0) continue;               // Skip bonded pairs
+          if (tag[j] == tag2) jlist[jj] = j ^ (1 << SBBITS);    // Add 1-2 special bond bits
+        }
       }
-    }
 
-    if (i2 < nlocal) {
-      jlist = firstneigh[i2];
-      jnum = numneigh[i2];
-      for (jj = 0; jj < jnum; jj++) {
-        j = jlist[jj];
-        if (((j >> SBBITS) & 3) != 0) continue;               // Skip bonded pairs
-        if (tag[j] == tag1) jlist[jj] = j ^ (1 << SBBITS);    // Add 1-2 special bond bits
+      if (i2 < nlocal) {
+        jlist = firstneigh[i2];
+        jnum = numneigh[i2];
+        for (jj = 0; jj < jnum; jj++) {
+          j = jlist[jj];
+          if (((j >> SBBITS) & 3) != 0) continue;               // Skip bonded pairs
+          if (tag[j] == tag1) jlist[jj] = j ^ (1 << SBBITS);    // Add 1-2 special bond bits
+        }
       }
     }
   }
diff --git a/src/fix_update_special_bonds.h b/src/BPM/fix_update_special_bonds.h
similarity index 100%
rename from src/fix_update_special_bonds.h
rename to src/BPM/fix_update_special_bonds.h
diff --git a/src/BPM/pair_bpm_spring.cpp b/src/BPM/pair_bpm_spring.cpp
index 1177156359..01cee91b4c 100644
--- a/src/BPM/pair_bpm_spring.cpp
+++ b/src/BPM/pair_bpm_spring.cpp
@@ -19,6 +19,7 @@
 #include "force.h"
 #include "memory.h"
 #include "neigh_list.h"
+#include "neighbor.h"
 
 #include <cmath>
 
@@ -202,6 +203,18 @@ void PairBPMSpring::coeff(int narg, char **arg)
   if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients");
 }
 
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairBPMSpring::init_style()
+{
+  if (comm->ghost_velocity == 0)
+    error->all(FLERR,"Pair bpm/spring requires ghost atoms store velocity");
+
+  neighbor->add_request(this);
+}
+
 /* ----------------------------------------------------------------------
    init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
diff --git a/src/BPM/pair_bpm_spring.h b/src/BPM/pair_bpm_spring.h
index 3cb281bff3..c10e4a3400 100644
--- a/src/BPM/pair_bpm_spring.h
+++ b/src/BPM/pair_bpm_spring.h
@@ -31,6 +31,7 @@ class PairBPMSpring : public Pair {
   void compute(int, int) override;
   void settings(int, char **) override;
   void coeff(int, char **) override;
+  void init_style() override;
   double init_one(int, int) override;
   void write_restart(FILE *) override;
   void read_restart(FILE *) override;
diff --git a/src/BROWNIAN/fix_brownian_base.cpp b/src/BROWNIAN/fix_brownian_base.cpp
index b2e89a096a..508ce4d1c6 100644
--- a/src/BROWNIAN/fix_brownian_base.cpp
+++ b/src/BROWNIAN/fix_brownian_base.cpp
@@ -204,7 +204,7 @@ FixBrownianBase::~FixBrownianBase()
     delete[] gamma_r_invsqrt;
   }
 
-  if (dipole_flag) { delete[] dipole_body; }
+  if (dipole_flag) delete[] dipole_body;
   delete rng;
 }
 
diff --git a/src/BROWNIAN/fix_brownian_sphere.cpp b/src/BROWNIAN/fix_brownian_sphere.cpp
index 220a3b9735..79e3858f00 100644
--- a/src/BROWNIAN/fix_brownian_sphere.cpp
+++ b/src/BROWNIAN/fix_brownian_sphere.cpp
@@ -36,12 +36,11 @@ FixBrownianSphere::FixBrownianSphere(LAMMPS *lmp, int narg, char **arg) :
     FixBrownianBase(lmp, narg, arg)
 {
   if (gamma_t_eigen_flag || gamma_r_eigen_flag) {
-    error->all(FLERR, "Illegal fix brownian command.");
+    error->all(FLERR, "Illegal fix brownian/sphere command.");
   }
 
-  if (!gamma_t_flag || !gamma_r_flag) { error->all(FLERR, "Illegal fix brownian command."); }
+  if (!gamma_t_flag || !gamma_r_flag) error->all(FLERR, "Illegal fix brownian/sphere command.");
   if (!atom->mu_flag) error->all(FLERR, "Fix brownian/sphere requires atom attribute mu");
-  if (!atom->sphere_flag) error->all(FLERR, "Fix brownian/sphere requires atom style sphere");
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/BROWNIAN/fix_propel_self.cpp b/src/BROWNIAN/fix_propel_self.cpp
index 8d97b828a9..e98f461b80 100644
--- a/src/BROWNIAN/fix_propel_self.cpp
+++ b/src/BROWNIAN/fix_propel_self.cpp
@@ -35,7 +35,7 @@ using namespace FixConst;
 
 enum { DIPOLE, VELOCITY, QUAT };
 
-#define TOL 1e-14
+static constexpr double TOL = 1e-14;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/CG-DNA/fix_nve_dot.cpp b/src/CG-DNA/fix_nve_dot.cpp
index a9d288eb16..4fbf9bb9be 100644
--- a/src/CG-DNA/fix_nve_dot.cpp
+++ b/src/CG-DNA/fix_nve_dot.cpp
@@ -26,7 +26,7 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathExtra;
 
-#define INERTIA 0.2          // moment of inertia prefactor for ellipsoid
+static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/CG-DNA/fix_nve_dotc_langevin.cpp b/src/CG-DNA/fix_nve_dotc_langevin.cpp
index 56436a3a98..8151cbe4fe 100644
--- a/src/CG-DNA/fix_nve_dotc_langevin.cpp
+++ b/src/CG-DNA/fix_nve_dotc_langevin.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathExtra;
 
-#define INERTIA 0.2          // moment of inertia prefactor for ellipsoid
+static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/CG-SPICA/angle_spica.cpp b/src/CG-SPICA/angle_spica.cpp
index 3f8a506ed2..45b28d812a 100644
--- a/src/CG-SPICA/angle_spica.cpp
+++ b/src/CG-SPICA/angle_spica.cpp
@@ -39,7 +39,7 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace LJSPICAParms;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/CLASS2/angle_class2.cpp b/src/CLASS2/angle_class2.cpp
index c731a4d5c9..1dbaaf0568 100644
--- a/src/CLASS2/angle_class2.cpp
+++ b/src/CLASS2/angle_class2.cpp
@@ -33,7 +33,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/CLASS2/dihedral_class2.cpp b/src/CLASS2/dihedral_class2.cpp
index d49d50f0e6..288fac2e92 100644
--- a/src/CLASS2/dihedral_class2.cpp
+++ b/src/CLASS2/dihedral_class2.cpp
@@ -32,8 +32,8 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define TOLERANCE 0.05
-#define SMALL     0.0000001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.0000001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/CLASS2/improper_class2.cpp b/src/CLASS2/improper_class2.cpp
index 1e172757b0..e2f5ffe01b 100644
--- a/src/CLASS2/improper_class2.cpp
+++ b/src/CLASS2/improper_class2.cpp
@@ -32,7 +32,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/COLLOID/fix_wall_colloid.cpp b/src/COLLOID/fix_wall_colloid.cpp
index 0637057417..75dc5c797f 100644
--- a/src/COLLOID/fix_wall_colloid.cpp
+++ b/src/COLLOID/fix_wall_colloid.cpp
@@ -34,7 +34,7 @@ FixWallColloid::FixWallColloid(LAMMPS *lmp, int narg, char **arg) : FixWall(lmp,
 
 void FixWallColloid::init()
 {
-  if (!atom->sphere_flag) error->all(FLERR, "Fix wall/colloid requires atom style sphere");
+  if (!atom->radius_flag) error->all(FLERR, "Fix wall/colloid requires atom attribute radius");
 
   // ensure all particles in group are extended particles
 
diff --git a/src/COLLOID/pair_brownian.cpp b/src/COLLOID/pair_brownian.cpp
index 82be043df0..39432ca61b 100644
--- a/src/COLLOID/pair_brownian.cpp
+++ b/src/COLLOID/pair_brownian.cpp
@@ -42,10 +42,6 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-// same as fix_wall.cpp
-
-enum { EDGE, CONSTANT, VARIABLE };
-
 /* ---------------------------------------------------------------------- */
 
 PairBrownian::PairBrownian(LAMMPS *lmp) : Pair(lmp)
@@ -110,7 +106,7 @@ void PairBrownian::compute(int eflag, int vflag)
         for (int m = 0; m < wallfix->nwall; m++) {
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
-          if (wallfix->xstyle[m] == VARIABLE) {
+          if (wallfix->xstyle[m] == FixWall::VARIABLE) {
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           } else
             wallcoord = wallfix->coord0[m];
@@ -439,7 +435,7 @@ void PairBrownian::coeff(int narg, char **arg)
 
 void PairBrownian::init_style()
 {
-  if (!atom->sphere_flag) error->all(FLERR, "Pair brownian requires atom style sphere");
+  if (!atom->radius_flag) error->all(FLERR, "Pair brownian requires atom attribute radius");
 
   // if newton off, forces between atoms ij will be double computed
   // using different random numbers
@@ -506,7 +502,7 @@ void PairBrownian::init_style()
     for (int m = 0; m < wallfix->nwall; m++) {
       int dim = wallfix->wallwhich[m] / 2;
       int side = wallfix->wallwhich[m] % 2;
-      if (wallfix->xstyle[m] == VARIABLE) {
+      if (wallfix->xstyle[m] == FixWall::VARIABLE) {
         wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
         // Since fix->wall->init happens after pair->init_style
         wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
diff --git a/src/COLLOID/pair_brownian_poly.cpp b/src/COLLOID/pair_brownian_poly.cpp
index 1e04b8dc2a..99cebe3792 100644
--- a/src/COLLOID/pair_brownian_poly.cpp
+++ b/src/COLLOID/pair_brownian_poly.cpp
@@ -42,10 +42,6 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-// same as fix_wall.cpp
-
-enum{EDGE,CONSTANT,VARIABLE};
-
 /* ---------------------------------------------------------------------- */
 
 PairBrownianPoly::PairBrownianPoly(LAMMPS *lmp) : PairBrownian(lmp)
@@ -95,7 +91,7 @@ void PairBrownianPoly::compute(int eflag, int vflag)
         for (int m = 0; m < wallfix->nwall; m++) {
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
-          if (wallfix->xstyle[m] == VARIABLE) {
+          if (wallfix->xstyle[m] == FixWall::VARIABLE) {
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           }
           else wallcoord = wallfix->coord0[m];
@@ -322,8 +318,8 @@ void PairBrownianPoly::init_style()
 {
   if (force->newton_pair == 1)
     error->all(FLERR,"Pair brownian/poly requires newton pair off");
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Pair brownian/poly requires atom style sphere");
+  if (!atom->radius_flag)
+    error->all(FLERR,"Pair brownian/poly requires atom attribute radius");
 
   // ensure all particles are finite-size
   // for pair hybrid, should limit test to types using the pair style
@@ -376,7 +372,7 @@ void PairBrownianPoly::init_style()
     for (int m = 0; m < wallfix->nwall; m++) {
       int dim = wallfix->wallwhich[m] / 2;
       int side = wallfix->wallwhich[m] % 2;
-      if (wallfix->xstyle[m] == VARIABLE) {
+      if (wallfix->xstyle[m] == FixWall::VARIABLE) {
         wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
         // Since fix->wall->init happens after pair->init_style
         wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
diff --git a/src/COLLOID/pair_lubricate.cpp b/src/COLLOID/pair_lubricate.cpp
index 6f07d63bfb..99a544cd7f 100644
--- a/src/COLLOID/pair_lubricate.cpp
+++ b/src/COLLOID/pair_lubricate.cpp
@@ -41,10 +41,6 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-// same as fix_wall.cpp
-
-enum{NONE=0,EDGE,CONSTANT,VARIABLE};
-
 /* ---------------------------------------------------------------------- */
 
 PairLubricate::PairLubricate(LAMMPS *lmp) : Pair(lmp)
@@ -169,7 +165,7 @@ void PairLubricate::compute(int eflag, int vflag)
          for (int m = 0; m < wallfix->nwall; m++) {
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE) {
+           if (wallfix->xstyle[m] == FixWall::VARIABLE) {
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
@@ -531,8 +527,10 @@ void PairLubricate::coeff(int narg, char **arg)
 
 void PairLubricate::init_style()
 {
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Pair lubricate requires atom style sphere");
+  if (!atom->omega_flag)
+    error->all(FLERR,"Pair lubricate requires atom attribute omega");
+  if (!atom->radius_flag)
+    error->all(FLERR,"Pair lubricate requires atom attribute radius");
   if (comm->ghost_velocity == 0)
     error->all(FLERR,"Pair lubricate requires ghost atoms store velocity");
 
@@ -593,7 +591,7 @@ void PairLubricate::init_style()
     for (int m = 0; m < wallfix->nwall; m++) {
       int dim = wallfix->wallwhich[m] / 2;
       int side = wallfix->wallwhich[m] % 2;
-      if (wallfix->xstyle[m] == VARIABLE) {
+      if (wallfix->xstyle[m] == FixWall::VARIABLE) {
         wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
         //Since fix->wall->init happens after pair->init_style
         wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
diff --git a/src/COLLOID/pair_lubricateU.cpp b/src/COLLOID/pair_lubricateU.cpp
index ac1e62c2a2..f97a5b34c7 100644
--- a/src/COLLOID/pair_lubricateU.cpp
+++ b/src/COLLOID/pair_lubricateU.cpp
@@ -40,11 +40,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define TOL 1E-4   // tolerance for conjugate gradient
-
-// same as fix_wall.cpp
-
-enum{EDGE,CONSTANT,VARIABLE};
+static constexpr double TOL = 1e-4;   // tolerance for conjugate gradient
 
 /* ---------------------------------------------------------------------- */
 
@@ -595,7 +591,7 @@ void PairLubricateU::compute_Fh(double **x)
          for (int m = 0; m < wallfix->nwall; m++) {
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE) {
+           if (wallfix->xstyle[m] == FixWall::VARIABLE) {
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
@@ -827,7 +823,7 @@ void PairLubricateU::compute_RU()
          for (int m = 0; m < wallfix->nwall; m++) {
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE) {
+           if (wallfix->xstyle[m] == FixWall::VARIABLE) {
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
@@ -1098,7 +1094,7 @@ void PairLubricateU::compute_RU(double **x)
          for (int m = 0; m < wallfix->nwall; m++) {
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE) {
+           if (wallfix->xstyle[m] == FixWall::VARIABLE) {
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
@@ -1764,8 +1760,10 @@ void PairLubricateU::coeff(int narg, char **arg)
 
 void PairLubricateU::init_style()
 {
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Pair lubricateU requires atom style sphere");
+  if (!atom->omega_flag)
+    error->all(FLERR,"Pair lubricateU requires atom attribute omega");
+  if (!atom->radius_flag)
+    error->all(FLERR,"Pair lubricateU requires atom attribute radius");
   if (comm->ghost_velocity == 0)
     error->all(FLERR,"Pair lubricateU requires ghost atoms store velocity");
 
@@ -1819,7 +1817,7 @@ void PairLubricateU::init_style()
     for (int m = 0; m < wallfix->nwall; m++) {
       int dim = wallfix->wallwhich[m] / 2;
       int side = wallfix->wallwhich[m] % 2;
-      if (wallfix->xstyle[m] == VARIABLE) {
+      if (wallfix->xstyle[m] == FixWall::VARIABLE) {
         wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
         //Since fix->wall->init happens after pair->init_style
         wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
diff --git a/src/COLLOID/pair_lubricateU_poly.cpp b/src/COLLOID/pair_lubricateU_poly.cpp
index 297c4e1924..cc98656524 100644
--- a/src/COLLOID/pair_lubricateU_poly.cpp
+++ b/src/COLLOID/pair_lubricateU_poly.cpp
@@ -41,12 +41,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define TOL 1E-3   // tolerance for conjugate gradient
-
-// same as fix_wall.cpp
-
-enum{EDGE,CONSTANT,VARIABLE};
-
+static constexpr double TOL = 1e-3;   // tolerance for conjugate gradient
 
 /* ---------------------------------------------------------------------- */
 
@@ -365,7 +360,7 @@ void PairLubricateUPoly::compute_Fh(double **x)
          for (int m = 0; m < wallfix->nwall; m++) {
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE) {
+           if (wallfix->xstyle[m] == FixWall::VARIABLE) {
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
@@ -640,7 +635,7 @@ void PairLubricateUPoly::compute_RU(double **x)
          for (int m = 0; m < wallfix->nwall; m++) {
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE) {
+           if (wallfix->xstyle[m] == FixWall::VARIABLE) {
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
@@ -1126,12 +1121,13 @@ void PairLubricateUPoly::settings(int narg, char **arg)
 void PairLubricateUPoly::init_style()
 {
   if (force->newton_pair == 1)
-    error->all(FLERR,"Pair lubricateU/poly requires newton pair off");
+    error->all(FLERR, "Pair lubricateU/poly requires newton pair off");
   if (comm->ghost_velocity == 0)
-    error->all(FLERR,
-               "Pair lubricateU/poly requires ghost atoms store velocity");
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Pair lubricate/poly requires atom style sphere");
+    error->all(FLERR, "Pair lubricateU/poly requires ghost atoms store velocity");
+  if (!atom->omega_flag)
+    error->all(FLERR, "Pair lubricateU/poly requires atom attribute omega");
+  if (!atom->radius_flag)
+    error->all(FLERR, "Pair lubricateU/poly requires atom attribute radius");
 
   // ensure all particles are finite-size
   // for pair hybrid, should limit test to types using the pair style
@@ -1141,7 +1137,7 @@ void PairLubricateUPoly::init_style()
 
   for (int i = 0; i < nlocal; i++)
     if (radius[i] == 0.0)
-      error->one(FLERR,"Pair lubricate/poly requires extended particles");
+      error->one(FLERR,"Pair lubricateU/poly requires extended particles");
 
   // Set the isotropic constants depending on the volume fraction
 
@@ -1161,9 +1157,7 @@ void PairLubricateUPoly::init_style()
       flagdeform = 1;
     else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
       if (flagwall)
-        error->all(FLERR,
-                   "Cannot use multiple fix wall commands with "
-                   "pair lubricateU");
+        error->all(FLERR, "Cannot use multiple fix wall commands with pair lubricateU/poly");
       flagwall = 1; // Walls exist
       wallfix = dynamic_cast<FixWall *>(modify->fix[i]);
       if (wallfix->xflag) flagwall = 2; // Moving walls exist
@@ -1184,7 +1178,7 @@ void PairLubricateUPoly::init_style()
     for (int m = 0; m < wallfix->nwall; m++) {
       int dim = wallfix->wallwhich[m] / 2;
       int side = wallfix->wallwhich[m] % 2;
-      if (wallfix->xstyle[m] == VARIABLE) {
+      if (wallfix->xstyle[m] == FixWall::VARIABLE) {
         wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
         //Since fix->wall->init happens after pair->init_style
         wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
@@ -1214,14 +1208,8 @@ void PairLubricateUPoly::init_style()
 
   if (!flagVF) vol_f = 0;
 
-  if (!comm->me) {
-    if (logfile)
-      fprintf(logfile, "lubricateU: vol_f = %g, vol_p = %g, vol_T = %g\n",
-          vol_f,vol_P,vol_T);
-    if (screen)
-      fprintf(screen, "lubricateU: vol_f = %g, vol_p = %g, vol_T = %g\n",
-          vol_f,vol_P,vol_T);
-  }
+  if (comm->me == 0)
+    utils::logmesg(lmp, "lubricateU: vol_f = {}, vol_p = {}, vol_T = {}\n", vol_f, vol_P, vol_T);
 
   // Set the isotropic constant
 
diff --git a/src/COLLOID/pair_lubricate_poly.cpp b/src/COLLOID/pair_lubricate_poly.cpp
index e6a0606e87..33f32f3ad2 100644
--- a/src/COLLOID/pair_lubricate_poly.cpp
+++ b/src/COLLOID/pair_lubricate_poly.cpp
@@ -41,10 +41,6 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-// same as fix_wall.cpp
-
-enum{EDGE,CONSTANT,VARIABLE};
-
 /* ---------------------------------------------------------------------- */
 
 PairLubricatePoly::PairLubricatePoly(LAMMPS *lmp) : PairLubricate(lmp)
@@ -151,7 +147,7 @@ void PairLubricatePoly::compute(int eflag, int vflag)
          for (int m = 0; m < wallfix->nwall; m++) {
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE) {
+           if (wallfix->xstyle[m] == FixWall::VARIABLE) {
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
@@ -428,12 +424,13 @@ void PairLubricatePoly::compute(int eflag, int vflag)
 void PairLubricatePoly::init_style()
 {
   if (force->newton_pair == 1)
-    error->all(FLERR,"Pair lubricate/poly requires newton pair off");
+    error->all(FLERR, "Pair lubricate/poly requires newton pair off");
   if (comm->ghost_velocity == 0)
-    error->all(FLERR,
-               "Pair lubricate/poly requires ghost atoms store velocity");
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Pair lubricate/poly requires atom style sphere");
+    error->all(FLERR, "Pair lubricate/poly requires ghost atoms store velocity");
+  if (!atom->omega_flag)
+    error->all(FLERR, "Pair lubricate/poly requires atom attribute omega");
+  if (!atom->radius_flag)
+    error->all(FLERR, "Pair lubricate/poly requires atom attribute radius");
 
   // ensure all particles are finite-size
   // for pair hybrid, should limit test to types using the pair style
@@ -443,7 +440,7 @@ void PairLubricatePoly::init_style()
 
   for (int i = 0; i < nlocal; i++)
     if (radius[i] == 0.0)
-      error->one(FLERR,"Pair lubricate/poly requires extended particles");
+      error->one(FLERR,"Pair lubricate/poly requires only extended particles");
 
   neighbor->add_request(this, NeighConst::REQ_FULL);
 
@@ -498,7 +495,7 @@ void PairLubricatePoly::init_style()
     for (int m = 0; m < wallfix->nwall; m++) {
       int dim = wallfix->wallwhich[m] / 2;
       int side = wallfix->wallwhich[m] % 2;
-      if (wallfix->xstyle[m] == VARIABLE) {
+      if (wallfix->xstyle[m] == FixWall::VARIABLE) {
         wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
         //Since fix->wall->init happens after pair->init_style
         wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
diff --git a/src/COLLOID/pair_yukawa_colloid.cpp b/src/COLLOID/pair_yukawa_colloid.cpp
index ad63292e33..37bd4dfd99 100644
--- a/src/COLLOID/pair_yukawa_colloid.cpp
+++ b/src/COLLOID/pair_yukawa_colloid.cpp
@@ -121,8 +121,8 @@ void PairYukawaColloid::compute(int eflag, int vflag)
 
 void PairYukawaColloid::init_style()
 {
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Pair yukawa/colloid requires atom style sphere");
+  if (!atom->radius_flag)
+    error->all(FLERR,"Pair yukawa/colloid requires atom attribute radius");
 
   neighbor->add_request(this);
 
diff --git a/src/COLVARS/ndx_group.cpp b/src/COLVARS/ndx_group.cpp
index 1d24db3900..062f17619a 100644
--- a/src/COLVARS/ndx_group.cpp
+++ b/src/COLVARS/ndx_group.cpp
@@ -26,8 +26,8 @@
 #include "tokenizer.h"
 
 using namespace LAMMPS_NS;
-#define BUFLEN 4096
-#define DELTA 16384
+static constexpr int BUFLEN = 4096;
+static constexpr int DELTA = 16384;
 
 // read file until next section "name" or any next section if name == ""
 
diff --git a/src/COMPRESS/dump_cfg_gz.cpp b/src/COMPRESS/dump_cfg_gz.cpp
index 259056c013..e4d5ae76cc 100644
--- a/src/COMPRESS/dump_cfg_gz.cpp
+++ b/src/COMPRESS/dump_cfg_gz.cpp
@@ -22,7 +22,7 @@
 #include <cstring>
 
 using namespace LAMMPS_NS;
-#define UNWRAPEXPAND 10.0
+static constexpr double UNWRAPEXPAND = 10.0;
 
 DumpCFGGZ::DumpCFGGZ(LAMMPS *lmp, int narg, char **arg) : DumpCFG(lmp, narg, arg)
 {
diff --git a/src/COMPRESS/dump_cfg_zstd.cpp b/src/COMPRESS/dump_cfg_zstd.cpp
index e3f9a7c1f9..c52f9e0942 100644
--- a/src/COMPRESS/dump_cfg_zstd.cpp
+++ b/src/COMPRESS/dump_cfg_zstd.cpp
@@ -28,7 +28,7 @@
 #include <cstring>
 
 using namespace LAMMPS_NS;
-#define UNWRAPEXPAND 10.0
+static constexpr double UNWRAPEXPAND = 10.0;
 
 DumpCFGZstd::DumpCFGZstd(LAMMPS *lmp, int narg, char **arg) : DumpCFG(lmp, narg, arg)
 {
diff --git a/src/CORESHELL/pair_born_coul_dsf_cs.cpp b/src/CORESHELL/pair_born_coul_dsf_cs.cpp
index 9e0ac11c78..9edcf2755d 100644
--- a/src/CORESHELL/pair_born_coul_dsf_cs.cpp
+++ b/src/CORESHELL/pair_born_coul_dsf_cs.cpp
@@ -30,7 +30,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/CORESHELL/pair_born_coul_long_cs.cpp b/src/CORESHELL/pair_born_coul_long_cs.cpp
index 3a3dc39d69..c29bd5c31d 100644
--- a/src/CORESHELL/pair_born_coul_long_cs.cpp
+++ b/src/CORESHELL/pair_born_coul_long_cs.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 #define B4       -5.80844129e-3
 #define B5        1.14652755e-1
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 #define EPS_EWALD 1.0e-6
 #define EPS_EWALD_SQR 1.0e-12
 
diff --git a/src/CORESHELL/pair_born_coul_wolf_cs.cpp b/src/CORESHELL/pair_born_coul_wolf_cs.cpp
index 4765e1575c..47241c0beb 100644
--- a/src/CORESHELL/pair_born_coul_wolf_cs.cpp
+++ b/src/CORESHELL/pair_born_coul_wolf_cs.cpp
@@ -25,7 +25,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/CORESHELL/pair_buck_coul_long_cs.cpp b/src/CORESHELL/pair_buck_coul_long_cs.cpp
index 6b15e8dfe5..96203deae1 100644
--- a/src/CORESHELL/pair_buck_coul_long_cs.cpp
+++ b/src/CORESHELL/pair_buck_coul_long_cs.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 #define B4       -5.80844129e-3
 #define B5        1.14652755e-1
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 #define EPS_EWALD 1.0e-6
 #define EPS_EWALD_SQR 1.0e-12
 
diff --git a/src/CORESHELL/pair_coul_long_cs.cpp b/src/CORESHELL/pair_coul_long_cs.cpp
index 5148ffda2d..6af9a30639 100644
--- a/src/CORESHELL/pair_coul_long_cs.cpp
+++ b/src/CORESHELL/pair_coul_long_cs.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 #define B4       -5.80844129e-3
 #define B5        1.14652755e-1
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 #define EPS_EWALD 1.0e-6
 #define EPS_EWALD_SQR 1.0e-12
 
diff --git a/src/CORESHELL/pair_coul_wolf_cs.cpp b/src/CORESHELL/pair_coul_wolf_cs.cpp
index 5e15493aad..6deb66735b 100644
--- a/src/CORESHELL/pair_coul_wolf_cs.cpp
+++ b/src/CORESHELL/pair_coul_wolf_cs.cpp
@@ -24,7 +24,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/CORESHELL/pair_lj_class2_coul_long_cs.cpp b/src/CORESHELL/pair_lj_class2_coul_long_cs.cpp
index 1cbddf0614..577bc2e6df 100644
--- a/src/CORESHELL/pair_lj_class2_coul_long_cs.cpp
+++ b/src/CORESHELL/pair_lj_class2_coul_long_cs.cpp
@@ -29,7 +29,7 @@ using namespace LAMMPS_NS;
 #define B4       -5.80844129e-3
 #define B5        1.14652755e-1
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 #define EPS_EWALD 1.0e-6
 #define EPS_EWALD_SQR 1.0e-12
 
diff --git a/src/CORESHELL/pair_lj_cut_coul_long_cs.cpp b/src/CORESHELL/pair_lj_cut_coul_long_cs.cpp
index 253ae440b0..d5fd66e382 100644
--- a/src/CORESHELL/pair_lj_cut_coul_long_cs.cpp
+++ b/src/CORESHELL/pair_lj_cut_coul_long_cs.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 #define B4       -5.80844129e-3
 #define B5        1.14652755e-1
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 #define EPS_EWALD 1.0e-6
 #define EPS_EWALD_SQR 1.0e-12
 
diff --git a/src/DIELECTRIC/pppm_dielectric.cpp b/src/DIELECTRIC/pppm_dielectric.cpp
index e308cb0826..dac24a9838 100644
--- a/src/DIELECTRIC/pppm_dielectric.cpp
+++ b/src/DIELECTRIC/pppm_dielectric.cpp
@@ -36,18 +36,13 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 enum {REVERSE_RHO};
 enum {FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/DIELECTRIC/pppm_disp_dielectric.cpp b/src/DIELECTRIC/pppm_disp_dielectric.cpp
index 2c4de6ada1..baa9478a45 100644
--- a/src/DIELECTRIC/pppm_disp_dielectric.cpp
+++ b/src/DIELECTRIC/pppm_disp_dielectric.cpp
@@ -33,11 +33,11 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define MAXORDER   7
-#define OFFSET 16384
-#define SMALL 0.00001
-#define LARGE 10000.0
-#define EPS_HOC 1.0e-7
+static constexpr int MAXORDER =   7;
+static constexpr int OFFSET = 16384;
+static constexpr double SMALL = 0.00001;
+static constexpr double LARGE = 10000.0;
+static constexpr double EPS_HOC = 1.0e-7;
 
 enum{REVERSE_RHO,REVERSE_RHO_GEOM,REVERSE_RHO_ARITH,REVERSE_RHO_NONE};
 enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM,
@@ -48,13 +48,8 @@ enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM,
      FORWARD_IK_NONE,FORWARD_AD_NONE,FORWARD_IK_PERATOM_NONE,
      FORWARD_AD_PERATOM_NONE};
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/DPD-BASIC/pair_dpd.cpp b/src/DPD-BASIC/pair_dpd.cpp
index 5771831e02..1f60074280 100644
--- a/src/DPD-BASIC/pair_dpd.cpp
+++ b/src/DPD-BASIC/pair_dpd.cpp
@@ -32,7 +32,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/DPD-BASIC/pair_dpd_ext.cpp b/src/DPD-BASIC/pair_dpd_ext.cpp
index 623b16774c..da08e53a4a 100644
--- a/src/DPD-BASIC/pair_dpd_ext.cpp
+++ b/src/DPD-BASIC/pair_dpd_ext.cpp
@@ -33,7 +33,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/DPD-BASIC/pair_dpd_ext_tstat.cpp b/src/DPD-BASIC/pair_dpd_ext_tstat.cpp
index 433bc54063..fe881d7bc5 100644
--- a/src/DPD-BASIC/pair_dpd_ext_tstat.cpp
+++ b/src/DPD-BASIC/pair_dpd_ext_tstat.cpp
@@ -30,7 +30,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/DPD-BASIC/pair_dpd_tstat.cpp b/src/DPD-BASIC/pair_dpd_tstat.cpp
index ed659e7cc3..108177ed69 100644
--- a/src/DPD-BASIC/pair_dpd_tstat.cpp
+++ b/src/DPD-BASIC/pair_dpd_tstat.cpp
@@ -26,7 +26,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/DPD-MESO/pair_edpd.cpp b/src/DPD-MESO/pair_edpd.cpp
index ed99a5eac1..b575956f71 100644
--- a/src/DPD-MESO/pair_edpd.cpp
+++ b/src/DPD-MESO/pair_edpd.cpp
@@ -38,7 +38,7 @@ using namespace LAMMPS_NS;
 #define MIN(A,B) ((A) < (B) ? (A) : (B))
 #define MAX(A,B) ((A) > (B) ? (A) : (B))
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 static const char cite_pair_edpd[] =
   "pair edpd command: doi:10.1016/j.jcp.2014.02.003\n\n"
diff --git a/src/DPD-MESO/pair_mdpd.cpp b/src/DPD-MESO/pair_mdpd.cpp
index 767fddd7ac..de148189e0 100644
--- a/src/DPD-MESO/pair_mdpd.cpp
+++ b/src/DPD-MESO/pair_mdpd.cpp
@@ -34,7 +34,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 static const char cite_pair_mdpd[] =
   "pair mdpd command: doi:10.1063/1.4812366\n\n"
diff --git a/src/DPD-MESO/pair_tdpd.cpp b/src/DPD-MESO/pair_tdpd.cpp
index eac5bd1318..038b3c3cdc 100644
--- a/src/DPD-MESO/pair_tdpd.cpp
+++ b/src/DPD-MESO/pair_tdpd.cpp
@@ -37,7 +37,7 @@ using namespace LAMMPS_NS;
 #define MIN(A,B) ((A) < (B) ? (A) : (B))
 #define MAX(A,B) ((A) > (B) ? (A) : (B))
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 static const char cite_pair_tdpd[] =
   "pair tdpd command: doi:10.1063/1.4923254\n\n"
diff --git a/src/DPD-REACT/fix_eos_table.cpp b/src/DPD-REACT/fix_eos_table.cpp
index 36bbe4d478..42567119df 100644
--- a/src/DPD-REACT/fix_eos_table.cpp
+++ b/src/DPD-REACT/fix_eos_table.cpp
@@ -24,7 +24,7 @@
 
 #include <cstring>
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
@@ -194,7 +194,7 @@ void FixEOStable::free_table(Table *tb)
 
 void FixEOStable::read_table(Table *tb, Table *tb2, char *file, char *keyword)
 {
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
 
   // open file
 
diff --git a/src/DPD-REACT/fix_eos_table_rx.cpp b/src/DPD-REACT/fix_eos_table_rx.cpp
index f7afddc64f..bf71b502f0 100644
--- a/src/DPD-REACT/fix_eos_table_rx.cpp
+++ b/src/DPD-REACT/fix_eos_table_rx.cpp
@@ -28,12 +28,12 @@
 #include <cmath>
 #include <cstring>
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 #ifdef DBL_EPSILON
-  #define MY_EPSILON (10.0*DBL_EPSILON)
+static constexpr double MY_EPSILON = 10.0*DBL_EPSILON;
 #else
-  #define MY_EPSILON (10.0*2.220446049250313e-16)
+static constexpr double MY_EPSILON = 10.0*2.220446049250313e-16;
 #endif
 
 using namespace LAMMPS_NS;
@@ -318,7 +318,8 @@ void FixEOStableRX::read_file(char *file)
 
   // one set of params can span multiple lines
   int n,nwords,ispecies;
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
 
   while (true) {
@@ -414,7 +415,7 @@ void FixEOStableRX::free_table(Table *tb)
 
 void FixEOStableRX::read_table(Table *tb, Table *tb2, char *file, char *keyword)
 {
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
 
   // open file
 
diff --git a/src/DPD-REACT/fix_rx.cpp b/src/DPD-REACT/fix_rx.cpp
index cce88cf465..69eab4dbee 100644
--- a/src/DPD-REACT/fix_rx.cpp
+++ b/src/DPD-REACT/fix_rx.cpp
@@ -38,16 +38,16 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathSpecial;
 
-enum{NONE,HARMONIC};
-enum{LUCY};
+enum { NONE, HARMONIC };
+enum { LUCY };
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 #ifdef DBL_EPSILON
-  #define MY_EPSILON (10.0*DBL_EPSILON)
+static constexpr double MY_EPSILON = 10.0*DBL_EPSILON;
 #else
-  #define MY_EPSILON (10.0*2.220446049250313e-16)
+static constexpr double MY_EPSILON = 10.0*2.220446049250313e-16;
 #endif
 
 #define SparseKinetics_enableIntegralReactions (true)
@@ -250,7 +250,8 @@ void FixRX::post_constructor()
   // Assign species names to tmpspecies array and determine the number of unique species
 
   int n;
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
   char * word;
 
@@ -784,7 +785,8 @@ void FixRX::read_file(char *file)
   // Count the number of reactions from kinetics file
 
   int n,ispecies;
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
 
   while (true) {
diff --git a/src/DPD-REACT/fix_shardlow.cpp b/src/DPD-REACT/fix_shardlow.cpp
index 19f2c08a5f..747d2725b8 100644
--- a/src/DPD-REACT/fix_shardlow.cpp
+++ b/src/DPD-REACT/fix_shardlow.cpp
@@ -59,7 +59,7 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace random_external_state;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 #define EPSILON_SQUARED ((EPSILON) * (EPSILON))
 
 static const char cite_fix_shardlow[] =
diff --git a/src/DPD-REACT/pair_dpd_fdt.cpp b/src/DPD-REACT/pair_dpd_fdt.cpp
index e6cab0e996..44920a6bda 100644
--- a/src/DPD-REACT/pair_dpd_fdt.cpp
+++ b/src/DPD-REACT/pair_dpd_fdt.cpp
@@ -34,7 +34,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/DPD-REACT/pair_dpd_fdt_energy.cpp b/src/DPD-REACT/pair_dpd_fdt_energy.cpp
index d3371f465a..12d6dc5fb7 100644
--- a/src/DPD-REACT/pair_dpd_fdt_energy.cpp
+++ b/src/DPD-REACT/pair_dpd_fdt_energy.cpp
@@ -34,7 +34,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/DPD-REACT/pair_exp6_rx.cpp b/src/DPD-REACT/pair_exp6_rx.cpp
index c6b831f84b..e0ac9c0b27 100644
--- a/src/DPD-REACT/pair_exp6_rx.cpp
+++ b/src/DPD-REACT/pair_exp6_rx.cpp
@@ -31,13 +31,13 @@
 using namespace LAMMPS_NS;
 using namespace MathSpecial;
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 #ifdef DBL_EPSILON
-  #define MY_EPSILON (10.0*DBL_EPSILON)
+static constexpr double MY_EPSILON = 10.0*DBL_EPSILON;
 #else
-  #define MY_EPSILON (10.0*2.220446049250313e-16)
+static constexpr double MY_EPSILON = 10.0*2.220446049250313e-16;
 #endif
 
 #define oneFluidApproxParameter (-1)
@@ -728,7 +728,8 @@ void PairExp6rx::read_file(char *file)
   // one set of params can span multiple lines
 
   int n,nwords,ispecies;
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
 
   while (true) {
@@ -835,7 +836,8 @@ void PairExp6rx::read_file2(char *file)
 
   // one set of params can span multiple lines
   int n,nwords;
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
 
   while (true) {
diff --git a/src/DPD-REACT/pair_multi_lucy.cpp b/src/DPD-REACT/pair_multi_lucy.cpp
index 89263dd445..918246dcde 100644
--- a/src/DPD-REACT/pair_multi_lucy.cpp
+++ b/src/DPD-REACT/pair_multi_lucy.cpp
@@ -39,9 +39,8 @@
 using namespace LAMMPS_NS;
 using MathConst::MY_PI;
 
-enum{NONE,RLINEAR,RSQ};
-
-#define MAXLINE 1024
+enum { NONE, RLINEAR, RSQ };
+static constexpr int MAXLINE = 1024;
 
 static const char cite_pair_multi_lucy[] =
   "pair_style multi/lucy command: doi:10.1063/1.4942520\n\n"
@@ -344,7 +343,7 @@ double PairMultiLucy::init_one(int i, int j)
 
 void PairMultiLucy::read_table(Table *tb, char *file, char *keyword)
 {
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
 
   // open file
 
diff --git a/src/DPD-REACT/pair_multi_lucy_rx.cpp b/src/DPD-REACT/pair_multi_lucy_rx.cpp
index 9620908531..5ff03de6e7 100644
--- a/src/DPD-REACT/pair_multi_lucy_rx.cpp
+++ b/src/DPD-REACT/pair_multi_lucy_rx.cpp
@@ -41,14 +41,14 @@
 using namespace LAMMPS_NS;
 using MathConst::MY_PI;
 
-enum{NONE,RLINEAR,RSQ};
+enum{ NONE, RLINEAR, RSQ };
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 #ifdef DBL_EPSILON
-  #define MY_EPSILON (10.0*DBL_EPSILON)
+static constexpr double MY_EPSILON = 10.0*DBL_EPSILON;
 #else
-  #define MY_EPSILON (10.0*2.220446049250313e-16)
+static constexpr double MY_EPSILON = 10.0*2.220446049250313e-16;
 #endif
 
 #define oneFluidParameter (-1)
@@ -483,16 +483,13 @@ double PairMultiLucyRX::init_one(int i, int j)
 
 void PairMultiLucyRX::read_table(Table *tb, char *file, char *keyword)
 {
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
 
   // open file
 
   FILE *fp = utils::open_potential(file,lmp,nullptr);
-  if (fp == nullptr) {
-    char str[128];
-    snprintf(str,128,"Cannot open file %s",file);
-    error->one(FLERR,str);
-  }
+  if (fp == nullptr)
+    error->one(FLERR, "Cannot open file {}: {}",file,utils::getsyserror());
 
   // loop until section found with matching keyword
 
diff --git a/src/DRUDE/fix_tgnh_drude.cpp b/src/DRUDE/fix_tgnh_drude.cpp
index 987408fe63..c067bf7adf 100644
--- a/src/DRUDE/fix_tgnh_drude.cpp
+++ b/src/DRUDE/fix_tgnh_drude.cpp
@@ -40,8 +40,8 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define DELTAFLIP 0.1
-#define TILTMAX 1.5
+static constexpr double DELTAFLIP = 0.1;
+static constexpr double TILTMAX = 1.5;
 
 enum{NOBIAS,BIAS};
 enum{NONE,XYZ,XY,YZ,XZ};
@@ -1076,7 +1076,6 @@ void FixTGNHDrude::couple()
 
 void FixTGNHDrude::remap()
 {
-  int i;
   double oldlo,oldhi;
   double expfac;
 
diff --git a/src/DRUDE/pair_lj_cut_thole_long.cpp b/src/DRUDE/pair_lj_cut_thole_long.cpp
index cfdf631eb9..a11fc691b1 100644
--- a/src/DRUDE/pair_lj_cut_thole_long.cpp
+++ b/src/DRUDE/pair_lj_cut_thole_long.cpp
@@ -46,7 +46,7 @@ using namespace MathConst;
 #define B4       -5.80844129e-3
 #define B5        1.14652755e-1
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 #define EPS_EWALD 1.0e-6
 #define EPS_EWALD_SQR 1.0e-12
 
diff --git a/src/Depend.sh b/src/Depend.sh
index dbffb2dba0..3df1347e67 100755
--- a/src/Depend.sh
+++ b/src/Depend.sh
@@ -99,6 +99,7 @@ fi
 
 if (test $1 = "EXTRA-PAIR") then
   depend GPU
+  depend KOKKOS
   depend OPENMP
 fi
 
diff --git a/src/EFF/fix_langevin_eff.cpp b/src/EFF/fix_langevin_eff.cpp
index 8c255e4348..72c368b612 100644
--- a/src/EFF/fix_langevin_eff.cpp
+++ b/src/EFF/fix_langevin_eff.cpp
@@ -37,8 +37,8 @@ using namespace FixConst;
 enum{NOBIAS,BIAS};
 enum{CONSTANT,EQUAL,ATOM};
 
-#define SINERTIA 0.4          // moment of inertia prefactor for sphere
-#define EINERTIA 0.2          // moment of inertia prefactor for ellipsoid
+static constexpr double SINERTIA = 0.4;          // moment of inertia prefactor for sphere
+static constexpr double EINERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
@@ -137,7 +137,7 @@ void FixLangevinEff::post_force_no_tally()
   dof = domain->dimension * particles;
   fix_dof = 0;
   for (int i = 0; i < modify->nfix; i++)
-    fix_dof += modify->fix[i]->dof(igroup);
+    fix_dof += (int)modify->fix[i]->dof(igroup);
 
   // extra_dof = domain->dimension
   dof -= domain->dimension + fix_dof;
@@ -306,7 +306,7 @@ void FixLangevinEff::post_force_tally()
   dof = domain->dimension * particles;
   fix_dof = 0;
   for (int i = 0; i < modify->nfix; i++)
-    fix_dof += modify->fix[i]->dof(igroup);
+    fix_dof += (int)modify->fix[i]->dof(igroup);
 
   // extra_dof = domain->dimension
   dof -= domain->dimension + fix_dof;
diff --git a/src/ELECTRODE/ewald_electrode.cpp b/src/ELECTRODE/ewald_electrode.cpp
index 99266ed450..122708b97d 100644
--- a/src/ELECTRODE/ewald_electrode.cpp
+++ b/src/ELECTRODE/ewald_electrode.cpp
@@ -37,7 +37,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ELECTRODE/fix_electrode_conp.cpp b/src/ELECTRODE/fix_electrode_conp.cpp
index 1b6c0a37d4..9e2599ca8a 100644
--- a/src/ELECTRODE/fix_electrode_conp.cpp
+++ b/src/ELECTRODE/fix_electrode_conp.cpp
@@ -48,7 +48,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 1e-16
+static constexpr double SMALL = 1e-16;
 
 extern "C" {
 void dgetrf_(const int *M, const int *N, double *A, const int *lda, int *ipiv, int *info);
diff --git a/src/ELECTRODE/fix_electrode_thermo.cpp b/src/ELECTRODE/fix_electrode_thermo.cpp
index 52c0a3ce4c..f95aeac615 100644
--- a/src/ELECTRODE/fix_electrode_thermo.cpp
+++ b/src/ELECTRODE/fix_electrode_thermo.cpp
@@ -30,7 +30,7 @@
 using namespace LAMMPS_NS;
 
 #define NUM_GROUPS 2
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 /* ----------------------------------------------------------------------- */
 
diff --git a/src/ELECTRODE/pppm_electrode.cpp b/src/ELECTRODE/pppm_electrode.cpp
index 6ede0f1f4d..b9024cd69a 100644
--- a/src/ELECTRODE/pppm_electrode.cpp
+++ b/src/ELECTRODE/pppm_electrode.cpp
@@ -45,22 +45,17 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define MAXORDER 7
-#define OFFSET 16384
-#define LARGE 10000.0
-#define SMALL 0.00001
-#define EPS_HOC 1.0e-7
+static constexpr int MAXORDER = 7;
+static constexpr int OFFSET = 16384;
+static constexpr double LARGE = 10000.0;
+static constexpr double SMALL = 0.00001;
+static constexpr double EPS_HOC = 1.0e-7;
 
 enum { REVERSE_RHO };
 enum { FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM };
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF 1.0f
-#else
-#define ZEROF 0.0
-#define ONEF 1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF = 1.0;
 
 static const char cite_pppm_electrode[] =
     "kspace_style pppm/electrode command:\n\n"
@@ -633,7 +628,9 @@ void PPPMElectrode::project_psi(double *vec, int sensor_grpbit)
   // project u_brick with weight matrix
   double **x = atom->x;
   int *mask = atom->mask;
-  double const scaleinv = 1.0 / (nx_pppm * ny_pppm * nz_pppm);
+  const bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
+  const double scaleinv = 1.0 / ngridtotal;
+
   for (int i = 0; i < atom->nlocal; i++) {
     if (!(mask[i] & sensor_grpbit)) continue;
     double v = 0.;
@@ -1362,7 +1359,7 @@ double PPPMElectrode::compute_qopt()
   // each proc calculates contributions from every Pth grid point
 
   bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
-  int nxy_pppm = nx_pppm * ny_pppm;
+  bigint nxy_pppm = (bigint) nx_pppm * ny_pppm;
 
   double qopt = 0.0;
 
diff --git a/src/ELECTRODE/slab_dipole.cpp b/src/ELECTRODE/slab_dipole.cpp
index d5f3eae7aa..27ec42a8b5 100644
--- a/src/ELECTRODE/slab_dipole.cpp
+++ b/src/ELECTRODE/slab_dipole.cpp
@@ -28,7 +28,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 /* ----------------------------------------------------------------------
    Slab-geometry correction term to dampen inter-slab interactions between
diff --git a/src/EXTRA-COMPUTE/compute_born_matrix.cpp b/src/EXTRA-COMPUTE/compute_born_matrix.cpp
index 4eecbbfa14..2e674409d1 100644
--- a/src/EXTRA-COMPUTE/compute_born_matrix.cpp
+++ b/src/EXTRA-COMPUTE/compute_born_matrix.cpp
@@ -42,8 +42,7 @@
 
 using namespace LAMMPS_NS;
 
-#define BIG 1000000000
-#define SMALL 1e-16
+static constexpr double SMALL = 1e-16;
 
 // this table is used to pick the 3d rij vector indices used to
 // compute the 6 indices long Voigt stress vector
diff --git a/src/EXTRA-COMPUTE/compute_cnp_atom.cpp b/src/EXTRA-COMPUTE/compute_cnp_atom.cpp
index 526874d7a4..f42d896f6b 100644
--- a/src/EXTRA-COMPUTE/compute_cnp_atom.cpp
+++ b/src/EXTRA-COMPUTE/compute_cnp_atom.cpp
@@ -41,8 +41,8 @@
 using namespace LAMMPS_NS;
 
 //define maximum values
-#define MAXNEAR 24
-#define MAXCOMMON 12
+static constexpr int MAXNEAR = 24;
+static constexpr int MAXCOMMON = 12;
 
 enum{NCOMMON};
 
diff --git a/src/EXTRA-COMPUTE/compute_rattlers_atom.cpp b/src/EXTRA-COMPUTE/compute_rattlers_atom.cpp
new file mode 100644
index 0000000000..9dacf14171
--- /dev/null
+++ b/src/EXTRA-COMPUTE/compute_rattlers_atom.cpp
@@ -0,0 +1,311 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Joel Clemmer (SNL), Ishan Srivastava (LBNL)
+------------------------------------------------------------------------- */
+
+#include "compute_rattlers_atom.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "neighbor.h"
+#include "pair.h"
+#include "update.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+enum { TYPE, RADIUS };
+
+/* ---------------------------------------------------------------------- */
+
+ComputeRattlersAtom::ComputeRattlersAtom(LAMMPS *lmp, int narg, char **arg) :
+    Compute(lmp, narg, arg), ncontacts(nullptr), rattler(nullptr)
+{
+  if (narg != 6) error->all(FLERR, "Illegal compute rattlers/atom command");
+
+  if (strcmp(arg[3], "type") == 0)
+    cutstyle = TYPE;
+  else if (strcmp(arg[3], "radius") == 0)
+    cutstyle = RADIUS;
+  else
+    error->all(FLERR, "Illegal compute rattlers/atom command");
+
+  if (cutstyle == RADIUS && !atom->radius_flag)
+    error->all(FLERR, "Compute rattlers/atom radius style requires atom attribute radius");
+
+  ncontacts_rattler = utils::inumeric(FLERR, arg[4], false, lmp);
+  max_tries = utils::inumeric(FLERR, arg[5], false, lmp);
+
+  nmax = 0;
+  invoked_peratom = -1;
+
+  scalar_flag = 1;
+  extscalar = 1;
+  peratom_flag = 1;
+  size_peratom_cols = 0;
+  comm_forward = 1;
+  comm_reverse = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+ComputeRattlersAtom::~ComputeRattlersAtom()
+{
+  memory->destroy(ncontacts);
+  memory->destroy(rattler);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeRattlersAtom::init()
+{
+  if (force->pair == nullptr) error->all(FLERR, "No pair style is defined for compute rattlers");
+
+  // Cannot calculate distance from radii for JKR/DMT
+  if (force->pair->beyond_contact)
+    error->all(FLERR, "Compute rattlers does not currently support pair styles that extend beyond contact");
+
+  // need an occasional half neighbor list
+  // set size to same value as request made by force->pair
+  // this should enable it to always be a copy list (e.g. for granular pstyle)
+
+  auto pairrequest = neighbor->find_request(force->pair);
+  if (pairrequest && pairrequest->get_size())
+    neighbor->add_request(this, NeighConst::REQ_SIZE | NeighConst::REQ_OCCASIONAL);
+  else
+    neighbor->add_request(this, NeighConst::REQ_OCCASIONAL);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeRattlersAtom::init_list(int /*id*/, NeighList *ptr)
+{
+  list = ptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeRattlersAtom::compute_peratom()
+{
+  if (invoked_peratom == update->ntimestep) return;
+  invoked_peratom = update->ntimestep;
+
+  int i, j, ii, jj, inum, jnum, itype, jtype, tmp_flag;
+  tagint itag, jtag;
+  double xtmp, ytmp, ztmp, delx, dely, delz;
+  double rsq, radsum;
+
+  if (nmax < atom->nmax) {
+    nmax = atom->nmax;
+    memory->destroy(ncontacts);
+    memory->destroy(rattler);
+    memory->create(ncontacts, nmax, "rattlers:ncontacts");
+    memory->create(rattler, nmax, "rattlers:rattler");
+    vector_atom = rattler;
+  }
+
+  for (i = 0; i < nmax; i++) rattler[i] = 0;
+
+  int *ilist, *jlist, *numneigh, **firstneigh;
+
+  double **x = atom->x;
+  double *radius = atom->radius;
+  tagint *tag = atom->tag;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+
+  // invoke half neighbor list (will copy or build if necessary)
+  neighbor->build_one(list);
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  double **cutsq = force->pair->cutsq;
+
+  int change_flag = 1;
+  int ntry = 0;
+  while (ntry < max_tries) {
+    change_flag = 0;
+
+    for (i = 0; i < nmax; i++) ncontacts[i] = 0;
+
+    for (ii = 0; ii < inum; ii++) {
+      i = ilist[ii];
+      if (!(mask[i] & groupbit)) continue;
+      if (rattler[i] == 1) continue;
+
+      xtmp = x[i][0];
+      ytmp = x[i][1];
+      ztmp = x[i][2];
+      itag = tag[i];
+      itype = type[i];
+      jlist = firstneigh[i];
+      jnum = numneigh[i];
+
+      for (jj = 0; jj < jnum; jj++) {
+        j = jlist[jj];
+        j &= NEIGHMASK;
+
+        if (!(mask[j] & groupbit)) continue;
+        if (rattler[j] == 1) continue;
+
+        // itag = jtag is possible for long cutoffs that include images of self
+
+        if (newton_pair == 0 && j >= nlocal) {
+          jtag = tag[j];
+          if (itag > jtag) {
+            if ((itag + jtag) % 2 == 0) continue;
+          } else if (itag < jtag) {
+            if ((itag + jtag) % 2 == 1) continue;
+          } else {
+            if (x[j][2] < ztmp) continue;
+            if (x[j][2] == ztmp) {
+              if (x[j][1] < ytmp) continue;
+              if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+            }
+          }
+        }
+
+        jtype = type[j];
+
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+
+        if (cutstyle == TYPE) {
+          if (rsq >= cutsq[itype][jtype]) continue;
+        } else {
+          radsum = radius[i] + radius[j];
+          if (rsq >= radsum * radsum) continue;
+        }
+        ncontacts[i] += 1;
+        if (newton_pair || j < nlocal)
+          ncontacts[j] += 1;
+      }
+    }
+
+    // add contributions from ghosts
+    if (force->newton_pair) comm->reverse_comm(this);
+
+    // Set flags for rattlers
+    for (i = 0; i < atom->nlocal; i++) {
+      if (ncontacts[i] < ncontacts_rattler && rattler[i] == 0) {
+        rattler[i] = 1;
+        change_flag = 1;
+      }
+    }
+
+    comm->forward_comm(this);
+
+    MPI_Allreduce(&change_flag, &tmp_flag, 1, MPI_INT, MPI_MAX, world);
+    change_flag = tmp_flag;
+    if (change_flag == 0) break;
+
+    ntry += 1;
+  }
+
+  if (change_flag == 1)
+    error->warning(FLERR, "Rattler calculation failed to converge within max tries");
+}
+
+/* ---------------------------------------------------------------------- */
+
+double ComputeRattlersAtom::compute_scalar()
+{
+  if (invoked_peratom != update->ntimestep)
+    compute_peratom();
+
+  invoked_scalar = update->ntimestep;
+
+  double total_rattlers = 0;
+  for (int i = 0; i < atom->nlocal; i++) {
+    if (rattler[i] == 1) {
+      total_rattlers += 1;
+    }
+  }
+
+  //Total across processors
+  MPI_Allreduce(&total_rattlers, &scalar, 1, MPI_DOUBLE, MPI_SUM, world);
+  return scalar;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int ComputeRattlersAtom::pack_reverse_comm(int n, int first, double *buf)
+{
+  int i, m, last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    buf[m++] = ubuf(ncontacts[i]).d;
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeRattlersAtom::unpack_reverse_comm(int n, int *list, double *buf)
+{
+  int i, j, m;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    ncontacts[j] += (int) ubuf(buf[m++]).i;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int ComputeRattlersAtom::pack_forward_comm(int n, int *list, double *buf,
+                                          int /*pbc_flag*/, int * /*pbc*/)
+{
+  int i, j, m;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    buf[m++] = rattler[j];
+  }
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeRattlersAtom::unpack_forward_comm(int n, int first, double *buf)
+{
+  int i, m, last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    rattler[i] = buf[m++];
+  }
+}
diff --git a/src/EXTRA-COMPUTE/compute_rattlers_atom.h b/src/EXTRA-COMPUTE/compute_rattlers_atom.h
new file mode 100644
index 0000000000..79a0a0a982
--- /dev/null
+++ b/src/EXTRA-COMPUTE/compute_rattlers_atom.h
@@ -0,0 +1,51 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef COMPUTE_CLASS
+// clang-format off
+ComputeStyle(rattlers/atom,ComputeRattlersAtom);
+// clang-format on
+#else
+
+#ifndef LMP_COMPUTE_RATTLERS_ATOM_H
+#define LMP_COMPUTE_RATTLERS_ATOM_H
+
+#include "compute.h"
+
+namespace LAMMPS_NS {
+
+class ComputeRattlersAtom : public Compute {
+ public:
+  ComputeRattlersAtom(class LAMMPS *, int, char **);
+  ~ComputeRattlersAtom() override;
+  void init() override;
+  void init_list(int, class NeighList *) override;
+  void compute_peratom() override;
+  double compute_scalar() override;
+  int pack_forward_comm(int, int *, double *, int, int *) override;
+  void unpack_forward_comm(int, int, double *) override;
+  int pack_reverse_comm(int, int, double *) override;
+  void unpack_reverse_comm(int, int *, double *) override;
+
+ private:
+  int cutstyle, ncontacts_rattler, max_tries, nmax, invoked_peratom;
+  int *ncontacts;
+  double *rattler;
+  class NeighList *list;
+
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/EXTRA-COMPUTE/compute_slcsa_atom.cpp b/src/EXTRA-COMPUTE/compute_slcsa_atom.cpp
new file mode 100644
index 0000000000..6c272938b6
--- /dev/null
+++ b/src/EXTRA-COMPUTE/compute_slcsa_atom.cpp
@@ -0,0 +1,417 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Paul Lafourcade (CEA-DAM-DIF, Arpajon, France)
+------------------------------------------------------------------------- */
+
+#include "compute_slcsa_atom.h"
+
+#include "arg_info.h"
+#include "atom.h"
+#include "citeme.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "pair.h"
+#include "potential_file_reader.h"
+#include "update.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+static const char cite_compute_slcsa_atom_c[] =
+    "compute slcsa/atom command: doi:10.1016/j.commatsci.2023.112534\n\n"
+    "@Article{Lafourcade2023,\n"
+    " author = {P. Lafourcade and J.-B. Maillet and C. Denoual and E. Duval and A. Allera and A. "
+    "M. Goryaeva and M.-C. Marinica},\n"
+    " title = {Robust crystal structure identification at extreme conditions using a "
+    "density-independent spectral descriptor and supervised learning},\n"
+    " journal = {Computational Materials Science},\n"
+    " year = 2023,\n"
+    " volume = 230,\n"
+    " pages = 112534\n"
+    "}\n\n";
+
+/* ---------------------------------------------------------------------- */
+ComputeSLCSAAtom::ComputeSLCSAAtom(LAMMPS *lmp, int narg, char **arg) :
+    Compute(lmp, narg, arg), list(nullptr), lda_scalings(nullptr),
+    database_mean_descriptor(nullptr), lr_bias(nullptr), lr_decision(nullptr), icov_list(nullptr),
+    mean_projected_descriptors(nullptr), maha_thresholds(nullptr), full_descriptor(nullptr),
+    projected_descriptor(nullptr), scores(nullptr), probas(nullptr), prodright(nullptr),
+    dmaha(nullptr), classification(nullptr)
+{
+  // command : compute c1 all slcsa/atom jmax nclasses parameters_file.dat
+  // example : compute c1 all slcsa/atom 8 4 slcsa_parameters.dat
+  // example : compute c1 all slcsa/atom 8 4 database_mean_descriptor.dat lda_scalings.dat lr_decision.dat lr_bias.dat mahalanobis_data.dat c_b1[*]
+  // Steps :
+  // 1. bs=bs-xbar
+  // 2. dred=coefs_lda*bs
+  // 3. scores=decision_lr*dred + lr_bias
+  // 4. probas=exp(scores)/sum(exp(scores))
+  // 5. cs=argmax(probas)
+
+  // Read the parameters file in one bloc
+  // File structure :
+  // # database mean descriptor
+  // vector with bso4dim rows x 1 col
+  // # LDA dimension reduction matrix
+  // matrix bso4dim rows x nclasses-1 cols
+  // # LR decision matrix
+  // matrix with nclasses rows x nclasses-1 cols
+  // # LR bias vector
+  // vector with 1 row x nclasses cols
+
+  if (lmp->citeme) lmp->citeme->add(cite_compute_slcsa_atom_c);
+
+  if (narg != 11) utils::missing_cmd_args(FLERR, "compute slcsa/atom", error);
+
+  int twojmax = utils::inumeric(FLERR, arg[3], false, lmp);
+  if (twojmax < 0)
+    error->all(FLERR, "Illegal compute slcsa/atom command: twojmax must be a non-negative integer");
+  ncomps = compute_ncomps(twojmax);
+
+  nclasses = utils::inumeric(FLERR, arg[4], false, lmp);
+  if (nclasses < 2)
+    error->all(FLERR, "Illegal compute slcsa/atom command: nclasses must be greater than 1");
+
+  database_mean_descriptor_file = arg[5];
+  lda_scalings_file = arg[6];
+  lr_decision_file = arg[7];
+  lr_bias_file = arg[8];
+  maha_file = arg[9];
+
+  if (comm->me == 0) {
+    auto mesg = fmt::format(
+        "Files used:\n  {:24}: {}\n  {:24}: {}\n  {:24}: {}\n  {:24}: {}\n  {:24}: {}\n",
+        "database mean descriptor", database_mean_descriptor_file, "lda scalings",
+        lda_scalings_file, "lr decision", lr_decision_file, "lr bias", lr_bias_file, "maha stats",
+        maha_file);
+    utils::logmesg(lmp, mesg);
+  }
+
+  int expand = 0;
+  char **earg;
+  int nvalues = utils::expand_args(FLERR, narg - 10, &arg[10], 1, earg, lmp);
+  if (earg != &arg[10]) expand = 1;
+  arg = earg;
+
+  ArgInfo argi(arg[0]);
+  value_t val;
+  val.id = "";
+  val.val.c = nullptr;
+  val.which = argi.get_type();
+  val.argindex = argi.get_index1();
+  val.id = argi.get_name();
+  if ((val.which == ArgInfo::FIX) || (val.which == ArgInfo::VARIABLE) ||
+      (val.which == ArgInfo::UNKNOWN) || (val.which == ArgInfo::NONE) || (argi.get_dim() > 1))
+    error->all(FLERR, "Invalid compute slcsa/atom argument: {}", arg[0]);
+
+  // if wildcard expansion occurred, free earg memory from exapnd_args()
+
+  if (expand) {
+    for (int i = 0; i < nvalues; i++) delete[] earg[i];
+    memory->sfree(earg);
+  }
+
+  val.val.c = modify->get_compute_by_id(val.id);
+  if (!val.val.c) error->all(FLERR, "Compute ID {} for fix slcsa/atom does not exist", val.id);
+  if (val.val.c->peratom_flag == 0)
+    error->all(FLERR, "Compute slcsa/atom compute {} does not calculate per-atom values", val.id);
+  if (val.argindex == 0 && val.val.c->size_peratom_cols != 0)
+    error->all(FLERR, "Compute slcsa/atom compute {} does not calculate a per-atom vector", val.id);
+  if (val.argindex && val.val.c->size_peratom_cols == 0)
+    error->all(FLERR, "Compute slcsa/atom compute {} does not calculate a per-atom array", val.id);
+  if (val.argindex && val.argindex > val.val.c->size_peratom_cols)
+    error->all(FLERR, "Compute slcsa/atom compute {} array is accessed out-of-range", val.id);
+  descriptorval = val;
+  memory->create(database_mean_descriptor, ncomps, "slcsa/atom:database_mean_descriptor");
+  memory->create(lda_scalings, ncomps, nclasses - 1, "slcsa/atom:lda_scalings");
+  memory->create(lr_decision, nclasses, nclasses - 1, "slcsa/atom:lr_decision");
+  memory->create(lr_bias, nclasses, "slcsa/atom:lr_bias");
+  memory->create(maha_thresholds, nclasses, "slcsa/atom:maha_thresholds");
+  memory->create(icov_list, nclasses, nclasses - 1, nclasses - 1, "slcsa/atom:icov_list");
+  memory->create(mean_projected_descriptors, nclasses, nclasses - 1,
+                 "slcsa/atom:mean_projected_descriptors");
+
+  if (comm->me == 0) {
+
+    if (strcmp(database_mean_descriptor_file, "NULL") == 0) {
+      error->one(FLERR,
+                 "Cannot open database mean descriptor file {}: ", database_mean_descriptor_file,
+                 utils::getsyserror());
+    } else {
+      PotentialFileReader reader(lmp, database_mean_descriptor_file,
+                                 "database mean descriptor file");
+      int nread = 0;
+      while (nread < ncomps) {
+        auto values = reader.next_values(0);
+        database_mean_descriptor[nread] = values.next_double();
+        nread++;
+      }
+    }
+
+    if (strcmp(lda_scalings_file, "NULL") == 0) {
+      error->one(FLERR, "Cannot open database linear discriminant analysis scalings file {}: ",
+                 lda_scalings_file, utils::getsyserror());
+    } else {
+      PotentialFileReader reader(lmp, lda_scalings_file, "lda scalings file");
+      int nread = 0;
+      while (nread < ncomps) {
+        auto values = reader.next_values(nclasses - 1);
+        lda_scalings[nread][0] = values.next_double();
+        lda_scalings[nread][1] = values.next_double();
+        lda_scalings[nread][2] = values.next_double();
+        nread++;
+      }
+    }
+
+    if (strcmp(lr_decision_file, "NULL") == 0) {
+      error->one(FLERR, "Cannot open logistic regression decision file {}: ", lr_decision_file,
+                 utils::getsyserror());
+    } else {
+      PotentialFileReader reader(lmp, lr_decision_file, "lr decision file");
+      int nread = 0;
+      while (nread < nclasses) {
+        auto values = reader.next_values(nclasses - 1);
+        lr_decision[nread][0] = values.next_double();
+        lr_decision[nread][1] = values.next_double();
+        lr_decision[nread][2] = values.next_double();
+        nread++;
+      }
+    }
+
+    if (strcmp(lr_bias_file, "NULL") == 0) {
+      error->one(FLERR, "Cannot open logistic regression bias file {}: ", lr_bias_file,
+                 utils::getsyserror());
+    } else {
+      PotentialFileReader reader(lmp, lr_bias_file, "lr bias file");
+      auto values = reader.next_values(nclasses);
+      lr_bias[0] = values.next_double();
+      lr_bias[1] = values.next_double();
+      lr_bias[2] = values.next_double();
+      lr_bias[3] = values.next_double();
+    }
+
+    if (strcmp(maha_file, "NULL") == 0) {
+      error->one(FLERR, "Cannot open mahalanobis stats file {}: ", maha_file, utils::getsyserror());
+    } else {
+      PotentialFileReader reader(lmp, maha_file, "mahalanobis stats file");
+      int nvalues = nclasses * ((nclasses - 1) * (nclasses - 1) + nclasses);
+      auto values = reader.next_values(nvalues);
+
+      for (int i = 0; i < nclasses; i++) {
+        maha_thresholds[i] = values.next_double();
+        for (int j = 0; j < nclasses - 1; j++)
+          mean_projected_descriptors[i][j] = values.next_double();
+        for (int k = 0; k < nclasses - 1; k++)
+          for (int l = 0; l < nclasses - 1; l++) icov_list[i][k][l] = values.next_double();
+      }
+
+      for (int i = 0; i < nclasses; i++) {
+        auto mesg = fmt::format("For class {}  maha threshold = {:.6}\n", i, maha_thresholds[i]);
+        mesg += "  mean B:\n";
+        for (int j = 0; j < nclasses - 1; j++)
+          mesg += fmt::format("   {:11.6}\n", mean_projected_descriptors[i][j]);
+        mesg += "  icov:\n";
+        for (int j = 0; j < nclasses - 1; j++) {
+          mesg += fmt::format("   {:11.6} {:11.6} {:11.6}\n", icov_list[i][j][0],
+                              icov_list[i][j][1], icov_list[i][j][2]);
+        }
+        utils::logmesg(lmp, mesg);
+      }
+    }
+  }
+
+  MPI_Bcast(&database_mean_descriptor[0], ncomps, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&lda_scalings[0][0], ncomps * (nclasses - 1), MPI_DOUBLE, 0, world);
+  MPI_Bcast(&lr_decision[0][0], nclasses * (nclasses - 1), MPI_DOUBLE, 0, world);
+  MPI_Bcast(&lr_bias[0], nclasses, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&maha_thresholds[0], nclasses, MPI_DOUBLE, 0, world);
+  MPI_Bcast(&mean_projected_descriptors[0][0], nclasses * (nclasses - 1), MPI_DOUBLE, 0, world);
+  MPI_Bcast(&icov_list[0][0][0], nclasses * (nclasses - 1) * (nclasses - 1), MPI_DOUBLE, 0, world);
+
+  peratom_flag = 1;
+  size_peratom_cols = nclasses + 1;
+  ncols = nclasses + 1;
+  nmax = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+ComputeSLCSAAtom::~ComputeSLCSAAtom()
+{
+  memory->destroy(classification);
+  memory->destroy(database_mean_descriptor);
+  memory->destroy(lda_scalings);
+  memory->destroy(lr_decision);
+  memory->destroy(lr_bias);
+  memory->destroy(maha_thresholds);
+  memory->destroy(mean_projected_descriptors);
+  memory->destroy(icov_list);
+  memory->destroy(full_descriptor);
+  memory->destroy(projected_descriptor);
+  memory->destroy(scores);
+  memory->destroy(probas);
+  memory->destroy(prodright);
+  memory->destroy(dmaha);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeSLCSAAtom::init()
+{
+
+  if (modify->get_compute_by_style(style).size() > 1)
+    if (comm->me == 0) error->warning(FLERR, "More than one compute {}", style);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeSLCSAAtom::init_list(int /*id*/, NeighList *ptr)
+{
+  list = ptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeSLCSAAtom::compute_peratom()
+{
+  invoked_peratom = update->ntimestep;
+
+  // grow per-atom if necessary
+
+  if (atom->nmax > nmax) {
+    memory->destroy(classification);
+    nmax = atom->nmax;
+    memory->create(classification, nmax, ncols, "slcsa/atom:classification");
+    array_atom = classification;
+  }
+
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  if (descriptorval.which == ArgInfo::COMPUTE) {
+    if (!(descriptorval.val.c->invoked_flag & Compute::INVOKED_PERATOM)) {
+      descriptorval.val.c->compute_peratom();
+      descriptorval.val.c->invoked_flag |= Compute::INVOKED_PERATOM;
+    }
+    double **compute_array = descriptorval.val.c->array_atom;
+
+    memory->create(full_descriptor, ncomps, "slcsa/atom:local descriptor");
+    memory->create(projected_descriptor, nclasses - 1, "slcsa/atom:reduced descriptor");
+    memory->create(scores, nclasses, "slcsa/atom:scores");
+    memory->create(probas, nclasses, "slcsa/atom:probas");
+    memory->create(prodright, nclasses - 1, "slcsa/atom:prodright");
+    memory->create(dmaha, nclasses, "slcsa/atom:prodright");
+
+    for (int i = 0; i < nlocal; i++) {
+      if (mask[i] & groupbit) {
+        for (int j = 0; j < ncomps; j++) full_descriptor[j] = compute_array[i][j];
+        // Here comes the LDA + LR process
+        // 1st step : Retrieve mean database descriptor
+        for (int j = 0; j < ncomps; j++) full_descriptor[j] -= database_mean_descriptor[j];
+        // 2nd step : Matrix multiplication to go from ncompsx1 -> (nclasses-1)*1
+        for (int j = 0; j < nclasses - 1; j++) {
+          projected_descriptor[j] = 0.0;
+          for (int k = 0; k < ncomps; k++) {
+            projected_descriptor[j] += full_descriptor[k] * lda_scalings[k][j];
+          }
+        }
+        // 3rd step : Matrix multiplication
+        for (int j = 0; j < nclasses; j++) {
+          scores[j] = lr_bias[j];
+          for (int k = 0; k < nclasses - 1; k++) {
+            scores[j] += lr_decision[j][k] * projected_descriptor[k];
+          }
+        }
+        // 4th step : Matrix multiplication
+        double sumexpscores = 0.0;
+        for (int j = 0; j < nclasses; j++) sumexpscores += exp(scores[j]);
+        for (int j = 0; j < nclasses; j++) probas[j] = exp(scores[j]) / sumexpscores;
+
+        classification[i][nclasses] = argmax(probas, nclasses);
+
+        // 5th step : Mahalanobis distance
+        for (int j = 0; j < nclasses; j++) {
+          prodright[0] = 0.0;
+          prodright[1] = 0.0;
+          prodright[2] = 0.0;
+          for (int k = 0; k < nclasses - 1; k++) {
+            for (int l = 0; l < nclasses - 1; l++) {
+              prodright[k] += (icov_list[j][k][l] *
+                               (projected_descriptor[k] - mean_projected_descriptors[j][k]));
+            }
+          }
+          double prodleft = 0.0;
+          for (int k = 0; k < nclasses - 1; k++) {
+            prodleft +=
+                (prodright[k] * (projected_descriptor[k] - mean_projected_descriptors[j][k]));
+          }
+          classification[i][j] = sqrt(prodleft);
+        }
+        // 6th step : Sanity check
+        int locclass = classification[i][nclasses];
+
+        if (classification[i][locclass] > maha_thresholds[locclass]) {
+          classification[i][nclasses] = -1.0;
+        }
+
+      } else {
+        for (int j = 0; j < ncols; j++) classification[i][j] = -1.0;
+      }
+    }
+    memory->destroy(full_descriptor);
+    memory->destroy(projected_descriptor);
+    memory->destroy(scores);
+    memory->destroy(probas);
+    memory->destroy(prodright);
+    memory->destroy(dmaha);
+  }
+}
+
+int ComputeSLCSAAtom::compute_ncomps(int twojmax)
+{
+  int ncount;
+
+  ncount = 0;
+
+  for (int j1 = 0; j1 <= twojmax; j1++)
+    for (int j2 = 0; j2 <= j1; j2++)
+      for (int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2)
+        if (j >= j1) ncount++;
+
+  return ncount;
+}
+
+int ComputeSLCSAAtom::argmax(double arr[], int size)
+{
+  int maxIndex = 0;            // Initialize the index of the maximum value to the first element.
+  double maxValue = arr[0];    // Initialize the maximum value to the first element.
+
+  for (int i = 1; i < size; ++i) {
+    if (arr[i] > maxValue) {
+      // If a greater value is found, update the maxIndex and maxValue.
+      maxIndex = i;
+      maxValue = arr[i];
+    }
+  }
+
+  return maxIndex;
+}
diff --git a/src/EXTRA-COMPUTE/compute_slcsa_atom.h b/src/EXTRA-COMPUTE/compute_slcsa_atom.h
new file mode 100644
index 0000000000..ba373a53a8
--- /dev/null
+++ b/src/EXTRA-COMPUTE/compute_slcsa_atom.h
@@ -0,0 +1,93 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Paul Lafourcade (CEA-DAM-DIF, Arpajon, France)
+------------------------------------------------------------------------- */
+
+#ifdef COMPUTE_CLASS
+// clang-format off
+ComputeStyle(slcsa/atom,ComputeSLCSAAtom);
+// clang-format on
+#else
+
+#ifndef LMP_COMPUTE_SLCSA_ATOM_H
+#define LMP_COMPUTE_SLCSA_ATOM_H
+
+#include "compute.h"
+
+namespace LAMMPS_NS {
+
+class ComputeSLCSAAtom : public Compute {
+ public:
+  ComputeSLCSAAtom(class LAMMPS *, int, char **);
+  ~ComputeSLCSAAtom() override;
+  void init() override;
+  void init_list(int, class NeighList *) override;
+  void compute_peratom() override;
+  //  double memory_usage() override;
+  int compute_ncomps(int);
+  int argmax(double *, int);
+
+ private:
+  struct value_t {
+    int which;         // type of data: COMPUTE, FIX, VARIABLE
+    int argindex;      // 1-based index if data is vector, else 0
+    std::string id;    // compute/fix/variable ID
+    union {
+      class Compute *c;
+      class Fix *f;
+      int v;
+    } val;
+  };
+  value_t descriptorval;
+  int nmax;
+  int ncols;
+  int ncomps;
+  int nclasses;
+  const char *database_mean_descriptor_file;
+  const char *lda_scalings_file;
+  const char *lr_decision_file;
+  const char *lr_bias_file;
+  const char *maha_file;
+  class NeighList *list;
+
+  // LDA dimension reduction
+  double **lda_scalings;
+  double *database_mean_descriptor;
+
+  // LR classification
+  double *lr_bias;
+  double **lr_decision;
+
+  // Mahalanobis distance calculation
+  double ***icov_list;
+  double **mean_projected_descriptors;
+  double *maha_thresholds;
+
+  // Per-atom local arrays
+  double *full_descriptor;
+  double *projected_descriptor;
+  double *scores;
+  double *probas;
+  double *prodright;
+  double *dmaha;
+
+  // Output array
+  double **classification;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/EXTRA-COMPUTE/compute_stress_cartesian.cpp b/src/EXTRA-COMPUTE/compute_stress_cartesian.cpp
index e1bc6bcd91..622ea839be 100644
--- a/src/EXTRA-COMPUTE/compute_stress_cartesian.cpp
+++ b/src/EXTRA-COMPUTE/compute_stress_cartesian.cpp
@@ -32,7 +32,7 @@
 
 using namespace LAMMPS_NS;
 
-#define SMALL 1.0e-10
+static constexpr double SMALL = 1.0e-10;
 /*-----------------------------------------------------------------------------------
   Contributing author: Olav Galteland (Norwegian University of Science and Technology)
                         olav.galteland@ntnu.no
diff --git a/src/EXTRA-COMPUTE/compute_stress_mop.cpp b/src/EXTRA-COMPUTE/compute_stress_mop.cpp
index fc9de602a7..ee8f5e554a 100644
--- a/src/EXTRA-COMPUTE/compute_stress_mop.cpp
+++ b/src/EXTRA-COMPUTE/compute_stress_mop.cpp
@@ -23,6 +23,7 @@
 #include "atom_vec.h"
 #include "bond.h"
 #include "comm.h"
+#include "dihedral.h"
 #include "domain.h"
 #include "error.h"
 #include "force.h"
@@ -38,8 +39,10 @@
 
 using namespace LAMMPS_NS;
 
+static constexpr double SMALL =     0.001;
+
 enum { X, Y, Z };
-enum { TOTAL, CONF, KIN, PAIR, BOND, ANGLE };
+enum { TOTAL, CONF, KIN, PAIR, BOND, ANGLE, DIHEDRAL };
 
 /* ---------------------------------------------------------------------- */
 
@@ -49,6 +52,7 @@ ComputeStressMop::ComputeStressMop(LAMMPS *lmp, int narg, char **arg) : Compute(
 
   bondflag = 0;
   angleflag = 0;
+  dihedralflag = 0;
 
   // set compute mode and direction of plane(s) for pressure calculation
 
@@ -129,6 +133,11 @@ ComputeStressMop::ComputeStressMop(LAMMPS *lmp, int narg, char **arg) : Compute(
         which[nvalues] = ANGLE;
         nvalues++;
       }
+    } else if (strcmp(arg[iarg],"dihedral") == 0) {
+      for (i=0; i<3; i++) {
+        which[nvalues] = DIHEDRAL;
+        nvalues++;
+      }
     } else
       error->all(FLERR, "Illegal compute stress/mop command");    //break;
 
@@ -152,6 +161,8 @@ ComputeStressMop::ComputeStressMop(LAMMPS *lmp, int narg, char **arg) : Compute(
   bond_global = nullptr;
   angle_local = nullptr;
   angle_global = nullptr;
+  dihedral_local = nullptr;
+  dihedral_global = nullptr;
 
   // this fix produces a global vector
 
@@ -162,6 +173,8 @@ ComputeStressMop::ComputeStressMop(LAMMPS *lmp, int narg, char **arg) : Compute(
   memory->create(bond_global, nvalues, "stress/mop:bond_global");
   memory->create(angle_local, nvalues, "stress/mop:angle_local");
   memory->create(angle_global, nvalues, "stress/mop:angle_global");
+  memory->create(dihedral_local,nvalues,"stress/mop:dihedral_local");
+  memory->create(dihedral_global,nvalues,"stress/mop:dihedral_global");
   size_vector = nvalues;
 
   vector_flag = 1;
@@ -180,6 +193,8 @@ ComputeStressMop::~ComputeStressMop()
   memory->destroy(bond_global);
   memory->destroy(angle_local);
   memory->destroy(angle_global);
+  memory->destroy(dihedral_local);
+  memory->destroy(dihedral_global);
   memory->destroy(vector);
 }
 
@@ -233,9 +248,13 @@ void ComputeStressMop::init()
       }
     }
     if (force->dihedral) {
-      if ((strcmp(force->dihedral_style, "zero") != 0) &&
-          (strcmp(force->dihedral_style, "none") != 0))
-        error->all(FLERR, "compute stress/mop does not account for dihedral potentials");
+      if (force->dihedral->born_matrix_enable == 0) {
+        if ((strcmp(force->dihedral_style, "zero") != 0) &&
+            (strcmp(force->dihedral_style, "none") != 0))
+          error->all(FLERR, "compute stress/mop does not account for dihedral potentials");
+      } else {
+        dihedralflag = 1;
+      }
     }
     if (force->improper) {
       if ((strcmp(force->improper_style, "zero") != 0) &&
@@ -297,8 +316,18 @@ void ComputeStressMop::compute_vector()
 
   MPI_Allreduce(angle_local, angle_global, nvalues, MPI_DOUBLE, MPI_SUM, world);
 
+  if (dihedralflag) {
+    //Compute dihedral contribution on separate procs
+    compute_dihedrals();
+  } else {
+    for (int i=0; i<nvalues; i++) dihedral_local[i] = 0.0;
+  }
+
+  // sum dihedral contribution over all procs
+  MPI_Allreduce(dihedral_local,dihedral_global,nvalues,MPI_DOUBLE,MPI_SUM,world);
+
   for (int m = 0; m < nvalues; m++) {
-    vector[m] = values_global[m] + bond_global[m] + angle_global[m];
+    vector[m] = values_global[m] + bond_global[m] + angle_global[m] + dihedral_global[m];
   }
 }
 
@@ -429,7 +458,12 @@ void ComputeStressMop::compute_pairs()
           xi[1] = atom->x[i][1];
           xi[2] = atom->x[i][2];
 
-          // velocities at t
+          // minimum image of xi with respect to the plane
+          xi[dir] -= pos;
+          domain->minimum_image(xi[0], xi[1], xi[2]);
+          xi[dir] += pos;
+
+          //velocities at t
 
           vi[0] = atom->v[i][0];
           vi[1] = atom->v[i][1];
@@ -454,10 +488,8 @@ void ComputeStressMop::compute_pairs()
           // at each timestep, must check atoms going through the
           // image of the plane that is closest to the box
 
-          double pos_temp = pos + copysign(1.0, domain->prd_half[dir] - pos) * domain->prd[dir];
-          if (fabs(xi[dir] - pos) < fabs(xi[dir] - pos_temp)) pos_temp = pos;
-
-          if (((xi[dir] - pos_temp) * (xj[dir] - pos_temp)) < 0) {
+          double tau = (xi[dir] - pos) / (xi[dir] - xj[dir]);
+          if ((tau <= 1) && (tau >= 0)) {
 
             // sgn = copysign(1.0,vi[dir]-vcm[dir]);
 
@@ -741,7 +773,7 @@ void ComputeStressMop::compute_angles()
 
       // only left bond crossing the plane
 
-      if (!right_cross && left_cross) {
+      else if (!right_cross && left_cross) {
         double sgn = copysign(1.0, x_angle_left[dir] - pos);
         dcos_theta[0] = -sgn * (dx_left[0] * cos_theta / r1 + dx_right[0] / r2) / r1;
         dcos_theta[1] = -sgn * (dx_left[1] * cos_theta / r1 + dx_right[1] / r2) / r1;
@@ -750,7 +782,7 @@ void ComputeStressMop::compute_angles()
 
       // both bonds crossing the plane
 
-      if (right_cross && left_cross) {
+      else if (right_cross && left_cross) {
 
         // due to right bond
 
@@ -786,3 +818,304 @@ void ComputeStressMop::compute_angles()
     m += 3;
   }
 }
+
+/*------------------------------------------------------------------------
+  compute dihedral contribution to pressure of local proc
+  -------------------------------------------------------------------------*/
+
+void ComputeStressMop::compute_dihedrals()
+{
+  int i, nd, atom1, atom2, atom3, atom4, imol, iatom;
+  tagint tagprev;
+  double vb1x, vb1y, vb1z, vb2x, vb2y, vb2z, vb3x, vb3y, vb3z;
+  double vb2xm, vb2ym, vb2zm;
+  double sb1, sb2, sb3, rb1, rb3, c0, b1mag2, b1mag, b2mag2;
+  double b2mag, b3mag2, b3mag, c2mag, ctmp, r12c1, c1mag, r12c2;
+  double s1, s2, s12, sc1, sc2, a11, a22, a33, a12, a13, a23;
+  double df[3], f1[3], f2[3], f3[3], f4[3];
+  double c, sx2, sy2, sz2, sin2;
+
+  double **x = atom->x;
+  tagint *tag = atom->tag;
+  int *num_dihedral = atom->num_dihedral;
+  tagint **dihedral_atom1 = atom->dihedral_atom1;
+  tagint **dihedral_atom2 = atom->dihedral_atom2;
+  tagint **dihedral_atom3 = atom->dihedral_atom3;
+  tagint **dihedral_atom4 = atom->dihedral_atom4;
+  int *mask = atom->mask;
+
+  int *molindex = atom->molindex;
+  int *molatom = atom->molatom;
+  Molecule **onemols = atom->avec->onemols;
+
+  int nlocal = atom->nlocal;
+  int molecular = atom->molecular;
+
+  // loop over all atoms and their dihedrals
+
+  Dihedral *dihedral = force->dihedral;
+
+  double dudih, du2dih;
+
+  double diffx[3] = {0.0, 0.0, 0.0};
+  double x_atom_1[3] = {0.0, 0.0, 0.0};
+  double x_atom_2[3] = {0.0, 0.0, 0.0};
+  double x_atom_3[3] = {0.0, 0.0, 0.0};
+  double x_atom_4[3] = {0.0, 0.0, 0.0};
+
+  // initialization
+  for (int i = 0; i < nvalues; i++) {
+    dihedral_local[i] = 0.0;
+  }
+  double local_contribution[3] = {0.0, 0.0, 0.0};
+
+  for (atom2 = 0; atom2 < nlocal; atom2++) {
+    if (!(mask[atom2] & groupbit)) continue;
+
+    if (molecular == Atom::MOLECULAR)
+      nd = num_dihedral[atom2];
+    else {
+      if (molindex[atom2] < 0) continue;
+      imol = molindex[atom2];
+      iatom = molatom[atom2];
+      nd = onemols[imol]->num_dihedral[iatom];
+    }
+
+    for (i = 0; i < nd; i++) {
+      if (molecular == 1) {
+        if (tag[atom2] != dihedral_atom2[atom2][i]) continue;
+          atom1 = atom->map(dihedral_atom1[atom2][i]);
+          atom3 = atom->map(dihedral_atom3[atom2][i]);
+          atom4 = atom->map(dihedral_atom4[atom2][i]);
+      } else {
+        if (tag[atom2] != onemols[imol]->dihedral_atom2[atom2][i]) continue;
+        tagprev = tag[atom2] - iatom - 1;
+        atom1 = atom->map(onemols[imol]->dihedral_atom1[atom2][i] + tagprev);
+        atom3 = atom->map(onemols[imol]->dihedral_atom3[atom2][i] + tagprev);
+        atom4 = atom->map(onemols[imol]->dihedral_atom4[atom2][i] + tagprev);
+      }
+
+      if (atom1 < 0 || !(mask[atom1] & groupbit)) continue;
+      if (atom3 < 0 || !(mask[atom3] & groupbit)) continue;
+      if (atom4 < 0 || !(mask[atom4] & groupbit)) continue;
+
+      // minimum image of atom1 with respect to the plane of interest
+      x_atom_1[0] = x[atom1][0];
+      x_atom_1[1] = x[atom1][1];
+      x_atom_1[2] = x[atom1][2];
+      x_atom_1[dir] -= pos;
+      domain->minimum_image(x_atom_1[0], x_atom_1[1], x_atom_1[2]);
+      x_atom_1[dir] += pos;
+
+      // minimum image of atom2 with respect to atom1
+      diffx[0] = x[atom2][0] - x_atom_1[0];
+      diffx[1] = x[atom2][1] - x_atom_1[1];
+      diffx[2] = x[atom2][2] - x_atom_1[2];
+      domain->minimum_image(diffx[0], diffx[1], diffx[2]);
+      x_atom_2[0] = x_atom_1[0] + diffx[0];
+      x_atom_2[1] = x_atom_1[1] + diffx[1];
+      x_atom_2[2] = x_atom_1[2] + diffx[2];
+
+      // minimum image of atom3 with respect to atom2
+      diffx[0] = x[atom3][0] - x_atom_2[0];
+      diffx[1] = x[atom3][1] - x_atom_2[1];
+      diffx[2] = x[atom3][2] - x_atom_2[2];
+      domain->minimum_image(diffx[0], diffx[1], diffx[2]);
+      x_atom_3[0] = x_atom_2[0] + diffx[0];
+      x_atom_3[1] = x_atom_2[1] + diffx[1];
+      x_atom_3[2] = x_atom_2[2] + diffx[2];
+
+      // minimum image of atom3 with respect to atom2
+      diffx[0] = x[atom4][0] - x_atom_3[0];
+      diffx[1] = x[atom4][1] - x_atom_3[1];
+      diffx[2] = x[atom4][2] - x_atom_3[2];
+      domain->minimum_image(diffx[0], diffx[1], diffx[2]);
+      x_atom_4[0] = x_atom_3[0] + diffx[0];
+      x_atom_4[1] = x_atom_3[1] + diffx[1];
+      x_atom_4[2] = x_atom_3[2] + diffx[2];
+
+      // check if any bond vector crosses the plane of interest
+      double tau_right = (x_atom_2[dir] - pos) / (x_atom_2[dir] - x_atom_1[dir]);
+      double tau_middle = (x_atom_3[dir] - pos) / (x_atom_3[dir] - x_atom_2[dir]);
+      double tau_left = (x_atom_4[dir] - pos) / (x_atom_4[dir] - x_atom_3[dir]);
+      bool right_cross = ((tau_right >=0) && (tau_right  <= 1));
+      bool middle_cross = ((tau_middle >= 0) && (tau_middle <= 1));
+      bool left_cross = ((tau_left >=0) && (tau_left <= 1));
+
+      // no bonds crossing the plane
+      if (!right_cross && !middle_cross && !left_cross) continue;
+
+      dihedral->born_matrix(i, atom1, atom2, atom3, atom4, dudih, du2dih);
+
+      // first bond
+      vb1x = x_atom_1[0] - x_atom_2[0];
+      vb1y = x_atom_1[1] - x_atom_2[1];
+      vb1z = x_atom_1[2] - x_atom_2[2];
+
+      // second bond
+      vb2x = x_atom_3[0] - x_atom_2[0];
+      vb2y = x_atom_3[1] - x_atom_2[1];
+      vb2z = x_atom_3[2] - x_atom_2[2];
+
+      vb2xm = -vb2x;
+      vb2ym = -vb2y;
+      vb2zm = -vb2z;
+
+      // third bond
+      vb3x = x_atom_4[0] - x_atom_3[0];
+      vb3y = x_atom_4[1] - x_atom_3[1];
+      vb3z = x_atom_4[2] - x_atom_3[2];
+
+      // c0 calculation
+      sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+      sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
+      sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+
+      rb1 = sqrt(sb1);
+      rb3 = sqrt(sb3);
+
+      c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+      // 1st and 2nd angle
+      b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+      b1mag = sqrt(b1mag2);
+      b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+      b2mag = sqrt(b2mag2);
+      b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+      b3mag = sqrt(b3mag2);
+
+      ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z;
+      r12c1 = 1.0 / (b1mag*b2mag);
+      c1mag = ctmp * r12c1;
+
+      ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
+      r12c2 = 1.0 / (b2mag*b3mag);
+      c2mag = ctmp * r12c2;
+
+      // cos and sin of 2 angles and final c
+      sin2 = MAX(1.0 - c1mag*c1mag,0.0);
+      sc1 = sqrt(sin2);
+      if (sc1 < SMALL) sc1 = SMALL;
+      sc1 = 1.0/sc1;
+
+      sin2 = MAX(1.0 - c2mag*c2mag,0.0);
+      sc2 = sqrt(sin2);
+      if (sc2 < SMALL) sc2 = SMALL;
+      sc2 = 1.0/sc2;
+
+      s1 = sc1 * sc1;
+      s2 = sc2 * sc2;
+      s12 = sc1 * sc2;
+      c = (c0 + c1mag*c2mag) * s12;
+
+      // error check
+      if (c > 1.0) c = 1.0;
+      if (c < -1.0) c = -1.0;
+
+      // forces on each particle
+      double a = dudih;
+      c = c * a;
+      s12 = s12 * a;
+      a11 = c*sb1*s1;
+      a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2));
+      a33 = c*sb3*s2;
+      a12 = -r12c1 * (c1mag*c*s1 + c2mag*s12);
+      a13 = -rb1*rb3*s12;
+      a23 = r12c2 * (c2mag*c*s2 + c1mag*s12);
+
+      sx2  = a12*vb1x + a22*vb2x + a23*vb3x;
+      sy2  = a12*vb1y + a22*vb2y + a23*vb3y;
+      sz2  = a12*vb1z + a22*vb2z + a23*vb3z;
+
+      f1[0] = a11*vb1x + a12*vb2x + a13*vb3x;
+      f1[1] = a11*vb1y + a12*vb2y + a13*vb3y;
+      f1[2] = a11*vb1z + a12*vb2z + a13*vb3z;
+
+      f2[0] = -sx2 - f1[0];
+      f2[1] = -sy2 - f1[1];
+      f2[2] = -sz2 - f1[2];
+
+      f4[0] = a13*vb1x + a23*vb2x + a33*vb3x;
+      f4[1] = a13*vb1y + a23*vb2y + a33*vb3y;
+      f4[2] = a13*vb1z + a23*vb2z + a33*vb3z;
+
+      f3[0] = sx2 - f4[0];
+      f3[1] = sy2 - f4[1];
+      f3[2] = sz2 - f4[2];
+
+      // no bonds crossing the plane
+
+      if (!right_cross && !middle_cross && !left_cross) continue;
+
+      // onPly right bond crossing the plane
+      if (right_cross && !middle_cross && !left_cross) {
+        double sgn = copysign(1.0, x_atom_1[dir] - pos);
+        df[0] = sgn * f1[0];
+        df[1] = sgn * f1[1];
+        df[2] = sgn * f1[2];
+      }
+
+      // only middle bond crossing the plane
+      else if (!right_cross && middle_cross && !left_cross) {
+        double sgn = copysign(1.0, x_atom_2[dir] - pos);
+        df[0] = sgn * (f2[0] + f1[0]);
+        df[1] = sgn * (f2[1] + f1[1]);
+        df[2] = sgn * (f2[2] + f1[2]);
+      }
+
+      // only left bond crossing the plane
+      else if (!right_cross && !middle_cross && left_cross) {
+        double sgn = copysign(1.0, x_atom_4[dir] - pos);
+        df[0] = sgn * f4[0];
+        df[1] = sgn * f4[1];
+        df[2] = sgn * f4[2];
+      }
+
+      // only right & middle bonds crossing the plane
+      else if (right_cross && middle_cross && !left_cross) {
+        double sgn = copysign(1.0, x_atom_2[dir] - pos);
+        df[0] = sgn * f2[0];
+        df[1] = sgn * f2[1];
+        df[2] = sgn * f2[2];
+      }
+
+      // only right & left bonds crossing the plane
+      else if (right_cross && !middle_cross && left_cross) {
+        double sgn = copysign(1.0, x_atom_1[dir] - pos);
+        df[0] = sgn * (f1[0] + f4[0]);
+        df[1] = sgn * (f1[1] + f4[1]);
+        df[2] = sgn * (f1[2] + f4[2]);
+      }
+
+      // only middle & left bonds crossing the plane
+      else if (!right_cross && middle_cross && left_cross) {
+        double sgn = copysign(1.0, x_atom_3[dir] - pos);
+        df[0] = sgn * f3[0];
+        df[1] = sgn * f3[1];
+        df[2] = sgn * f3[2];
+      }
+
+      // all three bonds crossing the plane
+      else if (right_cross && middle_cross && left_cross) {
+        double sgn = copysign(1.0, x_atom_1[dir] - pos);
+        df[0] = sgn * (f1[0] + f3[0]);
+        df[1] = sgn * (f1[1] + f3[1]);
+        df[2] = sgn * (f1[2] + f3[2]);
+      }
+      local_contribution[0] += df[0]/area*nktv2p;
+      local_contribution[1] += df[1]/area*nktv2p;
+      local_contribution[2] += df[2]/area*nktv2p;
+    }
+  }
+
+  // loop over the keywords and if necessary add the dihedral contribution
+  int m = 0;
+  while (m < nvalues) {
+    if ((which[m] == CONF) || (which[m] == TOTAL) || (which[m] == DIHEDRAL)) {
+        dihedral_local[m] = local_contribution[0];
+        dihedral_local[m+1] = local_contribution[1];
+        dihedral_local[m+2] = local_contribution[2];
+    }
+    m += 3;
+  }
+
+}
diff --git a/src/EXTRA-COMPUTE/compute_stress_mop.h b/src/EXTRA-COMPUTE/compute_stress_mop.h
index 86140dc278..0a0ea8b55a 100644
--- a/src/EXTRA-COMPUTE/compute_stress_mop.h
+++ b/src/EXTRA-COMPUTE/compute_stress_mop.h
@@ -40,15 +40,17 @@ class ComputeStressMop : public Compute {
   void compute_pairs();
   void compute_bonds();
   void compute_angles();
+  void compute_dihedrals();
 
   int nvalues, dir;
   int *which;
 
-  int bondflag, angleflag;
+  int bondflag, angleflag, dihedralflag;
 
   double *values_local, *values_global;
   double *bond_local, *bond_global;
   double *angle_local, *angle_global;
+  double *dihedral_local, *dihedral_global;
   double pos, pos1, dt, nktv2p, ftm2v;
   double area;
   class NeighList *list;
diff --git a/src/EXTRA-COMPUTE/compute_stress_mop_profile.cpp b/src/EXTRA-COMPUTE/compute_stress_mop_profile.cpp
index cc201fdbaa..676b0f5796 100644
--- a/src/EXTRA-COMPUTE/compute_stress_mop_profile.cpp
+++ b/src/EXTRA-COMPUTE/compute_stress_mop_profile.cpp
@@ -13,15 +13,17 @@
 
 /*------------------------------------------------------------------------
   Contributing Authors : Romain Vermorel (LFCR), Laurent Joly (ULyon)
-  Support for bonds added by : Evangelos Voyiatzis (NovaMechanics)
+  Support for bonds, angles and dihedrals added by : Evangelos Voyiatzis (NovaMechanics)
   --------------------------------------------------------------------------*/
 
 #include "compute_stress_mop_profile.h"
 
+#include "angle.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "bond.h"
 #include "comm.h"
+#include "dihedral.h"
 #include "domain.h"
 #include "error.h"
 #include "force.h"
@@ -37,9 +39,10 @@
 
 using namespace LAMMPS_NS;
 
+static constexpr double SMALL =     0.001;
+
 enum { X, Y, Z };
-enum { LOWER, CENTER, UPPER, COORD };
-enum { TOTAL, CONF, KIN, PAIR, BOND };
+enum { TOTAL, CONF, KIN, PAIR, BOND, ANGLE, DIHEDRAL };
 
 /* ---------------------------------------------------------------------- */
 
@@ -49,6 +52,8 @@ ComputeStressMopProfile::ComputeStressMopProfile(LAMMPS *lmp, int narg, char **a
   if (narg < 7) utils::missing_cmd_args(FLERR, "compute stress/mop/profile", error);
 
   bondflag = 0;
+  angleflag = 0;
+  dihedralflag = 0;
 
   // set compute mode and direction of plane(s) for pressure calculation
 
@@ -63,15 +68,15 @@ ComputeStressMopProfile::ComputeStressMopProfile(LAMMPS *lmp, int narg, char **a
 
   // bin parameters
 
-  if (strcmp(arg[4], "lower") == 0)
-    originflag = LOWER;
-  else if (strcmp(arg[4], "center") == 0)
-    originflag = CENTER;
-  else if (strcmp(arg[4], "upper") == 0)
-    originflag = UPPER;
-  else
-    originflag = COORD;
-  if (originflag == COORD) origin = utils::numeric(FLERR, arg[4], false, lmp);
+  if (strcmp(arg[4], "lower") == 0) {
+    origin = domain->boxlo[dir];
+  } else if (strcmp(arg[4], "center") == 0) {
+    origin = 0.5 * (domain->boxlo[dir] + domain->boxhi[dir]);
+  } else if (strcmp(arg[4], "upper") == 0) {
+    origin = domain->boxhi[dir];
+  } else {
+    origin = utils::numeric(FLERR, arg[4], false, lmp);
+  }
   delta = utils::numeric(FLERR, arg[5], false, lmp);
   invdelta = 1.0 / delta;
 
@@ -108,6 +113,16 @@ ComputeStressMopProfile::ComputeStressMopProfile(LAMMPS *lmp, int narg, char **a
         which[nvalues] = BOND;
         nvalues++;
       }
+    } else if (strcmp(arg[iarg], "angle") == 0) {
+      for (i = 0; i < 3; i++) {
+        which[nvalues] = ANGLE;
+        nvalues++;
+      }
+    } else if (strcmp(arg[iarg],"dihedral") == 0) {
+      for (i=0; i<3; i++) {
+        which[nvalues] = DIHEDRAL;
+        nvalues++;
+      }
     } else
       error->all(FLERR, "Illegal compute stress/mop/profile command");    //break;
 
@@ -133,6 +148,10 @@ ComputeStressMopProfile::ComputeStressMopProfile(LAMMPS *lmp, int narg, char **a
   values_local = values_global = array = nullptr;
   bond_local = nullptr;
   bond_global = nullptr;
+  angle_local = nullptr;
+  angle_global = nullptr;
+  dihedral_local = nullptr;
+  dihedral_global = nullptr;
   local_contribution = nullptr;
 
   // bin setup
@@ -161,6 +180,10 @@ ComputeStressMopProfile::~ComputeStressMopProfile()
   memory->destroy(values_global);
   memory->destroy(bond_local);
   memory->destroy(bond_global);
+  memory->destroy(angle_local);
+  memory->destroy(angle_global);
+  memory->destroy(dihedral_local);
+  memory->destroy(dihedral_global);
   memory->destroy(local_contribution);
   memory->destroy(array);
 }
@@ -208,13 +231,25 @@ void ComputeStressMopProfile::init()
 
     if (force->bond) bondflag = 1;
 
-    if (force->angle)
-      if ((strcmp(force->angle_style, "zero") != 0) && (strcmp(force->angle_style, "none") != 0))
-        error->all(FLERR, "compute stress/mop/profile does not account for angle potentials");
-    if (force->dihedral)
-      if ((strcmp(force->dihedral_style, "zero") != 0) &&
-          (strcmp(force->dihedral_style, "none") != 0))
-        error->all(FLERR, "compute stress/mop/profile does not account for dihedral potentials");
+    if (force->angle) {
+      if (force->angle->born_matrix_enable == 0) {
+        if ((strcmp(force->angle_style, "zero") != 0) && (strcmp(force->angle_style, "none") != 0))
+          error->all(FLERR,"compute stress/mop/profile does not account for angle potentials");
+      } else {
+        angleflag = 1;
+      }
+    }
+
+    if (force->dihedral) {
+      if (force->dihedral->born_matrix_enable == 0) {
+        if ((strcmp(force->dihedral_style, "zero") != 0) &&
+            (strcmp(force->dihedral_style, "none") != 0))
+          error->all(FLERR, "compute stress/mop/profile does not account for dihedral potentials");
+      } else {
+        dihedralflag = 1;
+      }
+    }
+
     if (force->improper)
       if ((strcmp(force->improper_style, "zero") != 0) &&
           (strcmp(force->improper_style, "none") != 0))
@@ -263,16 +298,43 @@ void ComputeStressMopProfile::compute_array()
   }
 
   // sum bond contribution over all procs
-
   MPI_Allreduce(&bond_local[0][0], &bond_global[0][0], nbins * nvalues, MPI_DOUBLE, MPI_SUM, world);
 
+  if (angleflag) {
+    //Compute angle contribution on separate procs
+    compute_angles();
+  } else {
+    for (int m = 0; m < nbins; m++) {
+      for (int i = 0; i < nvalues; i++) {
+        angle_local[m][i] = 0.0;
+      }
+    }
+  }
+
+  // sum angle contribution over all procs
+  MPI_Allreduce(&angle_local[0][0],&angle_global[0][0],nbins*nvalues,MPI_DOUBLE,MPI_SUM,world);
+
+  if (dihedralflag) {
+    //Compute dihedral contribution on separate procs
+    compute_dihedrals();
+  } else {
+    for (int m = 0; m < nbins; m++) {
+      for (int i = 0; i < nvalues; i++) {
+        dihedral_local[m][i] = 0.0;
+      }
+    }
+  }
+
+  // sum dihedral contribution over all procs
+  MPI_Allreduce(&dihedral_local[0][0],&dihedral_global[0][0],nbins*nvalues,MPI_DOUBLE,MPI_SUM,world);
+
   for (int ibin = 0; ibin < nbins; ibin++) {
-    array[ibin][0] = coord[ibin][0];
+    array[ibin][0] = coord[ibin];
 
     int mo = 1;
     int m = 0;
     while (m < nvalues) {
-      array[ibin][m + mo] = values_global[ibin][m] + bond_global[ibin][m];
+      array[ibin][m + mo] = values_global[ibin][m] + bond_global[ibin][m] + angle_global[ibin][m] + dihedral_global[ibin][m];
       m++;
     }
   }
@@ -366,8 +428,8 @@ void ComputeStressMopProfile::compute_pairs()
           if (newton_pair || j < nlocal) {
 
             for (ibin = 0; ibin < nbins; ibin++) {
-              pos = coord[ibin][0];
-              pos1 = coordp[ibin][0];
+              pos = coord[ibin];
+              pos1 = coordp[ibin];
 
               // check if ij pair is across plane, add contribution to pressure
 
@@ -392,8 +454,8 @@ void ComputeStressMopProfile::compute_pairs()
           } else {
 
             for (ibin = 0; ibin < nbins; ibin++) {
-              pos = coord[ibin][0];
-              pos1 = coordp[ibin][0];
+              pos = coord[ibin];
+              pos1 = coordp[ibin];
 
               //check if ij pair is across plane, add contribution to pressure
 
@@ -454,15 +516,29 @@ void ComputeStressMopProfile::compute_pairs()
           xj[2] = xi[2] - vi[2] * dt + fi[2] * iterm * dt;
 
           for (ibin = 0; ibin < nbins; ibin++) {
-            pos = coord[ibin][0];
-            pos1 = coordp[ibin][0];
+            pos = coord[ibin];
+            pos1 = coordp[ibin];
 
-            if (((xi[dir] - pos) * (xj[dir] - pos) * (xi[dir] - pos1) * (xj[dir] - pos1) < 0)) {
+            // minimum image of xi with respect to the plane
+            xi[dir] -= pos;
+            domain->minimum_image(xi[0], xi[1], xi[2]);
+            xi[dir] += pos;
+
+            // minimum image of xj with respect to xi
+            xj[0] -= xi[0];
+            xj[1] -= xi[1];
+            xj[2] -= xi[2];
+            domain->minimum_image(xi[0], xi[1], xi[2]);
+            xj[0] += xi[0];
+            xj[1] += xi[1];
+            xj[2] += xi[2];
+
+            double tau = (xi[dir] - pos) / (xi[dir] - xj[dir]);
+            if ((tau <= 1) && (tau >= 0)) {
 
               sgn = copysign(1.0, vi[dir]);
 
-              // approximate crossing velocity by v(t-dt/2) (based on Velocity-Verlet alg.)
-
+              //approximate crossing velocity by v(t-dt/2) (based on Velocity-Verlet alg.)
               double vcross[3];
               vcross[0] = vi[0] - fi[0] * iterm;
               vcross[1] = vi[1] - fi[1] * iterm;
@@ -549,7 +625,7 @@ void ComputeStressMopProfile::compute_bonds()
       if (btype <= 0) continue;
 
       for (int ibin = 0; ibin < nbins; ibin++) {
-        double pos = coord[ibin][0];
+        double pos = coord[ibin];
 
         // minimum image of atom1 with respect to the plane of interest
 
@@ -607,6 +683,506 @@ void ComputeStressMopProfile::compute_bonds()
   }
 }
 
+/*------------------------------------------------------------------------
+  compute angle contribution to pressure of local proc
+  -------------------------------------------------------------------------*/
+
+void ComputeStressMopProfile::compute_angles()
+{
+  int na, atom1, atom2, atom3, imol, iatom, atype;
+  tagint tagprev;
+  double r1, r2, cos_theta;
+
+  double **x = atom->x;
+  tagint *tag = atom->tag;
+  int *num_angle = atom->num_angle;
+  tagint **angle_atom1 = atom->angle_atom1;
+  tagint **angle_atom2 = atom->angle_atom2;
+  tagint **angle_atom3 = atom->angle_atom3;
+  int **angle_type = atom->angle_type;
+  int *mask = atom->mask;
+
+  int *molindex = atom->molindex;
+  int *molatom = atom->molatom;
+  Molecule **onemols = atom->avec->onemols;
+
+  int nlocal = atom->nlocal;
+  int molecular = atom->molecular;
+
+  // loop over all atoms and their angles
+  Angle *angle = force->angle;
+
+  double duang, du2ang;
+  double dx[3] = {0.0, 0.0, 0.0};
+  double dx_left[3] = {0.0, 0.0, 0.0};
+  double dx_right[3] = {0.0, 0.0, 0.0};
+  double x_angle_left[3] = {0.0, 0.0, 0.0};
+  double x_angle_middle[3] = {0.0, 0.0, 0.0};
+  double x_angle_right[3] = {0.0, 0.0, 0.0};
+  double dcos_theta[3] = {0.0, 0.0, 0.0};
+
+  // initialization
+  for (int m = 0; m < nbins; m++) {
+    for (int i = 0; i < nvalues; i++) {
+      angle_local[m][i] = 0.0;
+    }
+    local_contribution[m][0] = 0.0;
+    local_contribution[m][1] = 0.0;
+    local_contribution[m][2] = 0.0;
+  }
+
+
+  for (atom2 = 0; atom2 < nlocal; atom2++) {
+    if (!(mask[atom2] & groupbit)) continue;
+
+    if (molecular == 1)
+      na = num_angle[atom2];
+    else {
+      if (molindex[atom2] < 0) continue;
+      imol = molindex[atom2];
+      iatom = molatom[atom2];
+      na = onemols[imol]->num_angle[iatom];
+    }
+
+    for (int i = 0; i < na; i++) {
+      if (molecular == 1) {
+        if (tag[atom2] != angle_atom2[atom2][i]) continue;
+        atype = angle_type[atom2][i];
+        atom1 = atom->map(angle_atom1[atom2][i]);
+        atom3 = atom->map(angle_atom3[atom2][i]);
+      } else {
+        if (tag[atom2] != onemols[imol]->angle_atom2[atom2][i]) continue;
+        atype = onemols[imol]->angle_type[atom2][i];
+        tagprev = tag[atom2] - iatom - 1;
+        atom1 = atom->map(onemols[imol]->angle_atom1[atom2][i] + tagprev);
+        atom3 = atom->map(onemols[imol]->angle_atom3[atom2][i] + tagprev);
+      }
+
+      if (atom1 < 0 || !(mask[atom1] & groupbit)) continue;
+      if (atom3 < 0 || !(mask[atom3] & groupbit)) continue;
+      if (atype <= 0) continue;
+
+      for (int ibin = 0; ibin<nbins; ibin++) {
+        double pos = coord[ibin];
+
+        // minimum image of atom1 with respect to the plane of interest
+        dx[0] = x[atom1][0];
+        dx[1] = x[atom1][1];
+        dx[2] = x[atom1][2];
+        dx[dir] -= pos;
+        domain->minimum_image(dx[0], dx[1], dx[2]);
+        x_angle_left[0] = dx[0];
+        x_angle_left[1] = dx[1];
+        x_angle_left[2] = dx[2];
+        x_angle_left[dir] += pos;
+
+        // minimum image of atom2 with respect to atom1
+        dx_left[0] = x[atom2][0] - x_angle_left[0];
+        dx_left[1] = x[atom2][1] - x_angle_left[1];
+        dx_left[2] = x[atom2][2] - x_angle_left[2];
+        domain->minimum_image(dx_left[0], dx_left[1], dx_left[2]);
+        x_angle_middle[0] = x_angle_left[0] + dx_left[0];
+        x_angle_middle[1] = x_angle_left[1] + dx_left[1];
+        x_angle_middle[2] = x_angle_left[2] + dx_left[2];
+
+        // minimum image of atom3 with respect to atom2
+        dx_right[0] = x[atom3][0] - x_angle_middle[0];
+        dx_right[1] = x[atom3][1] - x_angle_middle[1];
+        dx_right[2] = x[atom3][2] - x_angle_middle[2];
+        domain->minimum_image(dx_right[0], dx_right[1], dx_right[2]);
+        x_angle_right[0] = x_angle_middle[0] + dx_right[0];
+        x_angle_right[1] = x_angle_middle[1] + dx_right[1];
+        x_angle_right[2] = x_angle_middle[2] + dx_right[2];
+
+        // check if any bond vector crosses the plane of interest
+        double tau_right = (x_angle_right[dir] - pos) / (x_angle_right[dir] - x_angle_middle[dir]);
+        double tau_left = (x_angle_middle[dir] - pos) / (x_angle_middle[dir] - x_angle_left[dir]);
+        bool right_cross = ((tau_right >=0) && (tau_right  <= 1));
+        bool left_cross = ((tau_left >=0) && (tau_left <= 1));
+
+        // no bonds crossing the plane
+        if (!right_cross && !left_cross) continue;
+
+        // compute the cos(theta) of the angle
+        r1 = sqrt(dx_left[0]*dx_left[0] + dx_left[1]*dx_left[1] + dx_left[2]*dx_left[2]);
+        r2 = sqrt(dx_right[0]*dx_right[0] + dx_right[1]*dx_right[1] + dx_right[2]*dx_right[2]);
+        cos_theta = -(dx_right[0]*dx_left[0] + dx_right[1]*dx_left[1] + dx_right[2]*dx_left[2])/(r1*r2);
+
+        if (cos_theta >  1.0) cos_theta = 1.0;
+        if (cos_theta < -1.0) cos_theta = -1.0;
+
+        // The method returns derivative with regards to cos(theta)
+        angle->born_matrix(atype, atom1, atom2, atom3, duang, du2ang);
+        // only right bond crossing the plane
+        if (right_cross && !left_cross)
+        {
+          double sgn = copysign(1.0, x_angle_right[dir] - pos);
+          dcos_theta[0] = sgn*(dx_right[0]*cos_theta/r2 + dx_left[0]/r1)/r2;
+          dcos_theta[1] = sgn*(dx_right[1]*cos_theta/r2 + dx_left[1]/r1)/r2;
+          dcos_theta[2] = sgn*(dx_right[2]*cos_theta/r2 + dx_left[2]/r1)/r2;
+        }
+
+        // only left bond crossing the plane
+        if (!right_cross && left_cross)
+        {
+          double sgn = copysign(1.0, x_angle_left[dir] - pos);
+          dcos_theta[0] = -sgn*(dx_left[0]*cos_theta/r1 + dx_right[0]/r2)/r1;
+          dcos_theta[1] = -sgn*(dx_left[1]*cos_theta/r1 + dx_right[1]/r2)/r1;
+          dcos_theta[2] = -sgn*(dx_left[2]*cos_theta/r1 + dx_right[2]/r2)/r1;
+        }
+
+        // both bonds crossing the plane
+        if (right_cross && left_cross)
+        {
+          // due to right bond
+          double sgn = copysign(1.0, x_angle_middle[dir] - pos);
+          dcos_theta[0] = -sgn*(dx_right[0]*cos_theta/r2 + dx_left[0]/r1)/r2;
+          dcos_theta[1] = -sgn*(dx_right[1]*cos_theta/r2 + dx_left[1]/r1)/r2;
+          dcos_theta[2] = -sgn*(dx_right[2]*cos_theta/r2 + dx_left[2]/r1)/r2;
+
+          // due to left bond
+          dcos_theta[0] += sgn*(dx_left[0]*cos_theta/r1 + dx_right[0]/r2)/r1;
+          dcos_theta[1] += sgn*(dx_left[1]*cos_theta/r1 + dx_right[1]/r2)/r1;
+          dcos_theta[2] += sgn*(dx_left[2]*cos_theta/r1 + dx_right[2]/r2)/r1;
+        }
+
+        // final contribution of the given angle term
+        local_contribution[ibin][0] += duang*dcos_theta[0]/area*nktv2p;
+        local_contribution[ibin][1] += duang*dcos_theta[1]/area*nktv2p;
+        local_contribution[ibin][2] += duang*dcos_theta[2]/area*nktv2p;
+      }
+    }
+  }
+
+  // loop over the keywords and if necessary add the angle contribution
+  int m = 0;
+  while (m < nvalues) {
+    if (which[m] == CONF || which[m] == TOTAL || which[m] == ANGLE) {
+      for (int ibin = 0; ibin < nbins; ibin++) {
+        angle_local[ibin][m] = local_contribution[ibin][0];
+        angle_local[ibin][m+1] = local_contribution[ibin][1];
+        angle_local[ibin][m+2] = local_contribution[ibin][2];
+      }
+    }
+    m += 3;
+  }
+}
+
+/*------------------------------------------------------------------------
+  compute dihedral contribution to pressure of local proc
+  -------------------------------------------------------------------------*/
+
+void ComputeStressMopProfile::compute_dihedrals()
+{
+  int i, nd, atom1, atom2, atom3, atom4, imol, iatom;
+  tagint tagprev;
+  double vb1x, vb1y, vb1z, vb2x, vb2y, vb2z, vb3x, vb3y, vb3z;
+  double vb2xm, vb2ym, vb2zm;
+  double sb1, sb2, sb3, rb1, rb3, c0, b1mag2, b1mag, b2mag2;
+  double b2mag, b3mag2, b3mag, c2mag, ctmp, r12c1, c1mag, r12c2;
+  double s1, s2, s12, sc1, sc2, a11, a22, a33, a12, a13, a23;
+  double df[3], f1[3], f2[3], f3[3], f4[3];
+  double c, sx2, sy2, sz2, sin2;
+
+  double **x = atom->x;
+  tagint *tag = atom->tag;
+  int *num_dihedral = atom->num_dihedral;
+  tagint **dihedral_atom1 = atom->dihedral_atom1;
+  tagint **dihedral_atom2 = atom->dihedral_atom2;
+  tagint **dihedral_atom3 = atom->dihedral_atom3;
+  tagint **dihedral_atom4 = atom->dihedral_atom4;
+  int *mask = atom->mask;
+
+  int *molindex = atom->molindex;
+  int *molatom = atom->molatom;
+  Molecule **onemols = atom->avec->onemols;
+
+  int nlocal = atom->nlocal;
+  int molecular = atom->molecular;
+
+  // loop over all atoms and their dihedrals
+
+  Dihedral *dihedral = force->dihedral;
+
+  double dudih, du2dih;
+
+  double diffx[3] = {0.0, 0.0, 0.0};
+  double x_atom_1[3] = {0.0, 0.0, 0.0};
+  double x_atom_2[3] = {0.0, 0.0, 0.0};
+  double x_atom_3[3] = {0.0, 0.0, 0.0};
+  double x_atom_4[3] = {0.0, 0.0, 0.0};
+
+  // initialization
+  for (int m = 0; m < nbins; m++) {
+    for (int i = 0; i < nvalues; i++) {
+      dihedral_local[m][i] = 0.0;
+    }
+    local_contribution[m][0] = 0.0;
+    local_contribution[m][1] = 0.0;
+    local_contribution[m][2] = 0.0;
+  }
+
+  for (atom2 = 0; atom2 < nlocal; atom2++) {
+    if (!(mask[atom2] & groupbit)) continue;
+
+    if (molecular == Atom::MOLECULAR)
+      nd = num_dihedral[atom2];
+    else {
+      if (molindex[atom2] < 0) continue;
+      imol = molindex[atom2];
+      iatom = molatom[atom2];
+      nd = onemols[imol]->num_dihedral[iatom];
+    }
+
+    for (i = 0; i < nd; i++) {
+      if (molecular == 1) {
+        if (tag[atom2] != dihedral_atom2[atom2][i]) continue;
+          atom1 = atom->map(dihedral_atom1[atom2][i]);
+          atom3 = atom->map(dihedral_atom3[atom2][i]);
+          atom4 = atom->map(dihedral_atom4[atom2][i]);
+      } else {
+        if (tag[atom2] != onemols[imol]->dihedral_atom2[atom2][i]) continue;
+        tagprev = tag[atom2] - iatom - 1;
+        atom1 = atom->map(onemols[imol]->dihedral_atom1[atom2][i] + tagprev);
+        atom3 = atom->map(onemols[imol]->dihedral_atom3[atom2][i] + tagprev);
+        atom4 = atom->map(onemols[imol]->dihedral_atom4[atom2][i] + tagprev);
+      }
+
+      if (atom1 < 0 || !(mask[atom1] & groupbit)) continue;
+      if (atom3 < 0 || !(mask[atom3] & groupbit)) continue;
+      if (atom4 < 0 || !(mask[atom4] & groupbit)) continue;
+
+      for (int ibin = 0; ibin<nbins; ibin++) {
+        double pos = coord[ibin];
+
+        // minimum image of atom1 with respect to the plane of interest
+        x_atom_1[0] = x[atom1][0];
+        x_atom_1[1] = x[atom1][1];
+        x_atom_1[2] = x[atom1][2];
+        x_atom_1[dir] -= pos;
+        domain->minimum_image(x_atom_1[0], x_atom_1[1], x_atom_1[2]);
+        x_atom_1[dir] += pos;
+
+        // minimum image of atom2 with respect to atom1
+        diffx[0] = x[atom2][0] - x_atom_1[0];
+        diffx[1] = x[atom2][1] - x_atom_1[1];
+        diffx[2] = x[atom2][2] - x_atom_1[2];
+        domain->minimum_image(diffx[0], diffx[1], diffx[2]);
+        x_atom_2[0] = x_atom_1[0] + diffx[0];
+        x_atom_2[1] = x_atom_1[1] + diffx[1];
+        x_atom_2[2] = x_atom_1[2] + diffx[2];
+
+        // minimum image of atom3 with respect to atom2
+        diffx[0] = x[atom3][0] - x_atom_2[0];
+        diffx[1] = x[atom3][1] - x_atom_2[1];
+        diffx[2] = x[atom3][2] - x_atom_2[2];
+        domain->minimum_image(diffx[0], diffx[1], diffx[2]);
+        x_atom_3[0] = x_atom_2[0] + diffx[0];
+        x_atom_3[1] = x_atom_2[1] + diffx[1];
+        x_atom_3[2] = x_atom_2[2] + diffx[2];
+
+        // minimum image of atom3 with respect to atom2
+        diffx[0] = x[atom4][0] - x_atom_3[0];
+        diffx[1] = x[atom4][1] - x_atom_3[1];
+        diffx[2] = x[atom4][2] - x_atom_3[2];
+        domain->minimum_image(diffx[0], diffx[1], diffx[2]);
+        x_atom_4[0] = x_atom_3[0] + diffx[0];
+        x_atom_4[1] = x_atom_3[1] + diffx[1];
+        x_atom_4[2] = x_atom_3[2] + diffx[2];
+
+        // check if any bond vector crosses the plane of interest
+        double tau_right = (x_atom_2[dir] - pos) / (x_atom_2[dir] - x_atom_1[dir]);
+        double tau_middle = (x_atom_3[dir] - pos) / (x_atom_3[dir] - x_atom_2[dir]);
+        double tau_left = (x_atom_4[dir] - pos) / (x_atom_4[dir] - x_atom_3[dir]);
+        bool right_cross = ((tau_right >=0) && (tau_right  <= 1));
+        bool middle_cross = ((tau_middle >= 0) && (tau_middle <= 1));
+        bool left_cross = ((tau_left >=0) && (tau_left <= 1));
+
+        // no bonds crossing the plane
+        if (!right_cross && !middle_cross && !left_cross) continue;
+
+        dihedral->born_matrix(i, atom1, atom2, atom3, atom4, dudih, du2dih);
+
+        // first bond
+        vb1x = x_atom_1[0] - x_atom_2[0];
+        vb1y = x_atom_1[1] - x_atom_2[1];
+        vb1z = x_atom_1[2] - x_atom_2[2];
+
+        // second bond
+        vb2x = x_atom_3[0] - x_atom_2[0];
+        vb2y = x_atom_3[1] - x_atom_2[1];
+        vb2z = x_atom_3[2] - x_atom_2[2];
+
+        vb2xm = -vb2x;
+        vb2ym = -vb2y;
+        vb2zm = -vb2z;
+
+        // third bond
+        vb3x = x_atom_4[0] - x_atom_3[0];
+        vb3y = x_atom_4[1] - x_atom_3[1];
+        vb3z = x_atom_4[2] - x_atom_3[2];
+
+        // c0 calculation
+        sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+        sb2 = 1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
+        sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+
+        rb1 = sqrt(sb1);
+        rb3 = sqrt(sb3);
+
+        c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+        // 1st and 2nd angle
+        b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+        b1mag = sqrt(b1mag2);
+        b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+        b2mag = sqrt(b2mag2);
+        b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+        b3mag = sqrt(b3mag2);
+
+        ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z;
+        r12c1 = 1.0 / (b1mag*b2mag);
+        c1mag = ctmp * r12c1;
+
+        ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
+        r12c2 = 1.0 / (b2mag*b3mag);
+        c2mag = ctmp * r12c2;
+
+        // cos and sin of 2 angles and final c
+        sin2 = MAX(1.0 - c1mag*c1mag,0.0);
+        sc1 = sqrt(sin2);
+        if (sc1 < SMALL) sc1 = SMALL;
+        sc1 = 1.0/sc1;
+
+        sin2 = MAX(1.0 - c2mag*c2mag,0.0);
+        sc2 = sqrt(sin2);
+        if (sc2 < SMALL) sc2 = SMALL;
+        sc2 = 1.0/sc2;
+
+        s1 = sc1 * sc1;
+        s2 = sc2 * sc2;
+        s12 = sc1 * sc2;
+        c = (c0 + c1mag*c2mag) * s12;
+
+        // error check
+        if (c > 1.0) c = 1.0;
+        if (c < -1.0) c = -1.0;
+
+        // forces on each particle
+        double a = dudih;
+        c = c * a;
+        s12 = s12 * a;
+        a11 = c*sb1*s1;
+        a22 = -sb2 * (2.0*c0*s12 - c*(s1+s2));
+        a33 = c*sb3*s2;
+        a12 = -r12c1 * (c1mag*c*s1 + c2mag*s12);
+        a13 = -rb1*rb3*s12;
+        a23 = r12c2 * (c2mag*c*s2 + c1mag*s12);
+
+        sx2  = a12*vb1x + a22*vb2x + a23*vb3x;
+        sy2  = a12*vb1y + a22*vb2y + a23*vb3y;
+        sz2  = a12*vb1z + a22*vb2z + a23*vb3z;
+
+        f1[0] = a11*vb1x + a12*vb2x + a13*vb3x;
+        f1[1] = a11*vb1y + a12*vb2y + a13*vb3y;
+        f1[2] = a11*vb1z + a12*vb2z + a13*vb3z;
+
+        f2[0] = -sx2 - f1[0];
+        f2[1] = -sy2 - f1[1];
+        f2[2] = -sz2 - f1[2];
+
+        f4[0] = a13*vb1x + a23*vb2x + a33*vb3x;
+        f4[1] = a13*vb1y + a23*vb2y + a33*vb3y;
+        f4[2] = a13*vb1z + a23*vb2z + a33*vb3z;
+
+        f3[0] = sx2 - f4[0];
+        f3[1] = sy2 - f4[1];
+        f3[2] = sz2 - f4[2];
+
+        // only right bond crossing the plane
+        if (right_cross && !middle_cross && !left_cross)
+        {
+          double sgn = copysign(1.0, x_atom_1[dir] - pos);
+          df[0] = sgn * f1[0];
+          df[1] = sgn * f1[1];
+          df[2] = sgn * f1[2];
+        }
+
+        // only middle bond crossing the plane
+        if (!right_cross && middle_cross && !left_cross)
+        {
+          double sgn = copysign(1.0, x_atom_2[dir] - pos);
+          df[0] = sgn * (f2[0] + f1[0]);
+          df[1] = sgn * (f2[1] + f1[1]);
+          df[2] = sgn * (f2[2] + f1[2]);
+        }
+
+        // only left bond crossing the plane
+        if (!right_cross && !middle_cross && left_cross)
+        {
+          double sgn = copysign(1.0, x_atom_4[dir] - pos);
+          df[0] = sgn * f4[0];
+          df[1] = sgn * f4[1];
+          df[2] = sgn * f4[2];
+        }
+
+        // only right & middle bonds crossing the plane
+        if (right_cross && middle_cross && !left_cross)
+        {
+          double sgn = copysign(1.0, x_atom_2[dir] - pos);
+          df[0] = sgn * f2[0];
+          df[1] = sgn * f2[1];
+          df[2] = sgn * f2[2];
+        }
+
+        // only right & left bonds crossing the plane
+        if (right_cross && !middle_cross && left_cross)
+        {
+          double sgn = copysign(1.0, x_atom_1[dir] - pos);
+          df[0] = sgn * (f1[0] + f4[0]);
+          df[1] = sgn * (f1[1] + f4[1]);
+          df[2] = sgn * (f1[2] + f4[2]);
+        }
+
+        // only middle & left bonds crossing the plane
+        if (!right_cross && middle_cross && left_cross)
+        {
+          double sgn = copysign(1.0, x_atom_3[dir] - pos);
+          df[0] = sgn * f3[0];
+          df[1] = sgn * f3[1];
+          df[2] = sgn * f3[2];
+        }
+
+        // all three bonds crossing the plane
+        if (right_cross && middle_cross && left_cross)
+        {
+          double sgn = copysign(1.0, x_atom_1[dir] - pos);
+          df[0] = sgn * (f1[0] + f3[0]);
+          df[1] = sgn * (f1[1] + f3[1]);
+          df[2] = sgn * (f1[2] + f3[2]);
+        }
+
+        local_contribution[ibin][0] += df[0]/area*nktv2p;
+        local_contribution[ibin][1] += df[1]/area*nktv2p;
+        local_contribution[ibin][2] += df[2]/area*nktv2p;
+      }
+    }
+  }
+
+  // loop over the keywords and if necessary add the dihedral contribution
+  int m = 0;
+  while (m < nvalues) {
+    if ((which[m] == CONF) || (which[m] == TOTAL) || (which[m] == DIHEDRAL)) {
+      for (int ibin = 0; ibin < nbins; ibin++) {
+        dihedral_local[ibin][m] = local_contribution[ibin][0];
+        dihedral_local[ibin][m+1] = local_contribution[ibin][1];
+        dihedral_local[ibin][m+2] = local_contribution[ibin][2];
+      }
+    }
+    m += 3;
+  }
+
+}
+
 /* ----------------------------------------------------------------------
    setup 1d bins and their extent and coordinates
    called at init()
@@ -621,47 +1197,39 @@ void ComputeStressMopProfile::setup_bins()
   boxlo = domain->boxlo;
   boxhi = domain->boxhi;
 
-  if (originflag == LOWER)
-    origin = boxlo[dir];
-  else if (originflag == UPPER)
-    origin = boxhi[dir];
-  else if (originflag == CENTER)
-    origin = 0.5 * (boxlo[dir] + boxhi[dir]);
+  if ((origin > domain->boxhi[dir]) || (origin < domain->boxlo[dir]))
+    error->all(FLERR, "Origin of bins for compute stress/mop/profile is out of bounds");
 
-  if (origin < boxlo[dir]) {
-    error->all(FLERR, "Origin of bins for compute stress/mop/profile is out of bounds");
-  } else {
-    n = static_cast<int>((origin - boxlo[dir]) * invdelta);
-    lo = origin - n * delta;
-  }
-  if (origin < boxhi[dir]) {
-    n = static_cast<int>((boxhi[dir] - origin) * invdelta);
-    hi = origin + n * delta;
-  } else {
-    error->all(FLERR, "Origin of bins for compute stress/mop/profile is out of bounds");
-  }
+  n = static_cast<int> ((origin - boxlo[dir]) * invdelta);
+  lo = origin - n*delta;
+
+  n = static_cast<int> ((boxhi[dir] - origin) * invdelta);
+  hi = origin + n*delta;
 
   offset = lo;
   nbins = static_cast<int>((hi - lo) * invdelta + 1.5);
 
-  // allocate bin arrays
-
-  memory->create(coord, nbins, 1, "stress/mop/profile:coord");
-  memory->create(coordp, nbins, 1, "stress/mop/profile:coordp");
+  //allocate bin arrays
+  memory->create(coord, nbins, "stress/mop/profile:coord");
+  memory->create(coordp, nbins, "stress/mop/profile:coordp");
   memory->create(values_local, nbins, nvalues, "stress/mop/profile:values_local");
   memory->create(values_global, nbins, nvalues, "stress/mop/profile:values_global");
   memory->create(bond_local, nbins, nvalues, "stress/mop/profile:bond_local");
   memory->create(bond_global, nbins, nvalues, "stress/mop/profile:bond_global");
+  memory->create(angle_local, nbins, nvalues, "stress/mop/profile:angle_local");
+  memory->create(angle_global, nbins, nvalues, "stress/mop/profile:angle_global");
+  memory->create(dihedral_local,nbins,nvalues,"stress/mop/profile:dihedral_local");
+  memory->create(dihedral_global,nbins,nvalues,"stress/mop/profile:dihedral_global");
   memory->create(local_contribution, nbins, 3, "stress/mop/profile:local_contribution");
 
   // set bin coordinates
 
   for (i = 0; i < nbins; i++) {
-    coord[i][0] = offset + i * delta;
-    if (coord[i][0] < (domain->boxlo[dir] + domain->prd_half[dir])) {
-      coordp[i][0] = coord[i][0] + domain->prd[dir];
+    coord[i] = offset + i * delta;
+    if (coord[i] < (domain->boxlo[dir] + domain->prd_half[dir])) {
+      coordp[i] = coord[i] + domain->prd[dir];
     } else {
-      coordp[i][0] = coord[i][0] - domain->prd[dir];
+      coordp[i] = coord[i] - domain->prd[dir];
     }
   }
 }
diff --git a/src/EXTRA-COMPUTE/compute_stress_mop_profile.h b/src/EXTRA-COMPUTE/compute_stress_mop_profile.h
index 2b0ffef0f8..b9b97617c0 100644
--- a/src/EXTRA-COMPUTE/compute_stress_mop_profile.h
+++ b/src/EXTRA-COMPUTE/compute_stress_mop_profile.h
@@ -39,19 +39,22 @@ class ComputeStressMopProfile : public Compute {
  private:
   void compute_pairs();
   void compute_bonds();
+  void compute_angles();
+  void compute_dihedrals();
   void setup_bins();
 
   int nvalues, dir;
   int *which;
 
-  int bondflag;
+  int bondflag, angleflag, dihedralflag;
 
-  int originflag;
   double origin, delta, offset, invdelta;
   int nbins;
-  double **coord, **coordp;
+  double *coord, *coordp;
   double **values_local, **values_global;
   double **bond_local, **bond_global;
+  double **angle_local, **angle_global;
+  double **dihedral_local, **dihedral_global;
   double **local_contribution;
 
   double dt, nktv2p, ftm2v;
diff --git a/src/EXTRA-COMPUTE/compute_stress_spherical.cpp b/src/EXTRA-COMPUTE/compute_stress_spherical.cpp
index db20ab6706..983d31559d 100644
--- a/src/EXTRA-COMPUTE/compute_stress_spherical.cpp
+++ b/src/EXTRA-COMPUTE/compute_stress_spherical.cpp
@@ -34,7 +34,7 @@ using namespace MathConst;
 using MathSpecial::cube;
 using MathSpecial::square;
 
-#define SMALL 1.0e-10
+static constexpr double SMALL = 1.0e-10;
 
 /*-----------------------------------------------------------------------------------
   Contributing author: Olav Galteland (Norwegian University of Science and Technology)
diff --git a/src/EXTRA-FIX/fix_ave_correlate_long.cpp b/src/EXTRA-FIX/fix_ave_correlate_long.cpp
index 7fa57af343..fc1760b353 100644
--- a/src/EXTRA-FIX/fix_ave_correlate_long.cpp
+++ b/src/EXTRA-FIX/fix_ave_correlate_long.cpp
@@ -503,7 +503,7 @@ void FixAveCorrelateLong::end_of_step()
     if (overwrite) {
       bigint fileend = platform::ftell(fp);
       if ((fileend > 0) && (platform::ftruncate(fp,fileend)))
-        error->warning(FLERR,"Error while tuncating output: {}", utils::getsyserror());
+        error->warning(FLERR,"Error while truncating output: {}", utils::getsyserror());
     }
   }
 }
@@ -728,7 +728,7 @@ double FixAveCorrelateLong::memory_usage() {
 void FixAveCorrelateLong::write_restart(FILE *fp) {
   if (comm->me == 0) {
     int nsize = 3*npair*numcorrelators*p + 2*npair*numcorrelators
-                + numcorrelators*p + 2*numcorrelators + 6;
+                + numcorrelators*p + 2*numcorrelators + 7;
     int n=0;
     double *list;
     memory->create(list,nsize,"correlator:list");
@@ -736,6 +736,7 @@ void FixAveCorrelateLong::write_restart(FILE *fp) {
     list[n++] = numcorrelators;
     list[n++] = p;
     list[n++] = m;
+    list[n++] = kmax;
     list[n++] = last_accumulated_step;
     for (int i=0; i < npair; i++)
       for (int j=0; j < numcorrelators; j++) {
@@ -771,6 +772,7 @@ void FixAveCorrelateLong::restart(char *buf)
   int numcorrelatorsin = static_cast<int> (list[n++]);
   int pin = static_cast<int>(list[n++]);
   int min = static_cast<int>(list[n++]);
+  kmax = static_cast<int>(list[n++]);
   last_accumulated_step = static_cast<int>(list[n++]);
 
   if ((npairin!=npair) || (numcorrelatorsin!=numcorrelators) || (pin!=(int)p) || (min!=(int)m))
diff --git a/src/EXTRA-FIX/fix_ffl.cpp b/src/EXTRA-FIX/fix_ffl.cpp
index 0a06707fbd..3039d338fc 100644
--- a/src/EXTRA-FIX/fix_ffl.cpp
+++ b/src/EXTRA-FIX/fix_ffl.cpp
@@ -43,7 +43,7 @@ enum {CONSTANT,EQUAL,ATOM};
 enum {NO_FLIP, FLIP_RESCALE, FLIP_HARD, FLIP_SOFT};
 //#define FFL_DEBUG 1
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 /* syntax for fix_ffl:
  * fix nfix id-group ffl tau Tstart Tstop seed [flip_type]
diff --git a/src/EXTRA-FIX/fix_filter_corotate.cpp b/src/EXTRA-FIX/fix_filter_corotate.cpp
index c88969344a..74d1d18486 100644
--- a/src/EXTRA-FIX/fix_filter_corotate.cpp
+++ b/src/EXTRA-FIX/fix_filter_corotate.cpp
@@ -42,8 +42,8 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace FixConst;
 
-#define BIG 1.0e20
-#define MASSDELTA 0.1
+static constexpr double BIG = 1.0e20;
+static constexpr double MASSDELTA = 0.1;
 
 static const char cite_filter_corotate[] =
   "Mollified Impulse Method with Corotational Filter: doi:10.1016/j.jcp.2016.12.024\n\n"
diff --git a/src/EXTRA-FIX/fix_gle.cpp b/src/EXTRA-FIX/fix_gle.cpp
index c5a6c974f6..a65b60377e 100644
--- a/src/EXTRA-FIX/fix_gle.cpp
+++ b/src/EXTRA-FIX/fix_gle.cpp
@@ -41,7 +41,7 @@ enum{CONSTANT,EQUAL,ATOM};
 
 //#define GLE_DEBUG 1
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 /* syntax for fix_gle:
  * fix nfix id-group gle ns Tstart Tstop seed amatrix [noneq cmatrix] [every nmts]
diff --git a/src/EXTRA-FIX/fix_nonaffine_displacement.cpp b/src/EXTRA-FIX/fix_nonaffine_displacement.cpp
new file mode 100644
index 0000000000..a426a8fb55
--- /dev/null
+++ b/src/EXTRA-FIX/fix_nonaffine_displacement.cpp
@@ -0,0 +1,734 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Joel Clemmer (SNL), Ishan Srivastava (LBNL)
+------------------------------------------------------------------------- */
+
+#include "fix_nonaffine_displacement.h"
+
+#include "atom.h"
+#include "citeme.h"
+#include "comm.h"
+#include "domain.h"
+#include "error.h"
+#include "fix_store_atom.h"
+#include "force.h"
+#include "group.h"
+#include "math_extra.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "neighbor.h"
+#include "pair.h"
+#include "update.h"
+
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+using namespace MathExtra;
+
+enum { TYPE, RADIUS, CUSTOM };
+enum { INTEGRATED, D2MIN };
+enum { FIXED, OFFSET, UPDATE };
+
+static const char cite_nonaffine_d2min[] =
+  "@article{PhysRevE.57.7192,\n"
+  " title = {Dynamics of viscoplastic deformation in amorphous solids},\n"
+  " author = {Falk, M. L. and Langer, J. S.},\n"
+  " journal = {Phys. Rev. E},\n"
+  " volume = {57},\n"
+  " issue = {6},\n"
+  " pages = {7192--7205},\n"
+  " numpages = {0},\n"
+  " year = {1998},\n"
+  " month = {Jun},\n"
+  " publisher = {American Physical Society},\n"
+  " doi = {10.1103/PhysRevE.57.7192},\n"
+  "url = {https://link.aps.org/doi/10.1103/PhysRevE.57.7192}\n"
+  "}\n\n";
+
+/* ---------------------------------------------------------------------- */
+
+FixNonaffineDisplacement::FixNonaffineDisplacement(LAMMPS *lmp, int narg, char **arg) :
+  Fix(lmp, narg, arg), id_fix(nullptr), X(nullptr), Y(nullptr), F(nullptr), norm(nullptr)
+{
+  if (narg < 4) error->all(FLERR,"Illegal fix nonaffine/displacement command");
+
+  nevery = utils::inumeric(FLERR, arg[3], false, lmp);
+  if (nevery <= 0) error->all(FLERR,"Illegal nevery value {} in fix nonaffine/displacement", nevery);
+
+  reference_timestep = update_timestep = offset_timestep = -1;
+  int iarg = 4;
+  if (strcmp(arg[iarg], "integrated") == 0) {
+    nad_style = INTEGRATED;
+    nevery = 1;
+    iarg += 1;
+  } else if (strcmp(arg[iarg], "d2min") == 0) {
+    if (iarg + 1 > narg) error->all(FLERR,"Illegal fix nonaffine/displacement command");
+    nad_style = D2MIN;
+    if (strcmp(arg[iarg + 1], "type") == 0) {
+      cut_style = TYPE;
+    } else if (strcmp(arg[iarg + 1], "radius") == 0) {
+      cut_style = RADIUS;
+    } else if (strcmp(arg[iarg + 1], "custom") == 0) {
+      if (iarg + 2 > narg) error->all(FLERR,"Illegal fix nonaffine/displacement command");
+      cut_style = CUSTOM;
+      cutoff_custom = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+      cutsq_custom = cutoff_custom * cutoff_custom;
+      if (cutoff_custom <= 0)
+        error->all(FLERR, "Illegal custom cutoff length {}", arg[iarg + 2]);
+      iarg += 1;
+    } else error->all(FLERR,"Illegal cutoff style {} in fix nonaffine/displacement", arg[iarg + 1]);
+    iarg += 2;
+  } else error->all(FLERR,"Illegal nonaffine displacement style {} in fix nonaffine/displacement", arg[iarg]);
+
+  if (iarg + 2 > narg) error->all(FLERR,"Illegal fix nonaffine/displacement command");
+  if (strcmp(arg[iarg], "fixed") == 0) {
+    reference_style = FIXED;
+    reference_timestep = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
+    if (reference_timestep < 0)
+      error->all(FLERR, "Illegal reference timestep {} in fix nonaffine/displacement", arg[iarg + 1]);
+  } else if (strcmp(arg[iarg], "update") == 0) {
+    reference_style = UPDATE;
+    update_timestep = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
+    if (update_timestep < 0)
+      error->all(FLERR, "Illegal update timestep {} in fix nonaffine/displacement", arg[iarg + 1]);
+  } else if (strcmp(arg[iarg], "offset") == 0) {
+    reference_style = OFFSET;
+    offset_timestep = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
+    if ((offset_timestep <= 0) || (offset_timestep > nevery))
+      error->all(FLERR, "Illegal offset timestep {} in fix nonaffine/displacement", arg[iarg + 1]);
+  } else error->all(FLERR,"Illegal reference style {} in fix nonaffine/displacement", arg[iarg]);
+
+  if (nad_style == D2MIN)
+    if (cut_style == RADIUS && (!atom->radius_flag))
+      error->all(FLERR, "Fix nonaffine/displacement radius style requires atom attribute radius");
+
+  if (nad_style == INTEGRATED && reference_style == OFFSET)
+    error->all(FLERR, "Fix nonaffine/displacement cannot use the integrated style with an offset reference state");
+
+  peratom_flag = 1;
+  peratom_freq = nevery;
+  nmax = -1;
+  reference_saved = 0;
+  restart_global = 1;
+
+  size_peratom_cols = 3;
+  comm_reverse = 0;
+  comm_forward = 0;
+  if (nad_style == D2MIN) {
+    comm_reverse = 18;
+    comm_forward = 9;
+  }
+
+  if (nad_style == D2MIN && lmp->citeme) lmp->citeme->add(cite_nonaffine_d2min);
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixNonaffineDisplacement::~FixNonaffineDisplacement()
+{
+  if (id_fix && modify->nfix) modify->delete_fix(id_fix);
+  delete[] id_fix;
+
+  if (nad_style == D2MIN) {
+    memory->destroy(X);
+    memory->destroy(Y);
+    memory->destroy(F);
+    memory->destroy(norm);
+    memory->destroy(array_atom);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixNonaffineDisplacement::setmask()
+{
+  int mask = 0;
+  mask |= POST_FORCE;
+  return mask;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::post_constructor()
+{
+  // Create persistent peratom storage for either an integrated velocity or reference position
+  // Ghost atoms need reference coordinates for D2min
+  std::string ghost_status = "0";
+  if (nad_style == D2MIN) ghost_status = "1";
+
+  id_fix = utils::strdup(id + std::string("_FIX_PA"));
+  fix = dynamic_cast<FixStoreAtom *>(modify->add_fix(fmt::format("{} {} STORE/ATOM 3 0 {} 1", id_fix, group->names[igroup], ghost_status)));
+
+  if (nad_style == INTEGRATED)
+    array_atom = fix->astore;
+
+  if (nad_style == D2MIN)
+    grow_arrays(atom->nmax);
+
+  for (int i = 0; i < atom->nlocal; i++)
+    for (int j = 0; j < 3; j++) array_atom[i][j] = 0.0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::init()
+{
+  dtv = update->dt;
+
+  if ((!reference_saved) && (reference_style == FIXED) && (update->ntimestep > reference_timestep))
+    error->all(FLERR, "Initial timestep exceeds that of the reference state in fix nonaffine/displacement");
+
+  if (nad_style == D2MIN) {
+    if ((!force->pair) && (cut_style == TYPE))
+    error->all(FLERR,"Fix nonaffine/displacement D2Min option requires a pair style be defined "
+               "or cutoff specified");
+
+    // need an occasional half neighbor list
+
+    if (cut_style == RADIUS) {
+      neighbor->add_request(this, NeighConst::REQ_SIZE | NeighConst::REQ_OCCASIONAL);
+    } else {
+      auto req = neighbor->add_request(this, NeighConst::REQ_OCCASIONAL);
+      if (cut_style == CUSTOM) {
+        double skin = neighbor->skin;
+        mycutneigh = cutoff_custom + skin;
+
+        double cutghost;            // as computed by Neighbor and Comm
+        if (force->pair)
+          cutghost = MAX(force->pair->cutforce + skin, comm->cutghostuser);
+        else
+          cutghost = comm->cutghostuser;
+
+        if (mycutneigh > cutghost)
+          error->all(FLERR,"Fix nonaffine/displacement D2Min option cutoff exceeds ghost atom range - use comm_modify cutoff command");
+
+        req->set_cutoff(mycutneigh);
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::init_list(int /*id*/, NeighList *ptr)
+{
+  list = ptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::setup(int /*vflag*/)
+{
+  post_force(0); // Save state if needed before starting the 1st timestep
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::post_force(int /*vflag*/)
+{
+  if (reference_saved && (!update->setupflag)) {
+    if (nad_style == INTEGRATED) {
+      integrate_velocity();
+    } else {
+      if ((update->ntimestep % nevery) == 0) calculate_D2Min();
+    }
+  }
+
+  if (reference_style == FIXED)
+    if (update->ntimestep == reference_timestep)
+      save_reference_state();
+
+  if (reference_style == UPDATE)
+    if ((update->ntimestep % update_timestep) == 0)
+      save_reference_state();
+
+  if (reference_style == OFFSET)
+    if (((update->ntimestep + offset_timestep) % nevery) == 0)
+      save_reference_state();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::write_restart(FILE *fp)
+{
+  if (comm->me == 0) {
+    int size = sizeof(int);
+    fwrite(&size, sizeof(int), 1, fp);
+    fwrite(&reference_saved, sizeof(int), 1, fp);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::restart(char *buf)
+{
+  reference_saved = (int) ubuf(buf[0]).i;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::integrate_velocity()
+{
+  dtv = update->dt;
+
+  double **v = atom->v;
+
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  for (int m = 0; m < 3; m++) {
+    for (int i = 0; i < nlocal; i++) {
+      if (mask[i] & groupbit) {
+        array_atom[i][m] += dtv * v[i][m];
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::save_reference_state()
+{
+  double **x = atom->x;
+
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+
+  if (nad_style == D2MIN) {
+    for (int m = 0; m < 3; m++) {
+      for (int i = 0; i < nall; i++) {
+        if (mask[i] & groupbit)  array_atom[i][m] = x[i][m];
+      }
+    }
+  } else {
+    for (int m = 0; m < 3; m++) {
+      for (int i = 0; i < nall; i++) {
+        if (mask[i] & groupbit)  array_atom[i][m] = 0.0;
+      }
+    }
+  }
+
+  if (nad_style == D2MIN) {
+    xprd0 = domain->xprd;
+    yprd0 = domain->yprd;
+    zprd0 = domain->zprd;
+    xprd0_half = domain->xprd_half;
+    yprd0_half = domain->yprd_half;
+    zprd0_half = domain->zprd_half;
+    xy0 = domain->xy;
+    xz0 = domain->xz;
+    yz0 = domain->yz;
+  }
+
+  reference_saved = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::calculate_D2Min()
+{
+  // invoke half neighbor list (will copy or build if necessary)
+  neighbor->build_one(list);
+
+  if (atom->nmax > nmax)
+    grow_arrays(atom->nmax);
+
+  int i, j, k, l, ii, jj, inum, jnum, itype, jtype;
+  double evol, j2, edev;
+  double r[3], r0[3], rsq, radsum, temp[3];
+  double X_tmp[3][3], Y_tmp[3][3], F_tmp[3][3], E[3][3];
+  double Y_inv[3][3] = {{0.0,0.0,0.0},{0.0,0.0,0.0},{0.0,0.0,0.0}}; // Zero for 2d since not all entries used
+  int *ilist, *jlist, *numneigh, **firstneigh;
+
+  double **x = atom->x;
+  double **x0 = array_atom;
+  double *radius = atom->radius;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  int newton_pair = force->newton_pair;
+  int dim = domain->dimension;
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  Pair *pair = force->pair;
+  double **cutsq;
+  if (pair) cutsq = force->pair->cutsq;
+
+  for (i = 0; i < nmax; i++) {
+    for (k = 0; k < 3; k++) {
+      for (l = 0; l < 3; l++) {
+        X[i][k][l] = 0.0;
+        Y[i][k][l] = 0.0;
+      }
+    }
+    norm[i] = 0;
+    array_atom[i][0] = 0;
+  }
+
+  // First loop through neighbors
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    if (!(mask[i] & groupbit)) continue;
+
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      if (!(mask[j] & groupbit)) continue;
+
+      jtype = type[j];
+      r[0] = x[i][0] - x[j][0];
+      r[1] = x[i][1] - x[j][1];
+      r[2] = x[i][2] - x[j][2];
+      rsq = lensq3(r);
+
+      // Only include contributions from atoms that are CURRENTLY neighbors
+      if (cut_style == TYPE) {
+        if (rsq > cutsq[itype][jtype]) continue;
+      } else if (cut_style == CUSTOM) {
+        if (rsq > cutsq_custom) continue;
+      } else {
+        radsum = radius[i] + radius[j];
+        if (rsq > (radsum * radsum)) continue;
+      }
+
+      r0[0] = x0[i][0] - x0[j][0];
+      r0[1] = x0[i][1] - x0[j][1];
+      r0[2] = x0[i][2] - x0[j][2];
+      minimum_image0(r0);
+
+      // Using notation from Falk & Langer 1998
+      outer3(r, r0, X_tmp);
+      outer3(r0, r0, Y_tmp);
+
+      for (k = 0; k < 3; k++) {
+        for (l = 0; l < 3; l++) {
+          X[i][k][l] += X_tmp[k][l];
+          Y[i][k][l] += Y_tmp[k][l];
+        }
+      }
+
+      if (newton_pair || j < nlocal) {
+        for (k = 0; k < 3; k++) {
+          for (l = 0; l < 3; l++) {
+            X[j][k][l] += X_tmp[k][l];
+            Y[j][k][l] += Y_tmp[k][l];
+          }
+        }
+      }
+    }
+  }
+
+  comm_flag = 0;
+  if (newton_pair) comm->reverse_comm(this, 18);
+
+  // Calculate contributions to strain tensor
+  double denom;
+  for (i = 0; i < nlocal; i++) {
+    if (!(mask[i] & groupbit)) continue;
+    for (j = 0; j < 3; j++) {
+      for (k = 0; k < 3; k++) {
+        Y_tmp[j][k] = Y[i][j][k];
+        X_tmp[j][k] = X[i][j][k];
+      }
+    }
+
+    if (dim == 3) {
+      invert3(Y_tmp, Y_inv);
+    } else {
+      denom = Y_tmp[0][0] * Y_tmp[1][1] - Y_tmp[0][1] * Y_tmp[1][0];
+      if (denom != 0.0) denom = 1.0 / denom;
+      Y_inv[0][0] = Y_tmp[1][1] * denom;
+      Y_inv[0][1] = -Y_tmp[0][1] * denom;
+      Y_inv[1][0] = -Y_tmp[1][0] * denom;
+      Y_inv[1][1] = Y_tmp[0][0] * denom;
+    }
+
+    times3(X_tmp, Y_inv, F_tmp);
+
+    for (j = 0; j < 3; j++) {
+      for (k = 0; k < 3; k++) {
+        F[i][j][k] = F_tmp[j][k];
+      }
+    }
+  }
+
+  comm->forward_comm(this);
+
+  // Second loop through neighbors
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    if (!(mask[i] & groupbit)) continue;
+
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      if (!(mask[j] & groupbit)) continue;
+
+      jtype = type[j];
+      r[0] = x[i][0] - x[j][0];
+      r[1] = x[i][1] - x[j][1];
+      r[2] = x[i][2] - x[j][2];
+      rsq = lensq3(r);
+
+      // Only include contributions from atoms that are CURRENTLY neighbors
+      if (cut_style == TYPE) {
+        if (rsq >= cutsq[itype][jtype]) continue;
+      } else if (cut_style == CUSTOM) {
+        if (rsq >= cutsq_custom) continue;
+      } else {
+        radsum = radius[i] + radius[j];
+        if (rsq >= radsum * radsum) continue;
+      }
+
+      r0[0] = x0[i][0] - x0[j][0];
+      r0[1] = x0[i][1] - x0[j][1];
+      r0[2] = x0[i][2] - x0[j][2];
+      minimum_image0(r0);
+
+      // E * r0
+      for (k = 0; k < 3; k++) {
+        temp[k] = 0.0;
+        for (l = 0; l < 3; l++)
+          temp[k] += F[i][k][l] * r0[l];
+      }
+
+      sub3(r, temp, temp);
+      array_atom[i][0] += lensq3(temp);
+      norm[i] += 1;
+
+      if (newton_pair || j < nlocal) {
+        for (k = 0; k < 3; k++) {
+          temp[k] = 0.0;
+          for (l = 0; l < 3; l++)
+            temp[k] += F[j][k][l] * r0[l];
+        }
+
+        sub3(r, temp, temp);
+        array_atom[j][0] += lensq3(temp);
+        norm[j] += 1;
+      }
+    }
+  }
+
+  comm_flag = 1;
+  if (newton_pair) comm->reverse_comm(this, 2);
+
+  for (i = 0; i < nlocal; i++) {
+    if (!(mask[i] & groupbit)) continue;
+
+    if (norm[i] != 0)
+      array_atom[i][0] /= norm[i];
+    else
+      array_atom[i][0] = 0.0;
+    array_atom[i][0] = sqrt(array_atom[i][0]);
+
+    for (j = 0; j < 3; j++)
+      for (k = 0; k < 3; k++)
+        F_tmp[j][k] = F[i][j][k];
+
+    transpose_times3(F_tmp, F_tmp, E);
+    for (j = 0; j < dim; j++) E[j][j] -= 1.0;
+
+    evol = (E[0][0] + E[1][1] + E[2][2]) / dim;
+
+    // Calculate deviatoric strain
+    for (j = 0; j < dim; j++) E[j][j] -= evol;
+    j2 = 0.0;
+    for (j = 0; j < 3; j++)
+      for (k = 0; k < 3; k++)
+        j2 += E[j][k] * E[j][k];
+
+    edev = sqrt(0.5 * j2);
+
+    array_atom[i][1] = evol;
+    array_atom[i][2] = edev;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixNonaffineDisplacement::pack_reverse_comm(int n, int first, double *buf)
+{
+  int i, m, last, k, l;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    if (comm_flag == 0) {
+      for (k = 0; k < 3; k++) {
+        for (l = 0; l < 3; l++) {
+          buf[m++] = X[i][k][l];
+          buf[m++] = Y[i][k][l];
+        }
+      }
+    } else {
+      buf[m++] = array_atom[i][0];
+      buf[m++] = ubuf(norm[i]).d;
+    }
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::unpack_reverse_comm(int n, int *list, double *buf)
+{
+  int i, j, m, k, l;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    if (comm_flag == 0) {
+      for (k = 0; k < 3; k++) {
+        for (l = 0; l < 3; l++) {
+          X[j][k][l] += buf[m++];
+          Y[j][k][l] += buf[m++];
+        }
+      }
+    } else {
+      array_atom[j][0] += buf[m++];
+      norm[j] += (int) ubuf(buf[m++]).i;
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixNonaffineDisplacement::pack_forward_comm(int n, int *list, double *buf,
+                                          int /*pbc_flag*/, int * /*pbc*/)
+{
+  int i, j, m, k, l;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    for (k = 0; k < 3; k++) {
+      for (l = 0; l < 3; l ++) {
+        buf[m++] = F[j][k][l];
+      }
+    }
+  }
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::unpack_forward_comm(int n, int first, double *buf)
+{
+  int i, m, last, k, l;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) {
+    for (k = 0; k < 3; k++) {
+      for (l = 0; l < 3; l ++) {
+        F[i][k][l] = buf[m++];
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::minimum_image0(double *delta)
+{
+  if (domain->triclinic == 0) {
+    if (domain->xperiodic) {
+      while (fabs(delta[0]) > xprd0_half) {
+        if (delta[0] < 0.0) delta[0] += xprd0;
+        else delta[0] -= xprd0;
+      }
+    }
+    if (domain->yperiodic) {
+      while (fabs(delta[1]) > yprd0_half) {
+        if (delta[1] < 0.0) delta[1] += yprd0;
+        else delta[1] -= yprd0;
+      }
+    }
+    if (domain->zperiodic) {
+      while (fabs(delta[2]) > zprd0_half) {
+        if (delta[2] < 0.0) delta[2] += zprd0;
+        else delta[2] -= zprd0;
+      }
+    }
+
+  } else {
+    if (domain->zperiodic) {
+      while (fabs(delta[2]) > zprd0_half) {
+        if (delta[2] < 0.0) {
+          delta[2] += zprd0;
+          delta[1] += yz0;
+          delta[0] += xz0;
+        } else {
+          delta[2] -= zprd0;
+          delta[1] -= yz0;
+          delta[0] -= xz0;
+        }
+      }
+    }
+    if (domain->yperiodic) {
+      while (fabs(delta[1]) > yprd0_half) {
+        if (delta[1] < 0.0) {
+          delta[1] += yprd0;
+          delta[0] += xy0;
+        } else {
+          delta[1] -= yprd0;
+          delta[0] -= xy0;
+        }
+      }
+    }
+    if (domain->xperiodic) {
+      while (fabs(delta[0]) > xprd0_half) {
+        if (delta[0] < 0.0) delta[0] += xprd0;
+        else delta[0] -= xprd0;
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNonaffineDisplacement::grow_arrays(int nmax_new)
+{
+  nmax = nmax_new;
+  memory->destroy(X);
+  memory->destroy(Y);
+  memory->destroy(F);
+  memory->destroy(norm);
+  memory->create(X, nmax, 3, 3, "fix_nonaffine_displacement:X");
+  memory->create(Y, nmax, 3, 3, "fix_nonaffine_displacement:Y");
+  memory->create(F, nmax, 3, 3, "fix_nonaffine_displacement:F");
+  memory->create(norm, nmax, "fix_nonaffine_displacement:norm");
+}
diff --git a/src/EXTRA-FIX/fix_nonaffine_displacement.h b/src/EXTRA-FIX/fix_nonaffine_displacement.h
new file mode 100644
index 0000000000..3341ab1834
--- /dev/null
+++ b/src/EXTRA-FIX/fix_nonaffine_displacement.h
@@ -0,0 +1,71 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+// clang-format off
+FixStyle(nonaffine/displacement,FixNonaffineDisplacement)
+// clang-format on
+#else
+
+#ifndef LMP_FIX_NONAFFINE_DISPLACEMENT_H
+#define LMP_FIX_NONAFFINE_DISPLACEMENT_H
+
+#include "fix.h"
+
+namespace LAMMPS_NS {
+
+class FixNonaffineDisplacement : public Fix {
+ public:
+  FixNonaffineDisplacement(class LAMMPS *, int, char **);
+  ~FixNonaffineDisplacement() override;
+  int setmask() override;
+  void post_constructor() override;
+  void init() override;
+  void init_list(int, class NeighList *) override;
+  void setup(int) override;
+  void post_force(int) override;
+  void write_restart(FILE *fp) override;
+  void restart(char *buf) override;
+  int pack_forward_comm(int, int *, double *, int, int *) override;
+  void unpack_forward_comm(int, int, double *) override;
+  int pack_reverse_comm(int, int, double *) override;
+  void unpack_reverse_comm(int, int *, double *) override;
+
+ private:
+  double dtv;
+  char *id_fix;
+  class FixStoreAtom *fix;
+  int nmax, comm_flag;
+  int nad_style, cut_style;
+  int reference_style, offset_timestep, reference_timestep, update_timestep;
+  int reference_saved;
+  double cutoff_custom, cutsq_custom, mycutneigh;
+  double xprd0, yprd0, zprd0, xprd0_half, yprd0_half, zprd0_half, xy0, xz0, yz0;
+
+  double ***X, ***Y, ***F;
+  int *norm;
+
+  class NeighList *list;    // half neighbor list
+
+
+  void integrate_velocity();
+  void calculate_D2Min();
+  void save_reference_state();
+  void minimum_image0(double *);
+  void grow_arrays(int) override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/EXTRA-FIX/fix_npt_cauchy.cpp b/src/EXTRA-FIX/fix_npt_cauchy.cpp
index f3dfd1af36..8eb6a80b6d 100644
--- a/src/EXTRA-FIX/fix_npt_cauchy.cpp
+++ b/src/EXTRA-FIX/fix_npt_cauchy.cpp
@@ -42,8 +42,8 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define DELTAFLIP 0.1
-#define TILTMAX 1.5
+static constexpr double DELTAFLIP = 0.1;
+static constexpr double TILTMAX = 1.5;
 
 enum{NOBIAS,BIAS};
 enum{NONE,XYZ,XY,YZ,XZ};
@@ -91,8 +91,6 @@ FixNPTCauchy::FixNPTCauchy(LAMMPS *lmp, int narg, char **arg) :
   omega_mass_flag = 0;
   etap_mass_flag = 0;
   flipflag = 1;
-  dipole_flag = 0;
-  dlm_flag = 0;
 
   tcomputeflag = 0;
   pcomputeflag = 0;
@@ -327,14 +325,6 @@ FixNPTCauchy::FixNPTCauchy(LAMMPS *lmp, int narg, char **arg) :
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix npt/cauchy command");
       flipflag = utils::logical(FLERR,arg[iarg+1],false,lmp);
       iarg += 2;
-    } else if (strcmp(arg[iarg],"update") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix npt/cauchy command");
-      if (strcmp(arg[iarg+1],"dipole") == 0) dipole_flag = 1;
-      else if (strcmp(arg[iarg+1],"dipole/dlm") == 0) {
-        dipole_flag = 1;
-        dlm_flag = 1;
-      } else error->all(FLERR,"Illegal fix npt/cauchy command");
-      iarg += 2;
     } else if (strcmp(arg[iarg],"alpha") == 0) {
       alpha = utils::numeric(FLERR,arg[iarg+1],false,lmp);
       iarg += 2;
@@ -349,20 +339,6 @@ FixNPTCauchy::FixNPTCauchy(LAMMPS *lmp, int narg, char **arg) :
       fixedpoint[2] = utils::numeric(FLERR,arg[iarg+3],false,lmp);
       iarg += 4;
 
-    // disc keyword is also parsed in fix/nh/sphere
-
-    } else if (strcmp(arg[iarg],"disc") == 0) {
-      iarg++;
-
-    // keywords erate, strain, and ext are also parsed in fix/nh/uef
-
-    } else if (strcmp(arg[iarg],"erate") == 0) {
-      iarg += 3;
-    } else if (strcmp(arg[iarg],"strain") == 0) {
-      iarg += 3;
-    } else if (strcmp(arg[iarg],"ext") == 0) {
-      iarg += 2;
-
     } else error->all(FLERR,"Illegal fix npt/cauchy command");
   }
 
@@ -453,13 +429,6 @@ FixNPTCauchy::FixNPTCauchy(LAMMPS *lmp, int narg, char **arg) :
        p_period[0] != p_period[2]))
     error->all(FLERR,"Invalid fix npt/cauchy pressure settings");
 
-  if (dipole_flag) {
-    if (!atom->sphere_flag)
-      error->all(FLERR,"Using update dipole flag requires atom style sphere");
-    if (!atom->mu_flag)
-      error->all(FLERR,"Using update dipole flag requires atom attribute mu");
-  }
-
   if ((tstat_flag && t_period <= 0.0) ||
       (p_flag[0] && p_period[0] <= 0.0) ||
       (p_flag[1] && p_period[1] <= 0.0) ||
diff --git a/src/EXTRA-FIX/fix_npt_cauchy.h b/src/EXTRA-FIX/fix_npt_cauchy.h
index 43a944acb4..4a738e48ab 100644
--- a/src/EXTRA-FIX/fix_npt_cauchy.h
+++ b/src/EXTRA-FIX/fix_npt_cauchy.h
@@ -117,8 +117,6 @@ class FixNPTCauchy : public Fix {
   int eta_mass_flag;      // 1 if eta_mass updated, 0 if not.
   int omega_mass_flag;    // 1 if omega_mass updated, 0 if not.
   int etap_mass_flag;     // 1 if etap_mass updated, 0 if not.
-  int dipole_flag;        // 1 if dipole is updated, 0 if not.
-  int dlm_flag;           // 1 if using the DLM rotational integrator, 0 if not
 
   int scaleyz;     // 1 if yz scaled with lz
   int scalexz;     // 1 if xz scaled with lz
diff --git a/src/EXTRA-FIX/fix_smd.cpp b/src/EXTRA-FIX/fix_smd.cpp
index e19a605e71..a9dbfcc2d1 100644
--- a/src/EXTRA-FIX/fix_smd.cpp
+++ b/src/EXTRA-FIX/fix_smd.cpp
@@ -38,7 +38,7 @@ enum { SMD_NONE=0,
        SMD_CVEL=1<<2, SMD_CFOR=1<<3,
        SMD_AUTOX=1<<4, SMD_AUTOY=1<<5, SMD_AUTOZ=1<<6};
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/EXTRA-FIX/fix_tmd.cpp b/src/EXTRA-FIX/fix_tmd.cpp
index e88007a29d..401ed41573 100644
--- a/src/EXTRA-FIX/fix_tmd.cpp
+++ b/src/EXTRA-FIX/fix_tmd.cpp
@@ -36,8 +36,8 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define CHUNK 1000
-#define MAXLINE 256
+static constexpr int CHUNK = 1000;
+static constexpr int MAXLINE = 256;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/EXTRA-FIX/fix_viscosity.cpp b/src/EXTRA-FIX/fix_viscosity.cpp
index a44050636d..715c30afdd 100644
--- a/src/EXTRA-FIX/fix_viscosity.cpp
+++ b/src/EXTRA-FIX/fix_viscosity.cpp
@@ -32,7 +32,7 @@ using namespace FixConst;
 
 // needs to be big, but not so big that lose precision when subtract velocity
 
-#define BIG 1.0e10
+static constexpr double BIG = 1.0e10;
 
 /* ---------------------------------------------------------------------- */
 
@@ -120,14 +120,14 @@ int FixViscosity::setmask()
 
 void FixViscosity::init()
 {
-  // warn if any fix ave/spatial comes after this fix
+  // warn if any fix ave/chunk comes after this fix
   // can cause glitch in averaging since ave will happen after swap
 
   int foundme = 0;
-  for (int i = 0; i < modify->nfix; i++) {
-    if (modify->fix[i] == this) foundme = 1;
-    if (foundme && strcmp(modify->fix[i]->style,"ave/spatial") == 0 && me == 0)
-      error->warning(FLERR,"Fix viscosity comes before fix ave/spatial");
+  for (const auto &ifix : modify->get_fix_list()) {
+    if (ifix == this) foundme = 1;
+    if (foundme && utils::strmatch(ifix->style,"^ave/chunk") && (me == 0))
+      error->warning(FLERR,"Fix viscosity comes before fix ave/chunk");
   }
 
   // set bounds of 2 slabs in pdim
diff --git a/src/EXTRA-FIX/fix_viscous_sphere.cpp b/src/EXTRA-FIX/fix_viscous_sphere.cpp
index 5b4dd72231..0eda323c15 100644
--- a/src/EXTRA-FIX/fix_viscous_sphere.cpp
+++ b/src/EXTRA-FIX/fix_viscous_sphere.cpp
@@ -38,7 +38,7 @@ FixViscousSphere::FixViscousSphere(LAMMPS *_lmp, int narg, char **arg) :
 {
   dynamic_group_allow = 1;
 
-  if (!atom->sphere_flag) error->all(FLERR, "Fix viscous/sphere requires atom style sphere");
+  if (!atom->omega_flag) error->all(FLERR, "Fix viscous/sphere requires atom attribute omega");
 
   if (narg < 4) error->all(FLERR, "Illegal fix viscous/sphere command");
 
diff --git a/src/EXTRA-MOLECULE/angle_cosine_delta.cpp b/src/EXTRA-MOLECULE/angle_cosine_delta.cpp
index 71acca6001..51a53e1a96 100644
--- a/src/EXTRA-MOLECULE/angle_cosine_delta.cpp
+++ b/src/EXTRA-MOLECULE/angle_cosine_delta.cpp
@@ -25,7 +25,7 @@
 
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/EXTRA-MOLECULE/angle_cosine_periodic.cpp b/src/EXTRA-MOLECULE/angle_cosine_periodic.cpp
index 15d0575f6d..0b2a6d336d 100644
--- a/src/EXTRA-MOLECULE/angle_cosine_periodic.cpp
+++ b/src/EXTRA-MOLECULE/angle_cosine_periodic.cpp
@@ -34,11 +34,14 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
-AngleCosinePeriodic::AngleCosinePeriodic(LAMMPS *lmp) : Angle(lmp) {}
+AngleCosinePeriodic::AngleCosinePeriodic(LAMMPS *lmp) : Angle(lmp)
+{
+  born_matrix_enable = 1;
+}
 
 /* ---------------------------------------------------------------------- */
 
@@ -117,7 +120,7 @@ void AngleCosinePeriodic::compute(int eflag, int vflag)
     tn = 1.0;
     tn_1 = 1.0;
     tn_2 = 0.0;
-    un = 1.0;
+    un = (m==1) ? 2.0 : 1.0;
     un_1 = 2.0;
     un_2 = 0.0;
 
@@ -298,3 +301,38 @@ double AngleCosinePeriodic::single(int type, int i1, int i2, int i3)
   c = cos(acos(c)*multiplicity[type]);
   return 2.0*k[type]*(1.0-b[type]*powsign(multiplicity[type])*c);
 }
+
+/* ---------------------------------------------------------------------- */
+
+void AngleCosinePeriodic::born_matrix(int type, int i1, int i2, int i3, double &du, double &du2)
+{
+  double **x = atom->x;
+
+  double delx1 = x[i1][0] - x[i2][0];
+  double dely1 = x[i1][1] - x[i2][1];
+  double delz1 = x[i1][2] - x[i2][2];
+  domain->minimum_image(delx1,dely1,delz1);
+  double r1 = sqrt(delx1*delx1 + dely1*dely1 + delz1*delz1);
+
+  double delx2 = x[i3][0] - x[i2][0];
+  double dely2 = x[i3][1] - x[i2][1];
+  double delz2 = x[i3][2] - x[i2][2];
+  domain->minimum_image(delx2,dely2,delz2);
+  double r2 = sqrt(delx2*delx2 + dely2*dely2 + delz2*delz2);
+
+  double c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+  c /= r1*r2;
+  if (c > 1.0) c = 1.0;
+  if (c < -1.0) c = -1.0;
+  double theta = acos(c);
+
+  double s = sqrt(1.0 - c*c);
+  if (s < SMALL) s = SMALL;
+  s = 1.0/s;
+
+  double m_angle = multiplicity[type] * theta;
+  double prefactor = -2.0 * k[type] * b[type] * powsign(multiplicity[type]) * multiplicity[type];
+
+  du = prefactor * sin(m_angle) / s;
+  du2 = prefactor * (c * sin(m_angle) - s * cos(m_angle) * multiplicity[type]) / (s * s * s);
+}
diff --git a/src/EXTRA-MOLECULE/angle_cosine_periodic.h b/src/EXTRA-MOLECULE/angle_cosine_periodic.h
index 4e584b4543..f04ed04784 100644
--- a/src/EXTRA-MOLECULE/angle_cosine_periodic.h
+++ b/src/EXTRA-MOLECULE/angle_cosine_periodic.h
@@ -35,6 +35,7 @@ class AngleCosinePeriodic : public Angle {
   void read_restart(FILE *) override;
   void write_data(FILE *) override;
   double single(int, int, int, int) override;
+  void born_matrix(int type, int i1, int i2, int i3, double &du, double &du2) override;
 
  protected:
   double *k;
diff --git a/src/EXTRA-MOLECULE/angle_cosine_shift.cpp b/src/EXTRA-MOLECULE/angle_cosine_shift.cpp
index ce9b4c4133..53ecb35eaf 100644
--- a/src/EXTRA-MOLECULE/angle_cosine_shift.cpp
+++ b/src/EXTRA-MOLECULE/angle_cosine_shift.cpp
@@ -32,7 +32,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/EXTRA-MOLECULE/angle_cosine_shift_exp.cpp b/src/EXTRA-MOLECULE/angle_cosine_shift_exp.cpp
index a411ea3199..acca92c48e 100644
--- a/src/EXTRA-MOLECULE/angle_cosine_shift_exp.cpp
+++ b/src/EXTRA-MOLECULE/angle_cosine_shift_exp.cpp
@@ -32,7 +32,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/EXTRA-MOLECULE/angle_fourier.cpp b/src/EXTRA-MOLECULE/angle_fourier.cpp
index 549da0c196..2c458c07cc 100644
--- a/src/EXTRA-MOLECULE/angle_fourier.cpp
+++ b/src/EXTRA-MOLECULE/angle_fourier.cpp
@@ -33,12 +33,13 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
 AngleFourier::AngleFourier(LAMMPS *lmp) : Angle(lmp)
 {
+  born_matrix_enable = 1;
   k = nullptr;
   C0 = nullptr;
   C1 = nullptr;
diff --git a/src/EXTRA-MOLECULE/angle_fourier_simple.cpp b/src/EXTRA-MOLECULE/angle_fourier_simple.cpp
index 0ba890b273..6de7956ffa 100644
--- a/src/EXTRA-MOLECULE/angle_fourier_simple.cpp
+++ b/src/EXTRA-MOLECULE/angle_fourier_simple.cpp
@@ -32,7 +32,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.0001
+static constexpr double SMALL = 0.0001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/EXTRA-MOLECULE/angle_quartic.cpp b/src/EXTRA-MOLECULE/angle_quartic.cpp
index f28e209a77..aade6b4534 100644
--- a/src/EXTRA-MOLECULE/angle_quartic.cpp
+++ b/src/EXTRA-MOLECULE/angle_quartic.cpp
@@ -33,11 +33,14 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
-AngleQuartic::AngleQuartic(LAMMPS *lmp) : Angle(lmp) {}
+AngleQuartic::AngleQuartic(LAMMPS *lmp) : Angle(lmp)
+{
+  born_matrix_enable = 1;
+}
 
 /* ---------------------------------------------------------------------- */
 
@@ -286,3 +289,39 @@ double AngleQuartic::single(int type, int i1, int i2, int i3)
   double dtheta4 = dtheta3 * dtheta;
   return k2[type] * dtheta2 + k3[type] * dtheta3 + k4[type] * dtheta4;
 }
+
+/* ---------------------------------------------------------------------- */
+
+void AngleQuartic::born_matrix(int type, int i1, int i2, int i3, double &du, double &du2)
+{
+  double **x = atom->x;
+
+  double delx1 = x[i1][0] - x[i2][0];
+  double dely1 = x[i1][1] - x[i2][1];
+  double delz1 = x[i1][2] - x[i2][2];
+  domain->minimum_image(delx1,dely1,delz1);
+  double r1 = sqrt(delx1*delx1 + dely1*dely1 + delz1*delz1);
+
+  double delx2 = x[i3][0] - x[i2][0];
+  double dely2 = x[i3][1] - x[i2][1];
+  double delz2 = x[i3][2] - x[i2][2];
+  domain->minimum_image(delx2,dely2,delz2);
+  double r2 = sqrt(delx2*delx2 + dely2*dely2 + delz2*delz2);
+
+  double c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+  c /= r1*r2;
+  if (c > 1.0) c = 1.0;
+  if (c < -1.0) c = -1.0;
+  double theta = acos(c);
+
+  double s = sqrt(1.0 - c*c);
+  if (s < SMALL) s = SMALL;
+
+  double dtheta = theta - theta0[type];
+  double dtheta2 = dtheta * dtheta;
+  double dtheta3 = dtheta2 * dtheta;
+
+  du = -(2.0 * k2[type] * dtheta + 3.0 * k3[type] * dtheta2 + 4.0 * k4[type] * dtheta3) / s;
+  du2 = (2.0 * k2[type] + 6.0 * k3[type] * dtheta + 12.0 * k4[type] * dtheta2) / (s*s) -
+          (2.0 * k2[type] * dtheta + 3.0 * k3[type] * dtheta2 + 4.0 * k4[type] * dtheta3) * c / (s*s*s);
+}
diff --git a/src/EXTRA-MOLECULE/angle_quartic.h b/src/EXTRA-MOLECULE/angle_quartic.h
index 3f0396f27b..7de51b24d1 100644
--- a/src/EXTRA-MOLECULE/angle_quartic.h
+++ b/src/EXTRA-MOLECULE/angle_quartic.h
@@ -35,6 +35,7 @@ class AngleQuartic : public Angle {
   void read_restart(FILE *) override;
   void write_data(FILE *) override;
   double single(int, int, int, int) override;
+  void born_matrix(int type, int i1, int i2, int i3, double &du, double &du2) override;
 
  protected:
   double *k2, *k3, *k4, *theta0;
diff --git a/src/EXTRA-MOLECULE/bond_gaussian.cpp b/src/EXTRA-MOLECULE/bond_gaussian.cpp
index baca0b6e1a..9a8546e278 100644
--- a/src/EXTRA-MOLECULE/bond_gaussian.cpp
+++ b/src/EXTRA-MOLECULE/bond_gaussian.cpp
@@ -35,6 +35,7 @@ BondGaussian::BondGaussian(LAMMPS *lmp) :
     Bond(lmp), nterms(nullptr), bond_temperature(nullptr), alpha(nullptr), width(nullptr),
     r0(nullptr)
 {
+  born_matrix_enable = 1;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -294,3 +295,45 @@ double BondGaussian::single(int type, double rsq, int /*i*/, int /*j*/, double &
 
   return -(force->boltz * bond_temperature[type]) * log(sum_g_i);
 }
+
+/* ---------------------------------------------------------------------- */
+
+void BondGaussian::born_matrix(int type, double rsq, int /*i*/, int /*j*/, double &du, double &du2)
+{
+  double r = sqrt(rsq);
+
+  // first derivative of energy with respect to distance
+  double sum_g_i = 0.0;
+  double sum_numerator = 0.0;
+  for (int i = 0; i < nterms[type]; i++) {
+    double dr = r - r0[type][i];
+    double prefactor = (alpha[type][i] / (width[type][i] * sqrt(MY_PI2)));
+    double exponent = -2 * dr * dr / (width[type][i] * width[type][i]);
+    double g_i = prefactor * exp(exponent);
+    sum_g_i += g_i;
+    sum_numerator += g_i * dr / (width[type][i] * width[type][i]);
+  }
+
+  if (sum_g_i < SMALL) sum_g_i = SMALL;
+  du = 4.0 * (force->boltz * bond_temperature[type]) * (sum_numerator / sum_g_i);
+
+  // second derivative of energy with respect to distance
+  sum_g_i = 0.0;
+  double sum_dg_i = 0.0;
+  double sum_d2g_i = 0.0;
+  for (int i = 0; i < nterms[type]; i++) {
+    double dr = r - r0[type][i];
+    double prefactor = (alpha[type][i] / (width[type][i] * sqrt(MY_PI2)));
+    double exponent = -2 * dr * dr / (width[type][i] * width[type][i]);
+    double g_i = prefactor * exp(exponent);
+    sum_g_i += g_i;
+    sum_dg_i -= 4.0 * g_i * dr / pow(width[type][i], 2);
+    sum_d2g_i += 4.0 *  g_i * (4.0 * pow(r0[type][i], 2) - 8.0 * r0[type][i] * r - pow(width[type][i], 2) + 4.0 * r * r) / pow(width[type][i], 4) ;
+  }
+
+  if (sum_g_i < SMALL) sum_g_i = SMALL;
+  double numerator = sum_d2g_i*sum_g_i - sum_dg_i*sum_dg_i;
+  double denominator = sum_g_i * sum_g_i;
+
+  du2 = - (force->boltz * bond_temperature[type]) * numerator / denominator;
+}
diff --git a/src/EXTRA-MOLECULE/bond_gaussian.h b/src/EXTRA-MOLECULE/bond_gaussian.h
index 7af6f1f4d9..e466df47d4 100644
--- a/src/EXTRA-MOLECULE/bond_gaussian.h
+++ b/src/EXTRA-MOLECULE/bond_gaussian.h
@@ -35,6 +35,7 @@ class BondGaussian : public Bond {
   void read_restart(FILE *) override;
   void write_data(FILE *) override;
   double single(int, double, int, int, double &) override;
+  void born_matrix(int, double, int, int, double &, double &) override;
 
  protected:
   int *nterms;
diff --git a/src/EXTRA-MOLECULE/bond_harmonic_shift_cut.cpp b/src/EXTRA-MOLECULE/bond_harmonic_shift_cut.cpp
index fedcb95ee8..ebcfdb0258 100644
--- a/src/EXTRA-MOLECULE/bond_harmonic_shift_cut.cpp
+++ b/src/EXTRA-MOLECULE/bond_harmonic_shift_cut.cpp
@@ -31,7 +31,10 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-BondHarmonicShiftCut::BondHarmonicShiftCut(LAMMPS *lmp) : Bond(lmp) {}
+BondHarmonicShiftCut::BondHarmonicShiftCut(LAMMPS *lmp) : Bond(lmp)
+{
+  born_matrix_enable = 1;
+}
 
 /* ---------------------------------------------------------------------- */
 
@@ -219,3 +222,19 @@ double BondHarmonicShiftCut::single(int type, double rsq, int /*i*/, int /*j*/,
   fforce = -2.0*k[type]*dr/r;
   return k[type]*(dr*dr - dr2*dr2);
 }
+
+/* ---------------------------------------------------------------------- */
+
+void BondHarmonicShiftCut::born_matrix(int type, double rsq, int /*i*/, int /*j*/, double &du, double &du2)
+{
+  du = 0.0;
+  du2 = 0.0;
+
+  double r = sqrt(rsq);
+  if (r>r1[type]) return;
+
+  double dr = r - r0[type];
+
+  du2 = 2 * k[type];
+  if (r > 0.0) du = du2 * dr;
+}
diff --git a/src/EXTRA-MOLECULE/bond_harmonic_shift_cut.h b/src/EXTRA-MOLECULE/bond_harmonic_shift_cut.h
index 752ac010d9..09d6ab5330 100644
--- a/src/EXTRA-MOLECULE/bond_harmonic_shift_cut.h
+++ b/src/EXTRA-MOLECULE/bond_harmonic_shift_cut.h
@@ -35,6 +35,7 @@ class BondHarmonicShiftCut : public Bond {
   void read_restart(FILE *) override;
   void write_data(FILE *) override;
   double single(int, double, int, int, double &) override;
+  void born_matrix(int, double, int, int, double &, double &) override;
 
  protected:
   double *k, *r0, *r1;
diff --git a/src/EXTRA-MOLECULE/dihedral_cosine_shift_exp.cpp b/src/EXTRA-MOLECULE/dihedral_cosine_shift_exp.cpp
index 7165cde1fd..36874ee934 100644
--- a/src/EXTRA-MOLECULE/dihedral_cosine_shift_exp.cpp
+++ b/src/EXTRA-MOLECULE/dihedral_cosine_shift_exp.cpp
@@ -30,8 +30,8 @@
 
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/EXTRA-MOLECULE/dihedral_fourier.cpp b/src/EXTRA-MOLECULE/dihedral_fourier.cpp
index f9b4a0d13e..37e1ae8328 100644
--- a/src/EXTRA-MOLECULE/dihedral_fourier.cpp
+++ b/src/EXTRA-MOLECULE/dihedral_fourier.cpp
@@ -32,7 +32,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define TOLERANCE 0.05
+static constexpr double TOLERANCE = 0.05;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/EXTRA-MOLECULE/dihedral_helix.cpp b/src/EXTRA-MOLECULE/dihedral_helix.cpp
index 059bef74a4..0111da9f99 100644
--- a/src/EXTRA-MOLECULE/dihedral_helix.cpp
+++ b/src/EXTRA-MOLECULE/dihedral_helix.cpp
@@ -32,15 +32,16 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
-#define SMALLER   0.00001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
+static constexpr double SMALLER =   0.00001;
 
 /* ---------------------------------------------------------------------- */
 
 DihedralHelix::DihedralHelix(LAMMPS *lmp) : Dihedral(lmp)
 {
   writedata = 1;
+  born_matrix_enable = 1;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -324,3 +325,108 @@ void DihedralHelix::write_data(FILE *fp)
   for (int i = 1; i <= atom->ndihedraltypes; i++)
     fprintf(fp,"%d %g %g %g\n",i,aphi[i],bphi[i],cphi[i]);
 }
+
+/* ----------------------------------------------------------------------*/
+
+void DihedralHelix::born_matrix(int nd, int i1, int i2, int i3, int i4,
+                                             double &du, double &du2)
+{
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double sb1,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2;
+  double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2;
+  double c2mag,sc1,sc2,s12,c;
+  double cx,cy,cz,cmag,dx,phi,si,siinv,sin2;
+
+  int **dihedrallist = neighbor->dihedrallist;
+  double **x = atom->x;
+
+  int type = dihedrallist[nd][4];
+
+  // 1st bond
+
+  vb1x = x[i1][0] - x[i2][0];
+  vb1y = x[i1][1] - x[i2][1];
+  vb1z = x[i1][2] - x[i2][2];
+
+  // 2nd bond
+
+  vb2x = x[i3][0] - x[i2][0];
+  vb2y = x[i3][1] - x[i2][1];
+  vb2z = x[i3][2] - x[i2][2];
+
+  vb2xm = -vb2x;
+  vb2ym = -vb2y;
+  vb2zm = -vb2z;
+
+  // 3rd bond
+
+  vb3x = x[i4][0] - x[i3][0];
+  vb3y = x[i4][1] - x[i3][1];
+  vb3z = x[i4][2] - x[i3][2];
+
+  // c0 calculation
+
+  sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+  sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+
+  rb1 = sqrt(sb1);
+  rb3 = sqrt(sb3);
+
+  c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+
+  // 1st and 2nd angle
+
+  b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+  b1mag = sqrt(b1mag2);
+  b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+  b2mag = sqrt(b2mag2);
+  b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+  b3mag = sqrt(b3mag2);
+
+  ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z;
+  r12c1 = 1.0 / (b1mag*b2mag);
+  c1mag = ctmp * r12c1;
+
+  ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
+  r12c2 = 1.0 / (b2mag*b3mag);
+  c2mag = ctmp * r12c2;
+
+  // cos and sin of 2 angles and final c
+
+  sin2 = MAX(1.0 - c1mag*c1mag,0.0);
+  sc1 = sqrt(sin2);
+  if (sc1 < SMALL) sc1 = SMALL;
+  sc1 = 1.0/sc1;
+
+  sin2 = MAX(1.0 - c2mag*c2mag,0.0);
+  sc2 = sqrt(sin2);
+  if (sc2 < SMALL) sc2 = SMALL;
+  sc2 = 1.0/sc2;
+
+  s12 = sc1 * sc2;
+  c = (c0 + c1mag*c2mag) * s12;
+
+  cx = vb1y*vb2z - vb1z*vb2y;
+  cy = vb1z*vb2x - vb1x*vb2z;
+  cz = vb1x*vb2y - vb1y*vb2x;
+  cmag = sqrt(cx*cx + cy*cy + cz*cz);
+  dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag;
+
+  // error check
+
+  if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) problem(FLERR, i1, i2, i3, i4);
+
+  if (c > 1.0) c = 1.0;
+  if (c < -1.0) c = -1.0;
+
+  phi = acos(c);
+  if (dx > 0.0) phi *= -1.0;
+  si = sin(phi);
+  if (fabs(si) < SMALLER) si = SMALLER;
+  siinv = 1.0/si;
+
+  du = -aphi[type] + 3.0*bphi[type]*sin(3.0*phi)*siinv +
+                                            cphi[type]*sin(phi + MY_PI4)*siinv;
+  du2 = -(9.0*bphi[type]*cos(3.0*phi) + cphi[type]*cos(phi + MY_PI4))*siinv*siinv +
+          (3.0*bphi[type]*sin(3.0*phi) + cphi[type]*sin(phi + MY_PI4))*c*siinv*siinv*siinv;
+}
diff --git a/src/EXTRA-MOLECULE/dihedral_helix.h b/src/EXTRA-MOLECULE/dihedral_helix.h
index 436895c5c3..172a8c3469 100644
--- a/src/EXTRA-MOLECULE/dihedral_helix.h
+++ b/src/EXTRA-MOLECULE/dihedral_helix.h
@@ -33,6 +33,7 @@ class DihedralHelix : public Dihedral {
   void write_restart(FILE *) override;
   void read_restart(FILE *) override;
   void write_data(FILE *) override;
+  void born_matrix(int, int, int, int, int, double &, double &) override;
 
  protected:
   double *aphi, *bphi, *cphi;
diff --git a/src/EXTRA-MOLECULE/dihedral_nharmonic.cpp b/src/EXTRA-MOLECULE/dihedral_nharmonic.cpp
index 206ad4f3ad..4c3cd3be2c 100644
--- a/src/EXTRA-MOLECULE/dihedral_nharmonic.cpp
+++ b/src/EXTRA-MOLECULE/dihedral_nharmonic.cpp
@@ -30,8 +30,8 @@
 
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/EXTRA-MOLECULE/dihedral_quadratic.cpp b/src/EXTRA-MOLECULE/dihedral_quadratic.cpp
index cbe9e3e3a2..1bef5956fa 100644
--- a/src/EXTRA-MOLECULE/dihedral_quadratic.cpp
+++ b/src/EXTRA-MOLECULE/dihedral_quadratic.cpp
@@ -32,15 +32,16 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
-#define SMALLER   0.00001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
+static constexpr double SMALLER =   0.00001;
 
 /* ---------------------------------------------------------------------- */
 
 DihedralQuadratic::DihedralQuadratic(LAMMPS *lmp) : Dihedral(lmp)
 {
   writedata = 1;
+  born_matrix_enable = 1;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -327,3 +328,110 @@ void DihedralQuadratic::write_data(FILE *fp)
   for (int i = 1; i <= atom->ndihedraltypes; i++)
     fprintf(fp,"%d %g %g \n",i,k[i],phi0[i]*180.0/MY_PI);
 }
+
+/* ----------------------------------------------------------------------*/
+
+void DihedralQuadratic::born_matrix(int nd, int i1, int i2, int i3, int i4,
+                             double &du, double &du2)
+{
+  double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm;
+  double sb1,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2;
+  double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2;
+  double c2mag,sc1,sc2,s12,c;
+  double cx,cy,cz,cmag,dx,phi,si,siinv,sin2;
+
+  int **dihedrallist = neighbor->dihedrallist;
+  double **x = atom->x;
+
+  int type = dihedrallist[nd][4];
+
+  // 1st bond
+
+  vb1x = x[i1][0] - x[i2][0];
+  vb1y = x[i1][1] - x[i2][1];
+  vb1z = x[i1][2] - x[i2][2];
+
+  // 2nd bond
+
+  vb2x = x[i3][0] - x[i2][0];
+  vb2y = x[i3][1] - x[i2][1];
+  vb2z = x[i3][2] - x[i2][2];
+
+  vb2xm = -vb2x;
+  vb2ym = -vb2y;
+  vb2zm = -vb2z;
+
+  // 3rd bond
+  vb3x = x[i4][0] - x[i3][0];
+  vb3y = x[i4][1] - x[i3][1];
+  vb3z = x[i4][2] - x[i3][2];
+
+  // c0 calculation
+
+  sb1 = 1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
+  sb3 = 1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
+
+  rb1 = sqrt(sb1);
+  rb3 = sqrt(sb3);
+
+  c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
+
+  // 1st and 2nd angle
+
+  b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
+  b1mag = sqrt(b1mag2);
+  b2mag2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
+  b2mag = sqrt(b2mag2);
+  b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
+  b3mag = sqrt(b3mag2);
+
+  ctmp = vb1x*vb2x + vb1y*vb2y + vb1z*vb2z;
+  r12c1 = 1.0 / (b1mag*b2mag);
+  c1mag = ctmp * r12c1;
+
+  ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
+  r12c2 = 1.0 / (b2mag*b3mag);
+  c2mag = ctmp * r12c2;
+
+  // cos and sin of 2 angles and final c
+
+  sin2 = MAX(1.0 - c1mag*c1mag,0.0);
+  sc1 = sqrt(sin2);
+  if (sc1 < SMALL) sc1 = SMALL;
+  sc1 = 1.0/sc1;
+
+  sin2 = MAX(1.0 - c2mag*c2mag,0.0);
+  sc2 = sqrt(sin2);
+  if (sc2 < SMALL) sc2 = SMALL;
+  sc2 = 1.0/sc2;
+
+  s12 = sc1 * sc2;
+  c = (c0 + c1mag*c2mag) * s12;
+
+  cx = vb1y*vb2z - vb1z*vb2y;
+  cy = vb1z*vb2x - vb1x*vb2z;
+  cz = vb1x*vb2y - vb1y*vb2x;
+  cmag = sqrt(cx*cx + cy*cy + cz*cz);
+  dx = (cx*vb3x + cy*vb3y + cz*vb3z)/cmag/b3mag;
+
+  // error check
+
+  if (c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE))
+                  problem(FLERR, i1, i2, i3, i4);
+
+  if (c > 1.0) c = 1.0;
+  if (c < -1.0) c = -1.0;
+
+  phi = acos(c);
+  if (dx > 0.0) phi *= -1.0;
+  si = sin(phi);
+  if (fabs(si) < SMALLER) si = SMALLER;
+  siinv = 1.0/si;
+
+  double dphi = phi-phi0[type];
+  if (dphi > MY_PI) dphi -= 2*MY_PI;
+  else if (dphi < -MY_PI) dphi += 2*MY_PI;
+
+  du = - 2.0 * k[type] * dphi * siinv;
+  du2 = 2.0 * k[type] * siinv * siinv * ( 1.0 - dphi * c * siinv) ;
+}
diff --git a/src/EXTRA-MOLECULE/dihedral_quadratic.h b/src/EXTRA-MOLECULE/dihedral_quadratic.h
index 90d8c3be6e..89f6fa3b25 100644
--- a/src/EXTRA-MOLECULE/dihedral_quadratic.h
+++ b/src/EXTRA-MOLECULE/dihedral_quadratic.h
@@ -33,6 +33,7 @@ class DihedralQuadratic : public Dihedral {
   void write_restart(FILE *) override;
   void read_restart(FILE *) override;
   void write_data(FILE *) override;
+  void born_matrix(int, int, int, int, int, double &, double &) override;
 
  protected:
   double *k, *phi0;
diff --git a/src/EXTRA-MOLECULE/dihedral_table_cut.cpp b/src/EXTRA-MOLECULE/dihedral_table_cut.cpp
index ce0d431e7c..a06df279f3 100644
--- a/src/EXTRA-MOLECULE/dihedral_table_cut.cpp
+++ b/src/EXTRA-MOLECULE/dihedral_table_cut.cpp
@@ -47,8 +47,8 @@ static const char cite_dihedral_tablecut[] =
 
 /* ---------------------------------------------------------------------- */
 
-#define TOLERANCE 0.05
-#define SMALL     0.0000001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.0000001;
 
 // ------------------------------------------------------------------------
 // The following auxiliary functions were left out of the
diff --git a/src/EXTRA-MOLECULE/improper_cossq.cpp b/src/EXTRA-MOLECULE/improper_cossq.cpp
index bd21fa12e7..864ec28927 100644
--- a/src/EXTRA-MOLECULE/improper_cossq.cpp
+++ b/src/EXTRA-MOLECULE/improper_cossq.cpp
@@ -32,8 +32,8 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/EXTRA-MOLECULE/improper_distance.cpp b/src/EXTRA-MOLECULE/improper_distance.cpp
index 18f6dd9a3f..acb962c4f0 100644
--- a/src/EXTRA-MOLECULE/improper_distance.cpp
+++ b/src/EXTRA-MOLECULE/improper_distance.cpp
@@ -30,8 +30,8 @@
 
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/EXTRA-MOLECULE/improper_fourier.cpp b/src/EXTRA-MOLECULE/improper_fourier.cpp
index 295657b1b6..1db8b3697c 100644
--- a/src/EXTRA-MOLECULE/improper_fourier.cpp
+++ b/src/EXTRA-MOLECULE/improper_fourier.cpp
@@ -30,8 +30,8 @@
 
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/EXTRA-MOLECULE/improper_ring.cpp b/src/EXTRA-MOLECULE/improper_ring.cpp
index 36d6277e46..8c570550e4 100644
--- a/src/EXTRA-MOLECULE/improper_ring.cpp
+++ b/src/EXTRA-MOLECULE/improper_ring.cpp
@@ -54,8 +54,8 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/FEP/fix_adapt_fep.cpp b/src/FEP/fix_adapt_fep.cpp
index c32b44b081..e0c5868e96 100644
--- a/src/FEP/fix_adapt_fep.cpp
+++ b/src/FEP/fix_adapt_fep.cpp
@@ -48,9 +48,9 @@ enum{DIAMETER, CHARGE};
 FixAdaptFEP::FixAdaptFEP(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
 {
-  if (narg < 5) error->all(FLERR,"Illegal fix adapt/fep command");
+  if (narg < 5) utils::missing_cmd_args(FLERR,"fix adapt/fep", error);
   nevery = utils::inumeric(FLERR,arg[3],false,lmp);
-  if (nevery < 0) error->all(FLERR,"Illegal fix adapt/fep command");
+  if (nevery < 0) error->all(FLERR,"Illegal fix adapt/fep every value {}", nevery);
 
   dynamic_group_allow = 1;
   create_attribute = 1;
@@ -62,21 +62,21 @@ FixAdaptFEP::FixAdaptFEP(LAMMPS *lmp, int narg, char **arg) :
   int iarg = 4;
   while (iarg < narg) {
     if (strcmp(arg[iarg],"pair") == 0) {
-      if (iarg+6 > narg) error->all(FLERR,"Illegal fix adapt/fep command");
+      if (iarg+6 > narg) utils::missing_cmd_args(FLERR,"fix adapt/fep pair", error);
       nadapt++;
       iarg += 6;
     } else if (strcmp(arg[iarg],"kspace") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix adapt/fep command");
+      if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"fix adapt/fep kspace", error);
       nadapt++;
       iarg += 2;
     } else if (strcmp(arg[iarg],"atom") == 0) {
-      if (iarg+4 > narg) error->all(FLERR,"Illegal fix adapt/fep command");
+      if (iarg+4 > narg) utils::missing_cmd_args(FLERR,"fix adapt/fep atom", error);
       nadapt++;
       iarg += 4;
     } else break;
   }
 
-  if (nadapt == 0) error->all(FLERR,"Illegal fix adapt/fep command");
+  if (nadapt == 0) error->all(FLERR,"Nothing to adapt in fix adapt/fep command");
   adapt = new Adapt[nadapt];
 
   // parse keywords
@@ -136,11 +136,11 @@ FixAdaptFEP::FixAdaptFEP(LAMMPS *lmp, int narg, char **arg) :
 
   while (iarg < narg) {
     if (strcmp(arg[iarg],"reset") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix adapt/fep command");
+      if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"fix adapt/fep reset", error);
       resetflag = utils::logical(FLERR,arg[iarg+1],false,lmp);
       iarg += 2;
     } else if (strcmp(arg[iarg],"scale") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix adapt/fep command");
+      if (iarg+2 > narg) utils::missing_cmd_args(FLERR,"fix adapt/fep scale", error);
       scaleflag = utils::logical(FLERR,arg[iarg+1],false,lmp);
       iarg += 2;
     } else if (strcmp(arg[iarg],"after") == 0) {
@@ -208,7 +208,7 @@ void FixAdaptFEP::post_constructor()
   id_fix_diam = nullptr;
   id_fix_chg = nullptr;
 
-  if (diam_flag) {
+  if (diam_flag && atom->radius_flag) {
     id_fix_diam = utils::strdup(id + std::string("_FIX_STORE_DIAM"));
     fix_diam = dynamic_cast<FixStoreAtom *>(
       modify->add_fix(fmt::format("{} {} STORE/ATOM 1 0 0 1", id_fix_diam,group->names[igroup])));
@@ -226,7 +226,7 @@ void FixAdaptFEP::post_constructor()
     }
   }
 
-  if (chgflag) {
+  if (chgflag && atom->q_flag) {
     id_fix_chg = utils::strdup(id + std::string("_FIX_STORE_CHG"));
     fix_chg = dynamic_cast<FixStoreAtom *>(
       modify->add_fix(fmt::format("{} {} STORE/ATOM 1 0 0 1",id_fix_chg,group->names[igroup])));
@@ -267,9 +267,9 @@ void FixAdaptFEP::init()
 
     ad->ivar = input->variable->find(ad->var);
     if (ad->ivar < 0)
-      error->all(FLERR,"Variable name for fix adapt/fep does not exist");
+      error->all(FLERR,"Variable name {} for fix adapt/fep does not exist", ad->var);
     if (!input->variable->equalstyle(ad->ivar))
-      error->all(FLERR,"Variable for fix adapt/fep is invalid style");
+      error->all(FLERR,"Variable {} for fix adapt/fep is invalid style", ad->var);
 
     if (ad->which == PAIR) {
       anypair = 1;
@@ -285,8 +285,9 @@ void FixAdaptFEP::init()
       if (ptr == nullptr)
         error->all(FLERR,"Fix adapt/fep pair style param not supported");
 
-      ad->pdim = 2;
-      if (ad->pdim == 0) ad->scalar = (double *) ptr;
+      if (ad->pdim != 2)
+        error->all(FLERR,"Pair style parameter {} is not compatible with fix adapt/fep", ad->pparam);
+
       if (ad->pdim == 2) ad->array = (double **) ptr;
 
       // if pair hybrid, test that ilo,ihi,jlo,jhi are valid for sub-style
diff --git a/src/GPU/fix_nh_gpu.cpp b/src/GPU/fix_nh_gpu.cpp
index 00df278ea8..d15f6c6a7e 100644
--- a/src/GPU/fix_nh_gpu.cpp
+++ b/src/GPU/fix_nh_gpu.cpp
@@ -33,7 +33,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define TILTMAX 1.5
+static constexpr double TILTMAX = 1.5;
 
 enum{NOBIAS,BIAS};
 enum{ISO,ANISO,TRICLINIC};
diff --git a/src/GPU/fix_nve_asphere_gpu.cpp b/src/GPU/fix_nve_asphere_gpu.cpp
index 481f44bb63..9b75964c79 100644
--- a/src/GPU/fix_nve_asphere_gpu.cpp
+++ b/src/GPU/fix_nve_asphere_gpu.cpp
@@ -35,7 +35,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define INERTIA 0.2          // moment of inertia prefactor for ellipsoid
+static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
 
 #define ME_qnormalize(q)                                                \
 {                                                                       \
diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp
index 1621e1f5b6..569dc125f0 100644
--- a/src/GPU/pair_amoeba_gpu.cpp
+++ b/src/GPU/pair_amoeba_gpu.cpp
@@ -51,7 +51,7 @@ enum{GORDON1,GORDON2};
 // same as in pair_amoeba.cpp
 enum{MPOLE_GRID,POLAR_GRID,POLAR_GRIDC,DISP_GRID,INDUCE_GRID,INDUCE_GRIDC};
 
-#define DEBYE 4.80321    // conversion factor from q-Angs (real units) to Debye
+static constexpr double DEBYE = 4.80321;    // conversion factor from q-Angs (real units) to Debye
 
 // External functions from cuda library for atom decomposition
 
diff --git a/src/GPU/pair_born_coul_long_cs_gpu.cpp b/src/GPU/pair_born_coul_long_cs_gpu.cpp
index 798caeb97a..d37f0c53e2 100644
--- a/src/GPU/pair_born_coul_long_cs_gpu.cpp
+++ b/src/GPU/pair_born_coul_long_cs_gpu.cpp
@@ -42,7 +42,7 @@ using namespace MathConst;
 #define B4 -5.80844129e-3
 #define B5 1.14652755e-1
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 #define EPS_EWALD 1.0e-6
 #define EPS_EWALD_SQR 1.0e-12
 
diff --git a/src/GPU/pair_born_coul_wolf_cs_gpu.cpp b/src/GPU/pair_born_coul_wolf_cs_gpu.cpp
index 9858015622..128863527c 100644
--- a/src/GPU/pair_born_coul_wolf_cs_gpu.cpp
+++ b/src/GPU/pair_born_coul_wolf_cs_gpu.cpp
@@ -32,7 +32,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 
 // External functions from cuda library for atom decomposition
 
diff --git a/src/GPU/pair_coul_long_cs_gpu.cpp b/src/GPU/pair_coul_long_cs_gpu.cpp
index 79c4c4ab7c..bbe50e4351 100644
--- a/src/GPU/pair_coul_long_cs_gpu.cpp
+++ b/src/GPU/pair_coul_long_cs_gpu.cpp
@@ -40,7 +40,7 @@ using namespace LAMMPS_NS;
 #define B4 -5.80844129e-3
 #define B5 1.14652755e-1
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 #define EPS_EWALD 1.0e-6
 #define EPS_EWALD_SQR 1.0e-12
 
diff --git a/src/GPU/pair_coul_slater_long_gpu.cpp b/src/GPU/pair_coul_slater_long_gpu.cpp
new file mode 100644
index 0000000000..4ace8bd761
--- /dev/null
+++ b/src/GPU/pair_coul_slater_long_gpu.cpp
@@ -0,0 +1,254 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "pair_coul_slater_long_gpu.h"
+
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "gpu_extra.h"
+#include "kspace.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "suffix.h"
+
+#include <cmath>
+
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429
+
+using namespace LAMMPS_NS;
+
+// External functions from cuda library for atom decomposition
+
+int csl_gpu_init(const int ntypes, double **scale, const int nlocal, const int nall,
+                const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
+                FILE *screen, double host_cut_coulsq, double *host_special_coul,
+                const double qqrd2e, const double g_ewald, const double lamda);
+void csl_gpu_reinit(const int ntypes, double **scale);
+void csl_gpu_clear();
+int **csl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                       int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                       tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                       const bool vatom, int &host_start, int **ilist, int **jnum,
+                       const double cpu_time, bool &success, double *host_q, double *boxlo,
+                       double *prd);
+void csl_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                    int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                    const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                    bool &success, double *host_q, const int nlocal, double *boxlo, double *prd);
+double csl_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+PairCoulSlaterLongGPU::PairCoulSlaterLongGPU(LAMMPS *lmp) : PairCoulSlaterLong(lmp), gpu_mode(GPU_FORCE)
+{
+  respa_enable = 0;
+  cpu_time = 0.0;
+  suffix_flag |= Suffix::GPU;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairCoulSlaterLongGPU::~PairCoulSlaterLongGPU()
+{
+  csl_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairCoulSlaterLongGPU::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+  if (gpu_mode != GPU_FORCE) {
+    double sublo[3], subhi[3];
+    if (domain->triclinic == 0) {
+      sublo[0] = domain->sublo[0];
+      sublo[1] = domain->sublo[1];
+      sublo[2] = domain->sublo[2];
+      subhi[0] = domain->subhi[0];
+      subhi[1] = domain->subhi[1];
+      subhi[2] = domain->subhi[2];
+    } else {
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
+    }
+    inum = atom->nlocal;
+    firstneigh = csl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                  atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                  eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                  success, atom->q, domain->boxlo, domain->prd);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    csl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                   eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
+                   atom->nlocal, domain->boxlo, domain->prd);
+  }
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
+
+  if (atom->molecular != Atom::ATOMIC && neighbor->ago == 0)
+    neighbor->build_topology();
+  if (host_start < inum) {
+    cpu_time = platform::walltime();
+    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
+    cpu_time = platform::walltime() - cpu_time;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairCoulSlaterLongGPU::init_style()
+{
+  if (!atom->q_flag) error->all(FLERR, "Pair style coul/slater/long/gpu requires atom attribute q");
+
+  // Call init_one calculation make sure scale is correct
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { init_one(i, j); }
+    }
+  }
+  double cell_size = cut_coul + neighbor->skin;
+
+  cut_coulsq = cut_coul * cut_coul;
+
+  // ensure use of KSpace long-range solver, set g_ewald
+
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
+  g_ewald = force->kspace->g_ewald;
+
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
+  int mnf = 5e-2 * neighbor->oneatom;
+  int success = csl_gpu_init(atom->ntypes + 1, scale, atom->nlocal, atom->nlocal + atom->nghost, mnf,
+                            maxspecial, cell_size, gpu_mode, screen, cut_coulsq,
+                            force->special_coul, force->qqrd2e, g_ewald, lamda);
+
+  GPU_EXTRA::check_flag(success, error, world);
+
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairCoulSlaterLongGPU::reinit()
+{
+  Pair::reinit();
+
+  csl_gpu_reinit(atom->ntypes + 1, scale);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairCoulSlaterLongGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + csl_gpu_bytes();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairCoulSlaterLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                  int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, ecoul, fpair;
+  double r, r2inv, forcecoul, factor_coul;
+  double grij, expm2, prefactor, t, erfc;
+  int *jlist;
+  double rsq;
+
+  ecoul = 0.0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  double *q = atom->q;
+  double *special_coul = force->special_coul;
+  double qqrd2e = force->qqrd2e;
+
+  // loop over neighbors of my atoms
+
+  for (ii = start; ii < inum; ii++) {
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx * delx + dely * dely + delz * delz;
+
+      r2inv = 1.0 / rsq;
+
+      if (rsq < cut_coulsq) {
+        r2inv = 1.0/rsq;
+        r = sqrt(rsq);
+        grij = g_ewald * r;
+        expm2 = exp(-grij*grij);
+        t = 1.0 / (1.0 + EWALD_P*grij);
+        erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+        double slater_term = exp(-2*r/lamda)*(1 + (2*r/lamda*(1+r/lamda)));
+        prefactor = qqrd2e * qtmp*q[j]/r;
+        forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - slater_term);
+        if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor*(1-slater_term);
+
+        fpair = forcecoul * r2inv;
+
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;
+
+        if (eflag) {
+          if (rsq < cut_coulsq) {
+            ecoul = prefactor*(erfc - (1 + r/lamda)*exp(-2*r/lamda));
+            if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor*(1.0-(1 + r/lamda)*exp(-2*r/lamda));
+          } else
+            ecoul = 0.0;
+        }
+
+        if (evflag) ev_tally_full(i, 0.0, ecoul, fpair, delx, dely, delz);
+      }
+    }
+  }
+}
diff --git a/src/GPU/pair_coul_slater_long_gpu.h b/src/GPU/pair_coul_slater_long_gpu.h
new file mode 100644
index 0000000000..4a30a71d25
--- /dev/null
+++ b/src/GPU/pair_coul_slater_long_gpu.h
@@ -0,0 +1,46 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(coul/slater/long/gpu,PairCoulSlaterLongGPU);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_COUL_SLATER_LONG_GPU_H
+#define LMP_PAIR_COUL_SLATER_LONG_GPU_H
+
+#include "pair_coul_slater_long.h"
+
+namespace LAMMPS_NS {
+
+class PairCoulSlaterLongGPU : public PairCoulSlaterLong {
+ public:
+  PairCoulSlaterLongGPU(LAMMPS *lmp);
+  ~PairCoulSlaterLongGPU() override;
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int) override;
+  void init_style() override;
+  void reinit() override;
+  double memory_usage() override;
+
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+ private:
+  int gpu_mode;
+  double cpu_time;
+};
+
+}    // namespace LAMMPS_NS
+#endif
+#endif
diff --git a/src/GPU/pair_dpd_gpu.cpp b/src/GPU/pair_dpd_gpu.cpp
index e4657cf2eb..afbdedcd11 100644
--- a/src/GPU/pair_dpd_gpu.cpp
+++ b/src/GPU/pair_dpd_gpu.cpp
@@ -53,7 +53,7 @@ void dpd_gpu_compute(const int ago, const int inum_full, const int nall, double
                      double *boxlo, double *prd);
 double dpd_gpu_bytes();
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 //#define _USE_UNIFORM_SARU_LCG
 //#define _USE_UNIFORM_SARU_TEA8
diff --git a/src/GPU/pair_dpd_tstat_gpu.cpp b/src/GPU/pair_dpd_tstat_gpu.cpp
index 4a7b05fd2c..6889a0e0b8 100644
--- a/src/GPU/pair_dpd_tstat_gpu.cpp
+++ b/src/GPU/pair_dpd_tstat_gpu.cpp
@@ -55,7 +55,7 @@ void dpd_tstat_gpu_update_coeff(int ntypes, double **host_a0, double **host_gamm
                                 double **host_sigma, double **host_cut);
 double dpd_tstat_gpu_bytes();
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 //#define _USE_UNIFORM_SARU_LCG
 //#define _USE_UNIFORM_SARU_TEA8
diff --git a/src/GPU/pair_eam_gpu.cpp b/src/GPU/pair_eam_gpu.cpp
index 155da43768..563b3f8284 100644
--- a/src/GPU/pair_eam_gpu.cpp
+++ b/src/GPU/pair_eam_gpu.cpp
@@ -29,7 +29,7 @@
 
 #include <cmath>
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 using namespace LAMMPS_NS;
 
diff --git a/src/GPU/pair_edpd_gpu.cpp b/src/GPU/pair_edpd_gpu.cpp
new file mode 100644
index 0000000000..b6ea6d9098
--- /dev/null
+++ b/src/GPU/pair_edpd_gpu.cpp
@@ -0,0 +1,195 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Dac Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "pair_edpd_gpu.h"
+
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "gpu_extra.h"
+#include "info.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "suffix.h"
+#include "update.h"
+
+#include <cmath>
+
+using namespace LAMMPS_NS;
+
+// External functions from cuda library for atom decomposition
+
+int edpd_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma,
+                  double **host_cut, double **host_power, double **host_kappa,
+                  double **host_powerT, double** host_cutT, double*** host_sc, double ***host_kc,
+                  double *host_mass, double *special_lj, const int power_flag, const int kappa_flag,
+                  const int inum, const int nall, const int max_nbors,
+                  const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen);
+void edpd_gpu_clear();
+int **edpd_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success, double **host_v,
+                        const double dtinvsqrt, const int seed, const int timestep, double *boxlo,
+                        double *prd);
+void edpd_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                     int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                     const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                     const double cpu_time, bool &success, tagint *tag, double **host_v,
+                     const double dtinvsqrt, const int seed, const int timestep, const int nlocal,
+                     double *boxlo, double *prd);
+void edpd_gpu_get_extra_data(double *host_T, double *host_cv);
+void edpd_gpu_update_flux(void **flux_ptr);
+double edpd_gpu_bytes();
+
+static constexpr double EPSILON = 1.0e-10;
+
+/* ---------------------------------------------------------------------- */
+
+PairEDPDGPU::PairEDPDGPU(LAMMPS *lmp) : PairEDPD(lmp), gpu_mode(GPU_FORCE)
+{
+  flux_pinned = nullptr;
+  respa_enable = 0;
+  reinitflag = 0;
+  cpu_time = 0.0;
+  suffix_flag |= Suffix::GPU;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairEDPDGPU::~PairEDPDGPU()
+{
+  edpd_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairEDPDGPU::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  double dtinvsqrt = 1.0 / sqrt(update->dt);
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+
+  double *T = atom->edpd_temp;
+  double *cv = atom->edpd_cv;
+  edpd_gpu_get_extra_data(T, cv);
+
+  if (gpu_mode != GPU_FORCE) {
+    double sublo[3], subhi[3];
+    if (domain->triclinic == 0) {
+      sublo[0] = domain->sublo[0];
+      sublo[1] = domain->sublo[1];
+      sublo[2] = domain->sublo[2];
+      subhi[0] = domain->subhi[0];
+      subhi[1] = domain->subhi[1];
+      subhi[2] = domain->subhi[2];
+    } else {
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
+    }
+    inum = atom->nlocal;
+    firstneigh = edpd_gpu_compute_n(
+        neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
+        atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+        cpu_time, success, atom->v, dtinvsqrt, seed, update->ntimestep, domain->boxlo, domain->prd);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    edpd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->tag,
+                    atom->v, dtinvsqrt, seed, update->ntimestep, atom->nlocal, domain->boxlo, domain->prd);
+  }
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
+
+  // get the heat flux from device
+
+  double *Q = atom->edpd_flux;
+  edpd_gpu_update_flux(&flux_pinned);
+
+  int nlocal = atom->nlocal;
+  if (acc_float) {
+    auto flux_ptr = (float *)flux_pinned;
+    for (int i = 0; i < nlocal; i++)
+      Q[i] = flux_ptr[i];
+
+  } else {
+    auto flux_ptr = (double *)flux_pinned;
+    for (int i = 0; i < nlocal; i++)
+      Q[i] = flux_ptr[i];
+  }
+
+  if (atom->molecular != Atom::ATOMIC && neighbor->ago == 0)
+    neighbor->build_topology();
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairEDPDGPU::init_style()
+{
+
+  // Repeat cutsq calculation because done after call to init_style
+  double maxcut = -1.0;
+  double mcut;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        mcut = init_one(i, j);
+        mcut *= mcut;
+        if (mcut > maxcut) maxcut = mcut;
+        cutsq[i][j] = cutsq[j][i] = mcut;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;
+
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
+  int mnf = 5e-2 * neighbor->oneatom;
+  int success =
+      edpd_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, cut, power, kappa,
+                    powerT, cutT, sc, kc, atom->mass, force->special_lj,
+                    power_flag, kappa_flag, atom->nlocal, atom->nlocal + atom->nghost,
+                    mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);
+
+  acc_float = Info::has_accelerator_feature("GPU", "precision", "single");
+
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairEDPDGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + edpd_gpu_bytes();
+}
diff --git a/src/npair_half_size_nsq_newton.h b/src/GPU/pair_edpd_gpu.h
similarity index 61%
rename from src/npair_half_size_nsq_newton.h
rename to src/GPU/pair_edpd_gpu.h
index d55785bd72..75495b2ca4 100644
--- a/src/npair_half_size_nsq_newton.h
+++ b/src/GPU/pair_edpd_gpu.h
@@ -11,28 +11,38 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#ifdef NPAIR_CLASS
+#ifdef PAIR_CLASS
 // clang-format off
-NPairStyle(half/size/nsq/newton,
-           NPairHalfSizeNsqNewton,
-           NP_HALF | NP_SIZE | NP_NSQ | NP_NEWTON | NP_ORTHO | NP_TRI);
+PairStyle(edpd/gpu,PairEDPDGPU);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_SIZE_NSQ_NEWTON_H
-#define LMP_NPAIR_HALF_SIZE_NSQ_NEWTON_H
+#ifndef LMP_PAIR_EDPD_GPU_H
+#define LMP_PAIR_EDPD_GPU_H
 
-#include "npair.h"
+#include "pair_edpd.h"
 
 namespace LAMMPS_NS {
 
-class NPairHalfSizeNsqNewton : public NPair {
+class PairEDPDGPU : public PairEDPD {
  public:
-  NPairHalfSizeNsqNewton(class LAMMPS *);
-  void build(class NeighList *) override;
+  PairEDPDGPU(LAMMPS *lmp);
+  ~PairEDPDGPU() override;
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int) override;
+  void init_style() override;
+  double memory_usage() override;
+
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+  void *flux_pinned;
+  bool acc_float;
+
+ private:
+  int gpu_mode;
+  double cpu_time;
 };
 
 }    // namespace LAMMPS_NS
-
 #endif
 #endif
diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp
index 59a95619d7..7346ffda8c 100644
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@@ -50,7 +50,7 @@ enum{GORDON1,GORDON2};
 // same as in pair_amoeba.cpp
 enum{MPOLE_GRID,POLAR_GRID,POLAR_GRIDC,DISP_GRID,INDUCE_GRID,INDUCE_GRIDC};
 
-#define DEBYE 4.80321    // conversion factor from q-Angs (real units) to Debye
+static constexpr double DEBYE = 4.80321;    // conversion factor from q-Angs (real units) to Debye
 
 // External functions from cuda library for atom decomposition
 
diff --git a/src/GPU/pair_lj_cut_coul_cut_soft_gpu.cpp b/src/GPU/pair_lj_cut_coul_cut_soft_gpu.cpp
new file mode 100644
index 0000000000..cfde3ab632
--- /dev/null
+++ b/src/GPU/pair_lj_cut_coul_cut_soft_gpu.cpp
@@ -0,0 +1,249 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "pair_lj_cut_coul_cut_soft_gpu.h"
+
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "gpu_extra.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "suffix.h"
+
+#include <cmath>
+
+using namespace LAMMPS_NS;
+
+// External functions from cuda library for atom decomposition
+
+int ljcs_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                 double **host_lj3, double **host_lj4, double **offset, double **epsilon, double *special_lj,
+                 const int nlocal, const int nall, const int max_nbors, const int maxspecial,
+                 const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                 double **host_cut_coulsq, double *host_special_coul, const double qqrd2e);
+void ljcs_gpu_clear();
+int **ljcs_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                        int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                        const bool vatom, int &host_start, int **ilist, int **jnum,
+                        const double cpu_time, bool &success, double *host_q, double *boxlo,
+                        double *prd);
+void ljcs_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
+                     int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
+                     const bool eatom, const bool vatom, int &host_start, const double cpu_time,
+                     bool &success, double *host_q, const int nlocal, double *boxlo, double *prd);
+double ljcs_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCutCoulCutSoftGPU::PairLJCutCoulCutSoftGPU(LAMMPS *lmp) :
+    PairLJCutCoulCutSoft(lmp), gpu_mode(GPU_FORCE)
+{
+  respa_enable = 0;
+  reinitflag = 0;
+  cpu_time = 0.0;
+  suffix_flag |= Suffix::GPU;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairLJCutCoulCutSoftGPU::~PairLJCutCoulCutSoftGPU()
+{
+  ljcs_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulCutSoftGPU::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+  if (gpu_mode != GPU_FORCE) {
+    double sublo[3], subhi[3];
+    if (domain->triclinic == 0) {
+      sublo[0] = domain->sublo[0];
+      sublo[1] = domain->sublo[1];
+      sublo[2] = domain->sublo[2];
+      subhi[0] = domain->subhi[0];
+      subhi[1] = domain->subhi[1];
+      subhi[2] = domain->subhi[2];
+    } else {
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
+    }
+    inum = atom->nlocal;
+    firstneigh = ljcs_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                   atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                   eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                   success, atom->q, domain->boxlo, domain->prd);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    ljcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
+                    atom->nlocal, domain->boxlo, domain->prd);
+  }
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
+
+  if (atom->molecular != Atom::ATOMIC && neighbor->ago == 0)
+    neighbor->build_topology();
+  if (host_start < inum) {
+    cpu_time = platform::walltime();
+    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
+    cpu_time = platform::walltime() - cpu_time;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairLJCutCoulCutSoftGPU::init_style()
+{
+  if (!atom->q_flag) error->all(FLERR, "Pair style lj/cut/coul/cut/soft/gpu requires atom attribute q");
+
+  // Repeat cutsq calculation because done after call to init_style
+  double maxcut = -1.0;
+  double cut;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        cut = init_one(i, j);
+        cut *= cut;
+        if (cut > maxcut) maxcut = cut;
+        cutsq[i][j] = cutsq[j][i] = cut;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;
+
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
+  int mnf = 5e-2 * neighbor->oneatom;
+  int success =
+      ljcs_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, epsilon, force->special_lj,
+                   atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                   screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e);
+  GPU_EXTRA::check_flag(success, error, world);
+
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJCutCoulCutSoftGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + ljcs_gpu_bytes();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulCutSoftGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                       int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double forcecoul, forcelj, factor_coul, factor_lj;
+  double denc, denlj, r4sig6;
+  int *jlist;
+  double rsq;
+
+  evdwl = ecoul = 0.0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  double *q = atom->q;
+  int *type = atom->type;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
+
+  // loop over neighbors of my atoms
+
+  for (ii = start; ii < inum; ii++) {
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx * delx + dely * dely + delz * delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+
+        if (rsq < cut_coulsq[itype][jtype]) {
+          denc = sqrt(lj4[itype][jtype] + rsq);
+          forcecoul = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
+        } else forcecoul = 0.0;
+
+        if (rsq < cut_ljsq[itype][jtype]) {
+          r4sig6 = rsq*rsq / lj2[itype][jtype];
+          denlj = lj3[itype][jtype] + rsq*r4sig6;
+          forcelj = lj1[itype][jtype] * epsilon[itype][jtype] *
+            (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
+        } else forcelj = 0.0;
+
+        fpair = factor_coul*forcecoul + factor_lj*forcelj;
+
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;
+
+        if (eflag) {
+          if (rsq < cut_coulsq[itype][jtype])
+            ecoul = factor_coul * qqrd2e * lj1[itype][jtype] * qtmp*q[j] / denc;
+          else
+            ecoul = 0.0;
+          if (rsq < cut_ljsq[itype][jtype]) {
+            evdwl = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] *
+              (1.0/(denlj*denlj) - 1.0/denlj) - offset[itype][jtype];
+            evdwl *= factor_lj;
+          } else
+            evdwl = 0.0;
+        }
+
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
+      }
+    }
+  }
+}
diff --git a/src/GPU/pair_lj_cut_coul_cut_soft_gpu.h b/src/GPU/pair_lj_cut_coul_cut_soft_gpu.h
new file mode 100644
index 0000000000..0776695ba3
--- /dev/null
+++ b/src/GPU/pair_lj_cut_coul_cut_soft_gpu.h
@@ -0,0 +1,45 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(lj/cut/coul/cut/soft/gpu,PairLJCutCoulCutSoftGPU);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_CUT_SOFT_GPU_H
+#define LMP_PAIR_LJ_CUT_COUL_CUT_SOFT_GPU_H
+
+#include "pair_lj_cut_coul_cut_soft.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCutCoulCutSoftGPU : public PairLJCutCoulCutSoft {
+ public:
+  PairLJCutCoulCutSoftGPU(LAMMPS *lmp);
+  ~PairLJCutCoulCutSoftGPU() override;
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int) override;
+  void init_style() override;
+  double memory_usage() override;
+
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+ private:
+  int gpu_mode;
+  double cpu_time;
+};
+
+}    // namespace LAMMPS_NS
+#endif
+#endif
diff --git a/src/GPU/pair_lj_cut_coul_long_soft_gpu.cpp b/src/GPU/pair_lj_cut_coul_long_soft_gpu.cpp
new file mode 100644
index 0000000000..e8342b6530
--- /dev/null
+++ b/src/GPU/pair_lj_cut_coul_long_soft_gpu.cpp
@@ -0,0 +1,297 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "pair_lj_cut_coul_long_soft_gpu.h"
+
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "gpu_extra.h"
+#include "kspace.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "suffix.h"
+
+#include <cmath>
+
+#define EWALD_F 1.12837917
+#define EWALD_P 0.3275911
+#define A1 0.254829592
+#define A2 -0.284496736
+#define A3 1.421413741
+#define A4 -1.453152027
+#define A5 1.061405429
+
+using namespace LAMMPS_NS;
+
+// External functions from cuda library for atom decomposition
+
+int ljcls_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                  double **host_lj3, double **host_lj4, double **offset,  double **epsilon, double *special_lj,
+                  const int nlocal, const int nall, const int max_nbors, const int maxspecial,
+                  const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
+                  double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
+                  const double g_ewald);
+void ljcls_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
+                     double **host_lj3, double **host_lj4, double **offset, double **epsilon,
+                     double **host_lj_cutsq);
+void ljcls_gpu_clear();
+int **ljcls_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, double *host_q, double *boxlo,
+                         double *prd);
+void ljcls_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, double *host_q, const int nlocal,
+                      double *boxlo, double *prd);
+double ljcls_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCutCoulLongSoftGPU::PairLJCutCoulLongSoftGPU(LAMMPS *lmp) :
+    PairLJCutCoulLongSoft(lmp), gpu_mode(GPU_FORCE)
+{
+  respa_enable = 0;
+  cpu_time = 0.0;
+  suffix_flag |= Suffix::GPU;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairLJCutCoulLongSoftGPU::~PairLJCutCoulLongSoftGPU()
+{
+  ljcls_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulLongSoftGPU::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+  if (gpu_mode != GPU_FORCE) {
+    double sublo[3], subhi[3];
+    if (domain->triclinic == 0) {
+      sublo[0] = domain->sublo[0];
+      sublo[1] = domain->sublo[1];
+      sublo[2] = domain->sublo[2];
+      subhi[0] = domain->subhi[0];
+      subhi[1] = domain->subhi[1];
+      subhi[2] = domain->subhi[2];
+    } else {
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
+    }
+    inum = atom->nlocal;
+    firstneigh = ljcls_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+                                    atom->tag, atom->nspecial, atom->special, eflag, vflag,
+                                    eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
+                                    success, atom->q, domain->boxlo, domain->prd);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    ljcls_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                     eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
+                     atom->nlocal, domain->boxlo, domain->prd);
+  }
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
+
+  if (host_start < inum) {
+    cpu_time = platform::walltime();
+    cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
+    cpu_time = platform::walltime() - cpu_time;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairLJCutCoulLongSoftGPU::init_style()
+{
+  cut_respa = nullptr;
+
+  if (!atom->q_flag) error->all(FLERR, "Pair style lj/cut/coul/long/soft/gpu requires atom attribute q");
+
+  // Repeat cutsq calculation because done after call to init_style
+  double maxcut = -1.0;
+  double cut;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        cut = init_one(i, j);
+        cut *= cut;
+        if (cut > maxcut) maxcut = cut;
+        cutsq[i][j] = cutsq[j][i] = cut;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;
+
+  cut_coulsq = cut_coul * cut_coul;
+
+  // insure use of KSpace long-range solver, set g_ewald
+
+  if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
+  g_ewald = force->kspace->g_ewald;
+
+  // setup force tables
+
+  if (ncoultablebits) init_tables(cut_coul, cut_respa);
+
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
+  int mnf = 5e-2 * neighbor->oneatom;
+  int success =
+      ljcls_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, epsilon, force->special_lj,
+                    atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
+                    screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);
+  GPU_EXTRA::check_flag(success, error, world);
+
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulLongSoftGPU::reinit()
+{
+  Pair::reinit();
+
+  ljcls_gpu_reinit(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, epsilon, cut_ljsq);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairLJCutCoulLongSoftGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + ljcls_gpu_bytes();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairLJCutCoulLongSoftGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
+                                       int *numneigh, int **firstneigh)
+{
+  int i, j, ii, jj, jnum, itype, jtype;
+  double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
+  double r, r2inv, forcecoul, forcelj, factor_coul, factor_lj;
+  double denc, denlj, r4sig6;
+  double grij, expm2, prefactor, t, erfc;
+  int *jlist;
+  double rsq;
+
+  evdwl = ecoul = 0.0;
+
+  double **x = atom->x;
+  double **f = atom->f;
+  double *q = atom->q;
+  int *type = atom->type;
+  double *special_coul = force->special_coul;
+  double *special_lj = force->special_lj;
+  double qqrd2e = force->qqrd2e;
+
+  // loop over neighbors of my atoms
+
+  for (ii = start; ii < inum; ii++) {
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
+      factor_coul = special_coul[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx * delx + dely * dely + delz * delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        r2inv = 1.0 / rsq;
+
+        if (rsq < cut_coulsq) {
+          r = sqrt(rsq);
+          grij = g_ewald * r;
+          expm2 = exp(-grij * grij);
+          t = 1.0 / (1.0 + EWALD_P * grij);
+          erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
+
+          denc = sqrt(lj4[itype][jtype] + rsq);
+          prefactor = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / (denc*denc*denc);
+
+          forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
+          if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
+        } else
+          forcecoul = 0.0;
+
+        if (rsq < cut_ljsq[itype][jtype]) {
+          r4sig6 = rsq*rsq / lj2[itype][jtype];
+          denlj = lj3[itype][jtype] + rsq*r4sig6;
+          forcelj = lj1[itype][jtype] * epsilon[itype][jtype] *
+            (48.0*r4sig6/(denlj*denlj*denlj) - 24.0*r4sig6/(denlj*denlj));
+        } else
+          forcelj = 0.0;
+
+        fpair = (forcecoul + factor_lj * forcelj) * r2inv;
+
+        f[i][0] += delx * fpair;
+        f[i][1] += dely * fpair;
+        f[i][2] += delz * fpair;
+
+        if (eflag) {
+          if (rsq < cut_coulsq) {
+            prefactor = qqrd2e * lj1[itype][jtype] * qtmp*q[j] / denc;
+            ecoul = prefactor*erfc;
+          } else
+            ecoul = 0.0;
+
+          if (rsq < cut_ljsq[itype][jtype]) {
+            evdwl = lj1[itype][jtype] * 4.0 * epsilon[itype][jtype] *
+              (1.0/(denlj*denlj) - 1.0/denlj) - offset[itype][jtype];
+            evdwl *= factor_lj;
+          } else
+            evdwl = 0.0;
+        }
+
+        if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
+      }
+    }
+  }
+}
diff --git a/src/GPU/pair_lj_cut_coul_long_soft_gpu.h b/src/GPU/pair_lj_cut_coul_long_soft_gpu.h
new file mode 100644
index 0000000000..cb6790d333
--- /dev/null
+++ b/src/GPU/pair_lj_cut_coul_long_soft_gpu.h
@@ -0,0 +1,46 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(lj/cut/coul/long/soft/gpu,PairLJCutCoulLongSoftGPU);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_LONG_SOFT_GPU_H
+#define LMP_PAIR_LJ_CUT_COUL_LONG_SOFT_GPU_H
+
+#include "pair_lj_cut_coul_long_soft.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCutCoulLongSoftGPU : public PairLJCutCoulLongSoft {
+ public:
+  PairLJCutCoulLongSoftGPU(LAMMPS *lmp);
+  ~PairLJCutCoulLongSoftGPU() override;
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int) override;
+  void init_style() override;
+  void reinit() override;
+  double memory_usage() override;
+
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+ private:
+  int gpu_mode;
+  double cpu_time;
+};
+
+}    // namespace LAMMPS_NS
+#endif
+#endif
diff --git a/src/GPU/pair_mdpd_gpu.cpp b/src/GPU/pair_mdpd_gpu.cpp
new file mode 100644
index 0000000000..78861504ee
--- /dev/null
+++ b/src/GPU/pair_mdpd_gpu.cpp
@@ -0,0 +1,171 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Dac Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "pair_mdpd_gpu.h"
+
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "gpu_extra.h"
+#include "info.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "suffix.h"
+#include "update.h"
+
+#include <cmath>
+
+using namespace LAMMPS_NS;
+
+// External functions from cuda library for atom decomposition
+
+int mdpd_gpu_init(const int ntypes, double **cutsq, double **host_A_att, double **host_B_rep,
+                  double **host_gamma, double **host_sigma, double **host_cut, double **host_cut_r,
+                  double *special_lj, const int inum, const int nall, const int max_nbors,
+                  const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen);
+void mdpd_gpu_clear();
+int **mdpd_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
+                         int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag, const bool eatom,
+                         const bool vatom, int &host_start, int **ilist, int **jnum,
+                         const double cpu_time, bool &success, double **host_v,
+                         const double dtinvsqrt, const int seed, const int timestep, double *boxlo,
+                         double *prd);
+void mdpd_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
+                      int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
+                      const bool vflag, const bool eatom, const bool vatom, int &host_start,
+                      const double cpu_time, bool &success, tagint *tag, double **host_v,
+                      const double dtinvsqrt, const int seed, const int timestep, const int nlocal,
+                      double *boxlo, double *prd);
+void mdpd_gpu_get_extra_data(double *host_rho);
+double mdpd_gpu_bytes();
+
+static constexpr double EPSILON = 1.0e-10;
+
+/* ---------------------------------------------------------------------- */
+
+PairMDPDGPU::PairMDPDGPU(LAMMPS *lmp) : PairMDPD(lmp), gpu_mode(GPU_FORCE)
+{
+  respa_enable = 0;
+  reinitflag = 0;
+  cpu_time = 0.0;
+  suffix_flag |= Suffix::GPU;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairMDPDGPU::~PairMDPDGPU()
+{
+  mdpd_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairMDPDGPU::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  double dtinvsqrt = 1.0 / sqrt(update->dt);
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+
+  double *rho = atom->rho;
+  mdpd_gpu_get_extra_data(rho);
+
+  if (gpu_mode != GPU_FORCE) {
+    double sublo[3], subhi[3];
+    if (domain->triclinic == 0) {
+      sublo[0] = domain->sublo[0];
+      sublo[1] = domain->sublo[1];
+      sublo[2] = domain->sublo[2];
+      subhi[0] = domain->subhi[0];
+      subhi[1] = domain->subhi[1];
+      subhi[2] = domain->subhi[2];
+    } else {
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
+    }
+    inum = atom->nlocal;
+    firstneigh = mdpd_gpu_compute_n(
+        neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
+        atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+        cpu_time, success, atom->v, dtinvsqrt, seed, update->ntimestep, domain->boxlo, domain->prd);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    mdpd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                    eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->tag,
+                    atom->v, dtinvsqrt, seed, update->ntimestep, atom->nlocal, domain->boxlo, domain->prd);
+  }
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
+
+  if (atom->molecular != Atom::ATOMIC && neighbor->ago == 0)
+    neighbor->build_topology();
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairMDPDGPU::init_style()
+{
+
+  // Repeat cutsq calculation because done after call to init_style
+  double maxcut = -1.0;
+  double mcut;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        mcut = init_one(i, j);
+        mcut *= mcut;
+        if (mcut > maxcut) maxcut = mcut;
+        cutsq[i][j] = cutsq[j][i] = mcut;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;
+
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
+  int mnf = 5e-2 * neighbor->oneatom;
+  int success =
+      mdpd_gpu_init(atom->ntypes + 1, cutsq, A_att, B_rep, gamma, sigma,
+                    cut, cut_r, force->special_lj,
+                    atom->nlocal, atom->nlocal + atom->nghost,
+                    mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);
+
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairMDPDGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + mdpd_gpu_bytes();
+}
diff --git a/src/npair_half_size_bin_newton.h b/src/GPU/pair_mdpd_gpu.h
similarity index 63%
rename from src/npair_half_size_bin_newton.h
rename to src/GPU/pair_mdpd_gpu.h
index de11284a8a..5f27c4014e 100644
--- a/src/npair_half_size_bin_newton.h
+++ b/src/GPU/pair_mdpd_gpu.h
@@ -11,28 +11,35 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#ifdef NPAIR_CLASS
+#ifdef PAIR_CLASS
 // clang-format off
-NPairStyle(half/size/bin/newton,
-           NPairHalfSizeBinNewton,
-           NP_HALF | NP_SIZE | NP_BIN | NP_NEWTON | NP_ORTHO);
+PairStyle(mdpd/gpu,PairMDPDGPU);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_SIZE_BIN_NEWTON_H
-#define LMP_NPAIR_HALF_SIZE_BIN_NEWTON_H
+#ifndef LMP_PAIR_MDPD_GPU_H
+#define LMP_PAIR_MDPD_GPU_H
 
-#include "npair.h"
+#include "pair_mdpd.h"
 
 namespace LAMMPS_NS {
 
-class NPairHalfSizeBinNewton : public NPair {
+class PairMDPDGPU : public PairMDPD {
  public:
-  NPairHalfSizeBinNewton(class LAMMPS *);
-  void build(class NeighList *) override;
+  PairMDPDGPU(LAMMPS *lmp);
+  ~PairMDPDGPU() override;
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int) override;
+  void init_style() override;
+  double memory_usage() override;
+
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+ private:
+  int gpu_mode;
+  double cpu_time;
 };
 
 }    // namespace LAMMPS_NS
-
 #endif
 #endif
diff --git a/src/GPU/pair_sph_heatconduction_gpu.cpp b/src/GPU/pair_sph_heatconduction_gpu.cpp
new file mode 100644
index 0000000000..a81de53c91
--- /dev/null
+++ b/src/GPU/pair_sph_heatconduction_gpu.cpp
@@ -0,0 +1,196 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Dac Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "pair_sph_heatconduction_gpu.h"
+
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "gpu_extra.h"
+#include "info.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "suffix.h"
+#include "update.h"
+
+#include <cmath>
+
+using namespace LAMMPS_NS;
+
+// External functions from cuda library for atom decomposition
+
+int sph_heatconduction_gpu_init(const int ntypes, double **cutsq, double** host_cut,
+                                double **host_alpha, double* host_mass,
+                                const int dimension, double *special_lj,
+                                const int inum, const int nall,
+                                const int max_nbors,  const int maxspecial,
+                                const double cell_size, int &gpu_mode, FILE *screen);
+void sph_heatconduction_gpu_clear();
+int **sph_heatconduction_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                           double **host_x, int *host_type, double *sublo,
+                           double *subhi, tagint *host_tag, int **nspecial,
+                           tagint **special, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom, int &host_start,
+                           int **ilist, int **jnum, const double cpu_time, bool &success,
+                           double **host_v);
+void sph_heatconduction_gpu_compute(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, int *ilist, int *numj,
+                        int **firstneigh, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, tagint *host_tag,
+                        double **host_v);
+void sph_heatconduction_gpu_get_extra_data(double *host_rho, double *host_esph);
+void sph_heatconduction_gpu_update_dE(void **dE_ptr);
+double sph_heatconduction_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+PairSPHHeatConductionGPU::PairSPHHeatConductionGPU(LAMMPS *lmp) :
+  PairSPHHeatConduction(lmp), gpu_mode(GPU_FORCE)
+{
+  dE_pinned = nullptr;
+  respa_enable = 0;
+  reinitflag = 0;
+  cpu_time = 0.0;
+  suffix_flag |= Suffix::GPU;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairSPHHeatConductionGPU::~PairSPHHeatConductionGPU()
+{
+  sph_heatconduction_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairSPHHeatConductionGPU::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+
+  double *rho = atom->rho;
+  double *esph = atom->esph;
+  sph_heatconduction_gpu_get_extra_data(rho, esph);
+
+  if (gpu_mode != GPU_FORCE) {
+    double sublo[3], subhi[3];
+    if (domain->triclinic == 0) {
+      sublo[0] = domain->sublo[0];
+      sublo[1] = domain->sublo[1];
+      sublo[2] = domain->sublo[2];
+      subhi[0] = domain->subhi[0];
+      subhi[1] = domain->subhi[1];
+      subhi[2] = domain->subhi[2];
+    } else {
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
+    }
+    inum = atom->nlocal;
+    firstneigh = sph_heatconduction_gpu_compute_n(
+        neighbor->ago, inum, nall, atom->x, atom->type,
+        sublo, subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag,
+        eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+        cpu_time, success, atom->v);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    sph_heatconduction_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
+                       ilist, numneigh, firstneigh, eflag, vflag,
+                       eflag_atom, vflag_atom, host_start, cpu_time, success,
+                       atom->tag, atom->v);
+  }
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
+
+  // get the drho and dE from device
+
+  double *desph = atom->desph;
+  sph_heatconduction_gpu_update_dE(&dE_pinned);
+
+  int nlocal = atom->nlocal;
+  if (acc_float) {
+    auto dE_ptr = (float *)dE_pinned;
+    for (int i = 0; i < nlocal; i++) {
+      desph[i] = dE_ptr[i];
+    }
+
+  } else {
+    auto dE_ptr = (double *)dE_pinned;
+    for (int i = 0; i < nlocal; i++) {
+      desph[i] = dE_ptr[i];
+    }
+  }
+
+  if (atom->molecular != Atom::ATOMIC && neighbor->ago == 0)
+    neighbor->build_topology();
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairSPHHeatConductionGPU::init_style()
+{
+
+  // Repeat cutsq calculation because done after call to init_style
+  double maxcut = -1.0;
+  double mcut;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        mcut = init_one(i, j);
+        mcut *= mcut;
+        if (mcut > maxcut) maxcut = mcut;
+        cutsq[i][j] = cutsq[j][i] = mcut;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;
+
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
+  int mnf = 5e-2 * neighbor->oneatom;
+  int success =
+      sph_heatconduction_gpu_init(atom->ntypes + 1, cutsq, cut, alpha, atom->mass,
+                      domain->dimension, force->special_lj, atom->nlocal,
+                      atom->nlocal + atom->nghost,
+                      mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);
+
+  acc_float = Info::has_accelerator_feature("GPU", "precision", "single");
+
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairSPHHeatConductionGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + sph_heatconduction_gpu_bytes();
+}
diff --git a/src/npair_half_multi_newton.h b/src/GPU/pair_sph_heatconduction_gpu.h
similarity index 57%
rename from src/npair_half_multi_newton.h
rename to src/GPU/pair_sph_heatconduction_gpu.h
index 98552db81a..571334017d 100644
--- a/src/npair_half_multi_newton.h
+++ b/src/GPU/pair_sph_heatconduction_gpu.h
@@ -11,28 +11,38 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#ifdef NPAIR_CLASS
+#ifdef PAIR_CLASS
 // clang-format off
-NPairStyle(half/multi/newton,
-           NPairHalfMultiNewton,
-           NP_HALF | NP_MULTI | NP_NEWTON | NP_ORTHO);
+PairStyle(sph/heatconduction/gpu,PairSPHHeatConductionGPU);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_MULTI_NEWTON_H
-#define LMP_NPAIR_HALF_MULTI_NEWTON_H
+#ifndef LMP_PAIR_SPH_HEATCONDUCTION_GPU_H
+#define LMP_PAIR_SPH_HEATCONDUCTION_GPU_H
 
-#include "npair.h"
+#include "pair_sph_heatconduction.h"
 
 namespace LAMMPS_NS {
 
-class NPairHalfMultiNewton : public NPair {
+class PairSPHHeatConductionGPU : public PairSPHHeatConduction {
  public:
-  NPairHalfMultiNewton(class LAMMPS *);
-  void build(class NeighList *) override;
+  PairSPHHeatConductionGPU(LAMMPS *lmp);
+  ~PairSPHHeatConductionGPU() override;
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int) override;
+  void init_style() override;
+  double memory_usage() override;
+
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+  void *dE_pinned;
+  bool acc_float;
+
+ private:
+  int gpu_mode;
+  double cpu_time;
 };
 
 }    // namespace LAMMPS_NS
-
 #endif
 #endif
diff --git a/src/GPU/pair_sph_lj_gpu.cpp b/src/GPU/pair_sph_lj_gpu.cpp
new file mode 100644
index 0000000000..46d7b38073
--- /dev/null
+++ b/src/GPU/pair_sph_lj_gpu.cpp
@@ -0,0 +1,204 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Dac Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "pair_sph_lj_gpu.h"
+
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "gpu_extra.h"
+#include "info.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "suffix.h"
+#include "update.h"
+
+#include <cmath>
+
+using namespace LAMMPS_NS;
+
+// External functions from cuda library for atom decomposition
+
+int sph_lj_gpu_init(const int ntypes, double **cutsq, double** host_cut,
+                    double **host_viscosity, double* host_mass,
+                     const int dimension, double *special_lj,
+                    const int inum, const int nall,
+                    const int max_nbors,  const int maxspecial,
+                    const double cell_size, int &gpu_mode, FILE *screen);
+void sph_lj_gpu_clear();
+int **sph_lj_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                           double **host_x, int *host_type, double *sublo,
+                           double *subhi, tagint *host_tag, int **nspecial,
+                           tagint **special, const bool eflag, const bool vflag,
+                           const bool eatom, const bool vatom, int &host_start,
+                           int **ilist, int **jnum, const double cpu_time, bool &success,
+                           double **host_v);
+void sph_lj_gpu_compute(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, int *ilist, int *numj,
+                        int **firstneigh, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, tagint *host_tag,
+                        double **host_v);
+void sph_lj_gpu_get_extra_data(double *host_rho, double *host_esph,
+                               double *host_cv);
+void sph_lj_gpu_update_drhoE(void **drhoE_ptr);
+double sph_lj_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+PairSPHLJGPU::PairSPHLJGPU(LAMMPS *lmp) : PairSPHLJ(lmp), gpu_mode(GPU_FORCE)
+{
+  drhoE_pinned = nullptr;
+  respa_enable = 0;
+  reinitflag = 0;
+  cpu_time = 0.0;
+  suffix_flag |= Suffix::GPU;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairSPHLJGPU::~PairSPHLJGPU()
+{
+  sph_lj_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairSPHLJGPU::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+
+  double *rho = atom->rho;
+  double *esph = atom->esph;
+  double *cv = atom->cv;
+  sph_lj_gpu_get_extra_data(rho, esph, cv);
+
+  if (gpu_mode != GPU_FORCE) {
+    double sublo[3], subhi[3];
+    if (domain->triclinic == 0) {
+      sublo[0] = domain->sublo[0];
+      sublo[1] = domain->sublo[1];
+      sublo[2] = domain->sublo[2];
+      subhi[0] = domain->subhi[0];
+      subhi[1] = domain->subhi[1];
+      subhi[2] = domain->subhi[2];
+    } else {
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
+    }
+    inum = atom->nlocal;
+    firstneigh = sph_lj_gpu_compute_n(
+        neighbor->ago, inum, nall, atom->x, atom->type,
+        sublo, subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag,
+        eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+        cpu_time, success, atom->v);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    sph_lj_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
+                       ilist, numneigh, firstneigh, eflag, vflag,
+                       eflag_atom, vflag_atom, host_start, cpu_time, success,
+                       atom->tag, atom->v);
+  }
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
+
+  // get the drho and dE from device
+
+  double *drho = atom->drho;
+  double *desph = atom->desph;
+  sph_lj_gpu_update_drhoE(&drhoE_pinned);
+
+  int nlocal = atom->nlocal;
+  if (acc_float) {
+    auto drhoE_ptr = (float *)drhoE_pinned;
+    int idx = 0;
+    for (int i = 0; i < nlocal; i++) {
+      drho[i] = drhoE_ptr[idx];
+      desph[i] = drhoE_ptr[idx+1];
+      idx += 2;
+    }
+
+  } else {
+    auto drhoE_ptr = (double *)drhoE_pinned;
+    int idx = 0;
+    for (int i = 0; i < nlocal; i++) {
+      drho[i] = drhoE_ptr[idx];
+      desph[i] = drhoE_ptr[idx+1];
+      idx += 2;
+    }
+  }
+
+  if (atom->molecular != Atom::ATOMIC && neighbor->ago == 0)
+    neighbor->build_topology();
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairSPHLJGPU::init_style()
+{
+
+  // Repeat cutsq calculation because done after call to init_style
+  double maxcut = -1.0;
+  double mcut;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        mcut = init_one(i, j);
+        mcut *= mcut;
+        if (mcut > maxcut) maxcut = mcut;
+        cutsq[i][j] = cutsq[j][i] = mcut;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;
+
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
+  int mnf = 5e-2 * neighbor->oneatom;
+  int success =
+      sph_lj_gpu_init(atom->ntypes + 1, cutsq, cut, viscosity, atom->mass,
+                      domain->dimension, force->special_lj, atom->nlocal,
+                      atom->nlocal + atom->nghost,
+                      mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);
+
+  acc_float = Info::has_accelerator_feature("GPU", "precision", "single");
+
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairSPHLJGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + sph_lj_gpu_bytes();
+}
diff --git a/src/INTEL/npair_full_bin_intel.h b/src/GPU/pair_sph_lj_gpu.h
similarity index 59%
rename from src/INTEL/npair_full_bin_intel.h
rename to src/GPU/pair_sph_lj_gpu.h
index 58ff21d22c..9aae3c2d6a 100644
--- a/src/INTEL/npair_full_bin_intel.h
+++ b/src/GPU/pair_sph_lj_gpu.h
@@ -1,4 +1,3 @@
-// clang-format off
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    https://www.lammps.org/, Sandia National Laboratories
@@ -12,33 +11,38 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#ifdef NPAIR_CLASS
+#ifdef PAIR_CLASS
 // clang-format off
-NPairStyle(full/bin/intel,
-           NPairFullBinIntel,
-           NP_FULL | NP_BIN | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI |
-           NP_INTEL);
+PairStyle(sph/lj/gpu,PairSPHLJGPU);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_FULL_BIN_INTEL_H
-#define LMP_NPAIR_FULL_BIN_INTEL_H
+#ifndef LMP_PAIR_SPH_LJ_GPU_H
+#define LMP_PAIR_SPH_LJ_GPU_H
 
-#include "fix_intel.h"
-#include "npair_intel.h"
+#include "pair_sph_lj.h"
 
 namespace LAMMPS_NS {
 
-class NPairFullBinIntel : public NPairIntel {
+class PairSPHLJGPU : public PairSPHLJ {
  public:
-  NPairFullBinIntel(class LAMMPS *);
-  void build(class NeighList *) override;
+  PairSPHLJGPU(LAMMPS *lmp);
+  ~PairSPHLJGPU() override;
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int) override;
+  void init_style() override;
+  double memory_usage() override;
+
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+  void *drhoE_pinned;
+  bool acc_float;
 
  private:
-  template <class flt_t, class acc_t> void fbi(NeighList *, IntelBuffers<flt_t, acc_t> *);
+  int gpu_mode;
+  double cpu_time;
 };
 
 }    // namespace LAMMPS_NS
-
 #endif
 #endif
diff --git a/src/GPU/pair_sph_taitwater_gpu.cpp b/src/GPU/pair_sph_taitwater_gpu.cpp
new file mode 100644
index 0000000000..6f2762c144
--- /dev/null
+++ b/src/GPU/pair_sph_taitwater_gpu.cpp
@@ -0,0 +1,199 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Trung Dac Nguyen (U Chicago)
+------------------------------------------------------------------------- */
+
+#include "pair_sph_taitwater_gpu.h"
+
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "gpu_extra.h"
+#include "info.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "suffix.h"
+#include "update.h"
+
+#include <cmath>
+
+using namespace LAMMPS_NS;
+
+// External functions from cuda library for atom decomposition
+
+int sph_taitwater_gpu_init(const int ntypes, double **cutsq, double** host_cut,
+                           double **host_viscosity, double* host_mass, double* host_rho0,
+                           double* host_soundspeed, double* host_B, const int dimension,
+                           double *special_lj, const int inum, const int nall,
+                           const int max_nbors,  const int maxspecial,
+                           const double cell_size, int &gpu_mode, FILE *screen);
+void sph_taitwater_gpu_clear();
+int **sph_taitwater_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                         double **host_x, int *host_type, double *sublo,
+                         double *subhi, tagint *tag, int **nspecial,
+                         tagint **special, const bool eflag, const bool vflag,
+                         const bool eatom, const bool vatom, int &host_start,
+                         int **ilist, int **jnum, const double cpu_time, bool &success,
+                         double **host_v);
+void sph_taitwater_gpu_compute(const int ago, const int inum_full, const int nall,
+                        double **host_x, int *host_type, int *ilist, int *numj,
+                        int **firstneigh, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        const double cpu_time, bool &success, tagint *tag,
+                        double **host_v);
+void sph_taitwater_gpu_get_extra_data(double *host_rho);
+void sph_taitwater_gpu_update_drhoE(void **drhoE_ptr);
+double sph_taitwater_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+PairSPHTaitwaterGPU::PairSPHTaitwaterGPU(LAMMPS *lmp) : PairSPHTaitwater(lmp), gpu_mode(GPU_FORCE)
+{
+  drhoE_pinned = nullptr;
+  respa_enable = 0;
+  reinitflag = 0;
+  cpu_time = 0.0;
+  suffix_flag |= Suffix::GPU;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairSPHTaitwaterGPU::~PairSPHTaitwaterGPU()
+{
+  sph_taitwater_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairSPHTaitwaterGPU::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+
+  double *rho = atom->rho;
+  sph_taitwater_gpu_get_extra_data(rho);
+
+  if (gpu_mode != GPU_FORCE) {
+    double sublo[3], subhi[3];
+    if (domain->triclinic == 0) {
+      sublo[0] = domain->sublo[0];
+      sublo[1] = domain->sublo[1];
+      sublo[2] = domain->sublo[2];
+      subhi[0] = domain->subhi[0];
+      subhi[1] = domain->subhi[1];
+      subhi[2] = domain->subhi[2];
+    } else {
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
+    }
+    inum = atom->nlocal;
+    firstneigh = sph_taitwater_gpu_compute_n(
+        neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
+        atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
+        cpu_time, success, atom->v);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    sph_taitwater_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
+                       eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success,
+                       atom->tag, atom->v);
+  }
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
+
+  // get the drho and dE from device
+
+  double *drho = atom->drho;
+  double *desph = atom->desph;
+  sph_taitwater_gpu_update_drhoE(&drhoE_pinned);
+
+  int nlocal = atom->nlocal;
+  if (acc_float) {
+    auto drhoE_ptr = (float *)drhoE_pinned;
+    int idx = 0;
+    for (int i = 0; i < nlocal; i++) {
+      drho[i] = drhoE_ptr[idx];
+      desph[i] = drhoE_ptr[idx+1];
+      idx += 2;
+    }
+
+  } else {
+    auto drhoE_ptr = (double *)drhoE_pinned;
+    int idx = 0;
+    for (int i = 0; i < nlocal; i++) {
+      drho[i] = drhoE_ptr[idx];
+      desph[i] = drhoE_ptr[idx+1];
+      idx += 2;
+    }
+  }
+
+  if (atom->molecular != Atom::ATOMIC && neighbor->ago == 0)
+    neighbor->build_topology();
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairSPHTaitwaterGPU::init_style()
+{
+
+  // Repeat cutsq calculation because done after call to init_style
+  double maxcut = -1.0;
+  double mcut;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        mcut = init_one(i, j);
+        mcut *= mcut;
+        if (mcut > maxcut) maxcut = mcut;
+        cutsq[i][j] = cutsq[j][i] = mcut;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;
+
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
+  int mnf = 5e-2 * neighbor->oneatom;
+  int success =
+      sph_taitwater_gpu_init(atom->ntypes + 1, cutsq, cut, viscosity, atom->mass,
+                             rho0, soundspeed, B, domain->dimension, force->special_lj,
+                             atom->nlocal, atom->nlocal + atom->nghost,
+                             mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);
+
+  acc_float = Info::has_accelerator_feature("GPU", "precision", "single");
+
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairSPHTaitwaterGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + sph_taitwater_gpu_bytes();
+}
diff --git a/src/npair_full_multi_old.h b/src/GPU/pair_sph_taitwater_gpu.h
similarity index 58%
rename from src/npair_full_multi_old.h
rename to src/GPU/pair_sph_taitwater_gpu.h
index cb78cf3d76..df8119a3c0 100644
--- a/src/npair_full_multi_old.h
+++ b/src/GPU/pair_sph_taitwater_gpu.h
@@ -11,28 +11,38 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#ifdef NPAIR_CLASS
+#ifdef PAIR_CLASS
 // clang-format off
-NPairStyle(full/multi/old,
-           NPairFullMultiOld,
-           NP_FULL | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+PairStyle(sph/taitwater/gpu,PairSPHTaitwaterGPU);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_FULL_MULTI_OLD_H
-#define LMP_NPAIR_FULL_MULTI_OLD_H
+#ifndef LMP_PAIR_SPH_TAITWATER_GPU_H
+#define LMP_PAIR_SPH_TAITWATER_GPU_H
 
-#include "npair.h"
+#include "pair_sph_taitwater.h"
 
 namespace LAMMPS_NS {
 
-class NPairFullMultiOld : public NPair {
+class PairSPHTaitwaterGPU : public PairSPHTaitwater {
  public:
-  NPairFullMultiOld(class LAMMPS *);
-  void build(class NeighList *) override;
+  PairSPHTaitwaterGPU(LAMMPS *lmp);
+  ~PairSPHTaitwaterGPU() override;
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int) override;
+  void init_style() override;
+  double memory_usage() override;
+
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+  void *drhoE_pinned;
+  bool acc_float;
+
+ private:
+  int gpu_mode;
+  double cpu_time;
 };
 
 }    // namespace LAMMPS_NS
-
 #endif
 #endif
diff --git a/src/GPU/pair_sw_gpu.cpp b/src/GPU/pair_sw_gpu.cpp
index 7645218a85..ac604f3a74 100644
--- a/src/GPU/pair_sw_gpu.cpp
+++ b/src/GPU/pair_sw_gpu.cpp
@@ -49,8 +49,8 @@ void sw_gpu_compute(const int ago, const int nloc, const int nall, const int ln,
                     const double cpu_time, bool &success);
 double sw_gpu_bytes();
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/GPU/pair_tersoff_gpu.cpp b/src/GPU/pair_tersoff_gpu.cpp
index 8610a3880c..c3c9e6cfab 100644
--- a/src/GPU/pair_tersoff_gpu.cpp
+++ b/src/GPU/pair_tersoff_gpu.cpp
@@ -54,8 +54,8 @@ void tersoff_gpu_compute(const int ago, const int nlocal, const int nall, const
                          int &host_start, const double cpu_time, bool &success);
 double tersoff_gpu_bytes();
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/GPU/pair_yukawa_colloid_gpu.cpp b/src/GPU/pair_yukawa_colloid_gpu.cpp
index c1e785380d..db199a4701 100644
--- a/src/GPU/pair_yukawa_colloid_gpu.cpp
+++ b/src/GPU/pair_yukawa_colloid_gpu.cpp
@@ -123,7 +123,7 @@ void PairYukawaColloidGPU::compute(int eflag, int vflag)
 
 void PairYukawaColloidGPU::init_style()
 {
-  if (!atom->sphere_flag) error->all(FLERR, "Pair yukawa/colloid/gpu requires atom style sphere");
+  if (!atom->radius_flag) error->all(FLERR, "Pair style yukawa/colloid/gpu requires atom attribute radius");
 
   // Repeat cutsq calculation because done after call to init_style
   double maxcut = -1.0;
diff --git a/src/GPU/pppm_gpu.cpp b/src/GPU/pppm_gpu.cpp
index a2c733e7ed..2c092f6a8e 100644
--- a/src/GPU/pppm_gpu.cpp
+++ b/src/GPU/pppm_gpu.cpp
@@ -40,22 +40,17 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define MAXORDER 7
-#define OFFSET 16384
-#define SMALL 0.00001
-#define LARGE 10000.0
-#define EPS_HOC 1.0e-7
+static constexpr int MAXORDER = 7;
+static constexpr int OFFSET = 16384;
+static constexpr double SMALL = 0.00001;
+static constexpr double LARGE = 10000.0;
+static constexpr double EPS_HOC = 1.0e-7;
 
 enum{REVERSE_RHO_GPU,REVERSE_RHO};
 enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 // external functions from cuda library for atom decomposition
 
@@ -405,7 +400,8 @@ void PPPMGPU::poisson_ik()
 
   // if requested, compute energy and virial contribution
 
-  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
+  bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
+  double scaleinv = 1.0 / ngridtotal;
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
diff --git a/src/GRANULAR/compute_contact_atom.cpp b/src/GRANULAR/compute_contact_atom.cpp
index 310fdb5a41..a3e71af0fe 100644
--- a/src/GRANULAR/compute_contact_atom.cpp
+++ b/src/GRANULAR/compute_contact_atom.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    https://www.lammps.org/, Sandia National Laboratories
@@ -51,7 +50,7 @@ ComputeContactAtom::ComputeContactAtom(LAMMPS *lmp, int narg, char **arg) :
 
   // error checks
 
-  if (!atom->sphere_flag) error->all(FLERR, "Compute contact/atom requires atom style sphere");
+  if (!atom->radius_flag) error->all(FLERR, "Compute contact/atom requires atom attribute radius");
 }
 
 /* ---------------------------------------------------------------------- */
@@ -67,10 +66,10 @@ ComputeContactAtom::~ComputeContactAtom()
 void ComputeContactAtom::init()
 {
   if (force->pair == nullptr)
-    error->all(FLERR,"Compute contact/atom requires a pair style be defined");
+    error->all(FLERR, "Compute contact/atom requires a pair style be defined");
 
   if (modify->get_compute_by_style("contact/atom").size() > 1 && comm->me == 0)
-    error->warning(FLERR,"More than one compute contact/atom");
+    error->warning(FLERR, "More than one compute contact/atom");
 
   // need an occasional neighbor list
 
@@ -88,10 +87,10 @@ void ComputeContactAtom::init_list(int /*id*/, NeighList *ptr)
 
 void ComputeContactAtom::compute_peratom()
 {
-  int i,j,ii,jj,inum,jnum;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,radsumsq;
-  int *ilist,*jlist,*numneigh,**firstneigh;
+  int i, j, ii, jj, inum, jnum;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
+  double radi, radsum, radsumsq;
+  int *ilist, *jlist, *numneigh, **firstneigh;
 
   invoked_peratom = update->ntimestep;
 
@@ -100,7 +99,7 @@ void ComputeContactAtom::compute_peratom()
   if (atom->nmax > nmax) {
     memory->destroy(contact);
     nmax = atom->nmax;
-    memory->create(contact,nmax,"contact/atom:contact");
+    memory->create(contact, nmax, "contact/atom:contact");
     vector_atom = contact;
   }
 
@@ -130,7 +129,7 @@ void ComputeContactAtom::compute_peratom()
     i = ilist[ii];
 
     // Only proceed if i is either part of the compute group or will contribute to contacts
-    if (! (mask[i] & groupbit) && ! (mask[i] & jgroupbit)) continue;
+    if (!(mask[i] & groupbit) && !(mask[i] & jgroupbit)) continue;
 
     xtmp = x[i][0];
     ytmp = x[i][1];
@@ -146,7 +145,7 @@ void ComputeContactAtom::compute_peratom()
       // Only tally for atoms in compute group (groupbit) if neighbor is in group2 (jgroupbit)
       update_i_flag = (mask[i] & groupbit) && (mask[j] & jgroupbit);
       update_j_flag = (mask[j] & groupbit) && (mask[i] & jgroupbit);
-      if (! update_i_flag && ! update_j_flag) continue;
+      if (!update_i_flag && !update_j_flag) continue;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
@@ -170,12 +169,11 @@ void ComputeContactAtom::compute_peratom()
 
 int ComputeContactAtom::pack_reverse_comm(int n, int first, double *buf)
 {
-  int i,m,last;
+  int i, m, last;
 
   m = 0;
   last = first + n;
-  for (i = first; i < last; i++)
-    buf[m++] = contact[i];
+  for (i = first; i < last; i++) buf[m++] = contact[i];
   return m;
 }
 
@@ -183,7 +181,7 @@ int ComputeContactAtom::pack_reverse_comm(int n, int first, double *buf)
 
 void ComputeContactAtom::unpack_reverse_comm(int n, int *list, double *buf)
 {
-  int i,j,m;
+  int i, j, m;
 
   m = 0;
   for (i = 0; i < n; i++) {
@@ -198,6 +196,6 @@ void ComputeContactAtom::unpack_reverse_comm(int n, int *list, double *buf)
 
 double ComputeContactAtom::memory_usage()
 {
-  double bytes = (double)nmax * sizeof(double);
+  double bytes = (double) nmax * sizeof(double);
   return bytes;
 }
diff --git a/src/GRANULAR/compute_fabric.cpp b/src/GRANULAR/compute_fabric.cpp
index fb95a8b446..adaf242c92 100644
--- a/src/GRANULAR/compute_fabric.cpp
+++ b/src/GRANULAR/compute_fabric.cpp
@@ -11,6 +11,10 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
+/* ----------------------------------------------------------------------
+   Contributing authors: Joel Clemmer (SNL), Ishan Srivastava (LBNL)
+------------------------------------------------------------------------- */
+
 #include "compute_fabric.h"
 
 #include "atom.h"
diff --git a/src/GRANULAR/fix_damping_cundall.cpp b/src/GRANULAR/fix_damping_cundall.cpp
index cf17e91050..cf3dd46b96 100644
--- a/src/GRANULAR/fix_damping_cundall.cpp
+++ b/src/GRANULAR/fix_damping_cundall.cpp
@@ -40,9 +40,9 @@ FixDampingCundall::FixDampingCundall(LAMMPS *_lmp, int narg, char **arg) :
 {
   dynamic_group_allow = 1;
 
-  if (!atom->sphere_flag) error->all(FLERR, "Fix damping/cundall requires atom style sphere");
+  if (!atom->omega_flag) error->all(FLERR, "Fix damping/cundall requires atom attribute omega");
 
-  if (narg < 5) error->all(FLERR, "Illegal fix damping/cundall command");
+  if (narg < 5) utils::missing_cmd_args(FLERR, "fix damping/cundall", error);
 
   gamma_lin = utils::numeric(FLERR, arg[3], false, lmp);
   gamma_ang = utils::numeric(FLERR, arg[4], false, lmp);
diff --git a/src/GRANULAR/fix_wall_gran.cpp b/src/GRANULAR/fix_wall_gran.cpp
index 550d2e17db..5930280053 100644
--- a/src/GRANULAR/fix_wall_gran.cpp
+++ b/src/GRANULAR/fix_wall_gran.cpp
@@ -42,7 +42,7 @@ using namespace FixConst;
 using namespace MathConst;
 using namespace MathExtra;
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 // XYZ PLANE need to be 0,1,2
 
@@ -55,10 +55,10 @@ FixWallGran::FixWallGran(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg), idregion(nullptr), tstr(nullptr), history_one(nullptr),
   fix_rigid(nullptr), mass_rigid(nullptr)
 {
-  if (narg < 4) error->all(FLERR,"Illegal fix wall/gran command");
+  if (narg < 4) utils::missing_cmd_args(FLERR,"fix wall/gran", error);
 
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Fix wall/gran requires atom style sphere");
+  if (!atom->omega_flag) error->all(FLERR,"Fix wall/gran requires atom attribute omega");
+  if (!atom->radius_flag) error->all(FLERR,"Fix wall/gran requires atom attribute radius");
 
   create_attribute = 1;
 
@@ -110,7 +110,7 @@ FixWallGran::FixWallGran(LAMMPS *lmp, int narg, char **arg) :
         model->limit_damping = 1;
         iarg += 1;
       } else {
-        error->all(FLERR, "Illegal fix wall/gran command");
+        error->all(FLERR, "Unknown fix wall/gran keyword {}", arg[iarg]);
       }
     }
   }
@@ -281,8 +281,8 @@ FixWallGran::~FixWallGran()
   // delete local storage
 
   delete model;
-  delete [] tstr;
-  delete [] idregion;
+  delete[] tstr;
+  delete[] idregion;
   memory->destroy(history_one);
   memory->destroy(mass_rigid);
 }
diff --git a/src/GRANULAR/pair_gran_hooke_history.cpp b/src/GRANULAR/pair_gran_hooke_history.cpp
index 98191e0564..79bf2b87aa 100644
--- a/src/GRANULAR/pair_gran_hooke_history.cpp
+++ b/src/GRANULAR/pair_gran_hooke_history.cpp
@@ -435,10 +435,10 @@ void PairGranHookeHistory::init_style()
 
   // error and warning checks
 
-  if (!atom->radius_flag || !atom->rmass_flag)
-    error->all(FLERR, "Pair granular requires atom attributes radius, rmass");
+  if (!atom->radius_flag || !atom->rmass_flag || !atom->omega_flag)
+    error->all(FLERR, "Pair gran/h* requires atom attributes radius, rmass, omega");
   if (comm->ghost_velocity == 0)
-    error->all(FLERR, "Pair granular requires ghost atoms store velocity");
+    error->all(FLERR, "Pair gran/h* requires ghost atoms store velocity");
 
   // need a granular neighbor list
 
diff --git a/src/GRANULAR/pair_granular.cpp b/src/GRANULAR/pair_granular.cpp
index 30f272791e..119feb1c38 100644
--- a/src/GRANULAR/pair_granular.cpp
+++ b/src/GRANULAR/pair_granular.cpp
@@ -401,8 +401,8 @@ void PairGranular::init_style()
 {
   // error and warning checks
 
-  if (!atom->radius_flag || !atom->rmass_flag)
-    error->all(FLERR,"Pair granular requires atom attributes radius, rmass");
+  if (!atom->radius_flag || !atom->rmass_flag || !atom->omega_flag)
+    error->all(FLERR,"Pair granular requires atom attributes radius, rmass, omega");
   if (comm->ghost_velocity == 0)
     error->all(FLERR,"Pair granular requires ghost atoms store velocity");
 
diff --git a/src/INTEL/fix_intel.cpp b/src/INTEL/fix_intel.cpp
index cb60149885..30d119dd6a 100644
--- a/src/INTEL/fix_intel.cpp
+++ b/src/INTEL/fix_intel.cpp
@@ -553,6 +553,9 @@ void FixIntel::kspace_init_check()
 
   if (intel_pair == 0)
     error->all(FLERR,"Intel styles for kspace require intel pair style.");
+
+  if (utils::strmatch(update->integrate_style, "^verlet/split"))
+    error->all(FLERR,"Intel styles for kspace are not compatible with run_style verlet/split");
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/INTEL/fix_nh_intel.cpp b/src/INTEL/fix_nh_intel.cpp
index 688101ab13..5455576a1c 100644
--- a/src/INTEL/fix_nh_intel.cpp
+++ b/src/INTEL/fix_nh_intel.cpp
@@ -33,7 +33,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define TILTMAX 1.5
+static constexpr double TILTMAX = 1.5;
 
 enum{NOBIAS,BIAS};
 enum{ISO,ANISO,TRICLINIC};
diff --git a/src/INTEL/fix_nve_asphere_intel.cpp b/src/INTEL/fix_nve_asphere_intel.cpp
index 7e429901a4..848afa20cc 100644
--- a/src/INTEL/fix_nve_asphere_intel.cpp
+++ b/src/INTEL/fix_nve_asphere_intel.cpp
@@ -30,7 +30,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define INERTIA 0.2          // moment of inertia prefactor for ellipsoid
+static constexpr double INERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/INTEL/npair_full_bin_ghost_intel.cpp b/src/INTEL/npair_bin_ghost_intel.cpp
similarity index 99%
rename from src/INTEL/npair_full_bin_ghost_intel.cpp
rename to src/INTEL/npair_bin_ghost_intel.cpp
index 920c0c559a..1210f33b9a 100644
--- a/src/INTEL/npair_full_bin_ghost_intel.cpp
+++ b/src/INTEL/npair_bin_ghost_intel.cpp
@@ -16,7 +16,7 @@
    Contributing authors: W. Michael Brown (Intel)
 ------------------------------------------------------------------------- */
 
-#include "npair_full_bin_ghost_intel.h"
+#include "npair_bin_ghost_intel.h"
 
 #include "atom.h"
 #include "comm.h"
diff --git a/src/INTEL/npair_full_bin_ghost_intel.h b/src/INTEL/npair_bin_ghost_intel.h
similarity index 95%
rename from src/INTEL/npair_full_bin_ghost_intel.h
rename to src/INTEL/npair_bin_ghost_intel.h
index 4ae5ddad5f..eada3237bc 100644
--- a/src/INTEL/npair_full_bin_ghost_intel.h
+++ b/src/INTEL/npair_bin_ghost_intel.h
@@ -25,8 +25,8 @@ NPairStyle(full/bin/ghost/intel,
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_FULL_BIN_GHOST_INTEL_H
-#define LMP_NPAIR_FULL_BIN_GHOST_INTEL_H
+#ifndef LMP_NPAIR_BIN_GHOST_INTEL_H
+#define LMP_NPAIR_BIN_GHOST_INTEL_H
 
 #include "npair_intel.h"
 
diff --git a/src/INTEL/npair_bin_intel.cpp b/src/INTEL/npair_bin_intel.cpp
new file mode 100644
index 0000000000..f4942022ec
--- /dev/null
+++ b/src/INTEL/npair_bin_intel.cpp
@@ -0,0 +1,298 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: W. Michael Brown (Intel)
+------------------------------------------------------------------------- */
+
+#include "npair_bin_intel.h"
+
+#include "atom.h"
+#include "comm.h"
+#include "error.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+
+using namespace LAMMPS_NS;
+
+
+/* ---------------------------------------------------------------------- */
+
+NPairHalfBinNewtonIntel::NPairHalfBinNewtonIntel(LAMMPS *lmp) :
+  NPairIntel(lmp) {}
+
+/* ----------------------------------------------------------------------
+   binned neighbor list construction with full Newton's 3rd law
+   each owned atom i checks its own bin and other bins in Newton stencil
+   every pair stored exactly once by some processor
+------------------------------------------------------------------------- */
+
+void NPairHalfBinNewtonIntel::build(NeighList *list)
+{
+  if (nstencil / 2 > INTEL_MAX_STENCIL_CHECK)
+    error->all(FLERR, "Too many neighbor bins for INTEL package.");
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (exclude)
+    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
+  #endif
+
+  if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
+    hbni(list, _fix->get_mixed_buffers());
+  else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+    hbni(list, _fix->get_double_buffers());
+  else
+    hbni(list, _fix->get_single_buffers());
+
+  _fix->stop_watch(TIME_HOST_NEIGHBOR);
+}
+
+template <class flt_t, class acc_t>
+void NPairHalfBinNewtonIntel::
+hbni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
+  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
+  list->inum = nlocal;
+
+  int host_start = _fix->host_start_neighbor();
+  const int off_end = _fix->offload_end_neighbor();
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (off_end) grow_stencil();
+  if (_fix->full_host_list()) host_start = 0;
+  int offload_noghost = _fix->offload_noghost();
+  #endif
+
+  buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
+
+  int need_ic = 0;
+  if (atom->molecular != Atom::ATOMIC)
+    dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
+                         neighbor->cutneighmax);
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (need_ic) {
+    if (offload_noghost) {
+      bin_newton<flt_t,acc_t,1,1,0,0,0>(1, list, buffers, 0, off_end);
+      bin_newton<flt_t,acc_t,1,1,0,0,0>(0, list, buffers, host_start, nlocal,
+                                        off_end);
+    } else {
+      bin_newton<flt_t,acc_t,0,1,0,0,0>(1, list, buffers, 0, off_end);
+      bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal);
+    }
+  } else {
+    if (offload_noghost) {
+      bin_newton<flt_t,acc_t,1,0,0,0,0>(1, list, buffers, 0, off_end);
+      bin_newton<flt_t,acc_t,1,0,0,0,0>(0, list, buffers, host_start, nlocal,
+                                        off_end);
+    } else {
+      bin_newton<flt_t,acc_t,0,0,0,0,0>(1, list, buffers, 0, off_end);
+      bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal);
+    }
+  }
+  #else
+  if (need_ic)
+    bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal);
+  else
+    bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal);
+  #endif
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+NPairHalfBinNewtonTriIntel::NPairHalfBinNewtonTriIntel(LAMMPS *lmp) :
+  NPairIntel(lmp) {}
+
+/* ----------------------------------------------------------------------
+   binned neighbor list construction with Newton's 3rd law for triclinic
+   each owned atom i checks its own bin and other bins in triclinic stencil
+   every pair stored exactly once by some processor
+------------------------------------------------------------------------- */
+
+void NPairHalfBinNewtonTriIntel::build(NeighList *list)
+{
+  if (nstencil > INTEL_MAX_STENCIL)
+    error->all(FLERR, "Too many neighbor bins for INTEL package.");
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (exclude)
+    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
+  #endif
+
+  if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
+    hbnti(list, _fix->get_mixed_buffers());
+  else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+    hbnti(list, _fix->get_double_buffers());
+  else
+    hbnti(list, _fix->get_single_buffers());
+
+  _fix->stop_watch(TIME_HOST_NEIGHBOR);
+}
+
+template <class flt_t, class acc_t>
+void NPairHalfBinNewtonTriIntel::
+hbnti(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
+  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
+  list->inum = nlocal;
+
+  int host_start = _fix->host_start_neighbor();
+  const int off_end = _fix->offload_end_neighbor();
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (off_end) grow_stencil();
+  if (_fix->full_host_list()) host_start = 0;
+  int offload_noghost = _fix->offload_noghost();
+  #endif
+
+  buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
+
+  int need_ic = 0;
+  if (atom->molecular != Atom::ATOMIC)
+    dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
+                         neighbor->cutneighmax);
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (need_ic) {
+    if (offload_noghost) {
+      bin_newton<flt_t,acc_t,1,1,0,1,0>(1, list, buffers, 0, off_end);
+      bin_newton<flt_t,acc_t,1,1,0,1,0>(0, list, buffers, host_start, nlocal,
+                                        off_end);
+    } else {
+      bin_newton<flt_t,acc_t,0,1,0,1,0>(1, list, buffers, 0, off_end);
+      bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal);
+    }
+  } else {
+    if (offload_noghost) {
+      bin_newton<flt_t,acc_t,1,0,0,1,0>(1, list, buffers, 0, off_end);
+      bin_newton<flt_t,acc_t,1,0,0,1,0>(0, list, buffers, host_start, nlocal,
+                                        off_end);
+    } else {
+      bin_newton<flt_t,acc_t,0,0,0,1,0>(1, list, buffers, 0, off_end);
+      bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal);
+    }
+  }
+  #else
+  if (need_ic)
+    bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal);
+  else
+    bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal);
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+NPairFullBinIntel::NPairFullBinIntel(LAMMPS *lmp) : NPairIntel(lmp) {}
+
+/* ----------------------------------------------------------------------
+   binned neighbor list construction for all neighbors
+   every neighbor pair appears in list of both atoms i and j
+------------------------------------------------------------------------- */
+
+void NPairFullBinIntel::build(NeighList *list)
+{
+  if (nstencil > INTEL_MAX_STENCIL_CHECK)
+    error->all(FLERR, "Too many neighbor bins for INTEL package.");
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (exclude)
+    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
+  #endif
+
+  if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
+    fbi(list, _fix->get_mixed_buffers());
+  else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+    fbi(list, _fix->get_double_buffers());
+  else
+    fbi(list, _fix->get_single_buffers());
+
+  _fix->stop_watch(TIME_HOST_NEIGHBOR);
+}
+
+template <class flt_t, class acc_t>
+void NPairFullBinIntel::
+fbi(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
+  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
+  list->inum = nlocal;
+  list->gnum = 0;
+
+  int host_start = _fix->host_start_neighbor();;
+  const int off_end = _fix->offload_end_neighbor();
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (off_end) grow_stencil();
+  if (_fix->full_host_list()) host_start = 0;
+  int offload_noghost = _fix->offload_noghost();
+  #endif
+
+  buffers->grow_list(list, atom->nlocal, comm->nthreads,
+                     _fix->three_body_neighbor(), off_end,
+                     _fix->nbor_pack_width());
+
+  int need_ic = 0;
+  if (atom->molecular != Atom::ATOMIC)
+    dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
+                         neighbor->cutneighmax);
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_fix->three_body_neighbor()) {
+    if (need_ic) {
+      if (offload_noghost) {
+        bin_newton<flt_t,acc_t,1,1,1,0,1>(1, list, buffers, 0, off_end);
+        bin_newton<flt_t,acc_t,1,1,1,0,1>(0, list, buffers, host_start, nlocal, off_end);
+      } else {
+        bin_newton<flt_t,acc_t,0,1,1,0,1>(1, list, buffers, 0, off_end);
+        bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal);
+      }
+    } else {
+      if (offload_noghost) {
+        bin_newton<flt_t,acc_t,1,0,1,0,1>(1, list, buffers, 0, off_end);
+        bin_newton<flt_t,acc_t,1,0,1,0,1>(0, list, buffers, host_start, nlocal, off_end);
+      } else {
+        bin_newton<flt_t,acc_t,0,0,1,0,1>(1, list, buffers, 0, off_end);
+        bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal);
+      }
+    }
+  } else {
+    if (need_ic) {
+      if (offload_noghost) {
+        bin_newton<flt_t,acc_t,1,1,1,0,0>(1, list, buffers, 0, off_end);
+        bin_newton<flt_t,acc_t,1,1,1,0,0>(0, list, buffers, host_start, nlocal, off_end);
+      } else {
+        bin_newton<flt_t,acc_t,0,1,1,0,0>(1, list, buffers, 0, off_end);
+        bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal);
+      }
+    } else {
+      if (offload_noghost) {
+        bin_newton<flt_t,acc_t,1,0,1,0,0>(1, list, buffers, 0, off_end);
+        bin_newton<flt_t,acc_t,1,0,1,0,0>(0, list, buffers, host_start, nlocal, off_end);
+      } else {
+        bin_newton<flt_t,acc_t,0,0,1,0,0>(1, list, buffers, 0, off_end);
+        bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal);
+      }
+    }
+  }
+  #else
+  if (_fix->three_body_neighbor()) {
+    if (need_ic)
+      bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal);
+    else
+      bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal);
+  } else {
+    if (need_ic)
+      bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal);
+    else
+      bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal);
+  }
+  #endif
+}
diff --git a/src/INTEL/npair_half_bin_newton_tri_intel.h b/src/INTEL/npair_bin_intel.h
similarity index 59%
rename from src/INTEL/npair_half_bin_newton_tri_intel.h
rename to src/INTEL/npair_bin_intel.h
index 8ef65c12e5..fd18f20be5 100644
--- a/src/INTEL/npair_half_bin_newton_tri_intel.h
+++ b/src/INTEL/npair_bin_intel.h
@@ -14,20 +14,38 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+NPairStyle(half/bin/newton/intel,
+           NPairHalfBinNewtonIntel,
+           NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_INTEL);
+
 NPairStyle(half/bin/newton/tri/intel,
            NPairHalfBinNewtonTriIntel,
            NP_HALF | NP_BIN | NP_NEWTON | NP_TRI | NP_INTEL);
+
+NPairStyle(full/bin/intel,
+           NPairFullBinIntel,
+           NP_FULL | NP_BIN | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI |
+           NP_INTEL);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_BIN_NEWTON_INTEL_TRI_H
-#define LMP_NPAIR_HALF_BIN_NEWTON_INTEL_TRI_H
+#ifndef LMP_NPAIR_BIN_INTEL_H
+#define LMP_NPAIR_BIN_INTEL_H
 
 #include "fix_intel.h"
 #include "npair_intel.h"
 
 namespace LAMMPS_NS {
 
+class NPairHalfBinNewtonIntel : public NPairIntel {
+ public:
+  NPairHalfBinNewtonIntel(class LAMMPS *);
+  void build(class NeighList *) override;
+
+ private:
+  template <class flt_t, class acc_t> void hbni(NeighList *, IntelBuffers<flt_t, acc_t> *);
+};
+
 class NPairHalfBinNewtonTriIntel : public NPairIntel {
  public:
   NPairHalfBinNewtonTriIntel(class LAMMPS *);
@@ -37,6 +55,15 @@ class NPairHalfBinNewtonTriIntel : public NPairIntel {
   template <class flt_t, class acc_t> void hbnti(NeighList *, IntelBuffers<flt_t, acc_t> *);
 };
 
+class NPairFullBinIntel : public NPairIntel {
+ public:
+  NPairFullBinIntel(class LAMMPS *);
+  void build(class NeighList *) override;
+
+ private:
+  template <class flt_t, class acc_t> void fbi(NeighList *, IntelBuffers<flt_t, acc_t> *);
+};
+
 }    // namespace LAMMPS_NS
 
 #endif
diff --git a/src/INTEL/npair_half_bin_newton_intel.cpp b/src/INTEL/npair_half_bin_newton_intel.cpp
deleted file mode 100644
index 24e8b01572..0000000000
--- a/src/INTEL/npair_half_bin_newton_intel.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing author: W. Michael Brown (Intel)
-------------------------------------------------------------------------- */
-
-#include "npair_half_bin_newton_intel.h"
-
-#include "atom.h"
-#include "comm.h"
-#include "error.h"
-#include "neigh_list.h"
-#include "neighbor.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfBinNewtonIntel::NPairHalfBinNewtonIntel(LAMMPS *lmp) :
-  NPairIntel(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfBinNewtonIntel::build(NeighList *list)
-{
-  if (nstencil / 2 > INTEL_MAX_STENCIL_CHECK)
-    error->all(FLERR, "Too many neighbor bins for INTEL package.");
-
-  #ifdef _LMP_INTEL_OFFLOAD
-  if (exclude)
-    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
-  #endif
-
-  if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
-    hbni(list, _fix->get_mixed_buffers());
-  else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
-    hbni(list, _fix->get_double_buffers());
-  else
-    hbni(list, _fix->get_single_buffers());
-
-  _fix->stop_watch(TIME_HOST_NEIGHBOR);
-}
-
-template <class flt_t, class acc_t>
-void NPairHalfBinNewtonIntel::
-hbni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  list->inum = nlocal;
-
-  int host_start = _fix->host_start_neighbor();
-  const int off_end = _fix->offload_end_neighbor();
-
-  #ifdef _LMP_INTEL_OFFLOAD
-  if (off_end) grow_stencil();
-  if (_fix->full_host_list()) host_start = 0;
-  int offload_noghost = _fix->offload_noghost();
-  #endif
-
-  buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
-
-  int need_ic = 0;
-  if (atom->molecular != Atom::ATOMIC)
-    dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
-                         neighbor->cutneighmax);
-
-  #ifdef _LMP_INTEL_OFFLOAD
-  if (need_ic) {
-    if (offload_noghost) {
-      bin_newton<flt_t,acc_t,1,1,0,0,0>(1, list, buffers, 0, off_end);
-      bin_newton<flt_t,acc_t,1,1,0,0,0>(0, list, buffers, host_start, nlocal,
-                                        off_end);
-    } else {
-      bin_newton<flt_t,acc_t,0,1,0,0,0>(1, list, buffers, 0, off_end);
-      bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal);
-    }
-  } else {
-    if (offload_noghost) {
-      bin_newton<flt_t,acc_t,1,0,0,0,0>(1, list, buffers, 0, off_end);
-      bin_newton<flt_t,acc_t,1,0,0,0,0>(0, list, buffers, host_start, nlocal,
-                                        off_end);
-    } else {
-      bin_newton<flt_t,acc_t,0,0,0,0,0>(1, list, buffers, 0, off_end);
-      bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal);
-    }
-  }
-  #else
-  if (need_ic)
-    bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal);
-  else
-    bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal);
-  #endif
-}
diff --git a/src/INTEL/npair_half_bin_newton_tri_intel.cpp b/src/INTEL/npair_half_bin_newton_tri_intel.cpp
deleted file mode 100644
index a903ef8e9a..0000000000
--- a/src/INTEL/npair_half_bin_newton_tri_intel.cpp
+++ /dev/null
@@ -1,108 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing author: W. Michael Brown (Intel)
-------------------------------------------------------------------------- */
-
-#include "npair_half_bin_newton_tri_intel.h"
-
-#include "atom.h"
-#include "comm.h"
-#include "error.h"
-#include "neigh_list.h"
-#include "neighbor.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfBinNewtonTriIntel::NPairHalfBinNewtonTriIntel(LAMMPS *lmp) :
-  NPairIntel(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfBinNewtonTriIntel::build(NeighList *list)
-{
-  if (nstencil > INTEL_MAX_STENCIL)
-    error->all(FLERR, "Too many neighbor bins for INTEL package.");
-
-  #ifdef _LMP_INTEL_OFFLOAD
-  if (exclude)
-    error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
-  #endif
-
-  if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
-    hbnti(list, _fix->get_mixed_buffers());
-  else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
-    hbnti(list, _fix->get_double_buffers());
-  else
-    hbnti(list, _fix->get_single_buffers());
-
-  _fix->stop_watch(TIME_HOST_NEIGHBOR);
-}
-
-template <class flt_t, class acc_t>
-void NPairHalfBinNewtonTriIntel::
-hbnti(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) {
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  list->inum = nlocal;
-
-  int host_start = _fix->host_start_neighbor();
-  const int off_end = _fix->offload_end_neighbor();
-
-  #ifdef _LMP_INTEL_OFFLOAD
-  if (off_end) grow_stencil();
-  if (_fix->full_host_list()) host_start = 0;
-  int offload_noghost = _fix->offload_noghost();
-  #endif
-
-  buffers->grow_list(list, atom->nlocal, comm->nthreads, 0, off_end);
-
-  int need_ic = 0;
-  if (atom->molecular != Atom::ATOMIC)
-    dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
-                         neighbor->cutneighmax);
-
-  #ifdef _LMP_INTEL_OFFLOAD
-  if (need_ic) {
-    if (offload_noghost) {
-      bin_newton<flt_t,acc_t,1,1,0,1,0>(1, list, buffers, 0, off_end);
-      bin_newton<flt_t,acc_t,1,1,0,1,0>(0, list, buffers, host_start, nlocal,
-                                        off_end);
-    } else {
-      bin_newton<flt_t,acc_t,0,1,0,1,0>(1, list, buffers, 0, off_end);
-      bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal);
-    }
-  } else {
-    if (offload_noghost) {
-      bin_newton<flt_t,acc_t,1,0,0,1,0>(1, list, buffers, 0, off_end);
-      bin_newton<flt_t,acc_t,1,0,0,1,0>(0, list, buffers, host_start, nlocal,
-                                        off_end);
-    } else {
-      bin_newton<flt_t,acc_t,0,0,0,1,0>(1, list, buffers, 0, off_end);
-      bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal);
-    }
-  }
-  #else
-  if (need_ic)
-    bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal);
-  else
-    bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal);
-  #endif
-}
diff --git a/src/INTEL/npair_halffull_trim_newton_intel.cpp b/src/INTEL/npair_halffull_intel.cpp
similarity index 56%
rename from src/INTEL/npair_halffull_trim_newton_intel.cpp
rename to src/INTEL/npair_halffull_intel.cpp
index b1b69734a4..134b768cc5 100644
--- a/src/INTEL/npair_halffull_trim_newton_intel.cpp
+++ b/src/INTEL/npair_halffull_intel.cpp
@@ -13,10 +13,10 @@
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-   Contributing author: Stan Moore (SNL)
+   Contributing author: W. Michael Brown (Intel)
 ------------------------------------------------------------------------- */
 
-#include "npair_halffull_trim_newton_intel.h"
+#include "npair_halffull_intel.h"
 
 #include "atom.h"
 #include "comm.h"
@@ -31,6 +31,232 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
+NPairHalffullNewtonIntel::NPairHalffullNewtonIntel(LAMMPS *lmp) : NPair(lmp) {
+  _fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
+  if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
+}
+
+/* ----------------------------------------------------------------------
+   build half list from full list
+   pair stored once if i,j are both owned and i < j
+   if j is ghost, only store if j coords are "above and to the right" of i
+   works if full list is a skip list
+------------------------------------------------------------------------- */
+
+template <class flt_t, class acc_t>
+void NPairHalffullNewtonIntel::build_t(NeighList *list,
+                                       IntelBuffers<flt_t,acc_t> *buffers)
+{
+  const int inum_full = list->listfull->inum;
+  const int nlocal = atom->nlocal;
+  const int e_nall = nlocal + atom->nghost;
+  const ATOM_T * _noalias const x = buffers->get_x();
+  int * _noalias const ilist = list->ilist;
+  int * _noalias const numneigh = list->numneigh;
+  int ** _noalias const firstneigh = list->firstneigh;
+  const int * _noalias const ilist_full = list->listfull->ilist;
+  const int * _noalias const numneigh_full = list->listfull->numneigh;
+  const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh;  // NOLINT
+
+  const double delta = 0.01 * force->angstrom;
+  const int triclinic = domain->triclinic;
+
+  #if defined(_OPENMP)
+  #pragma omp parallel
+  #endif
+  {
+    int tid, ifrom, ito;
+    IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, comm->nthreads);
+
+    // each thread has its own page allocator
+    MyPage<int> &ipage = list->ipage[tid];
+    ipage.reset();
+
+    // loop over parent full list
+    for (int ii = ifrom; ii < ito; ii++) {
+      int n = 0;
+      int *neighptr = ipage.vget();
+
+      const int i = ilist_full[ii];
+      const flt_t xtmp = x[i].x;
+      const flt_t ytmp = x[i].y;
+      const flt_t ztmp = x[i].z;
+
+      // loop over full neighbor list
+
+      const int * _noalias const jlist = firstneigh_full[i];
+      const int jnum = numneigh_full[i];
+
+      if (!triclinic) {
+        #if defined(LMP_SIMD_COMPILER)
+        #pragma vector aligned
+        #pragma ivdep
+        #endif
+        for (int jj = 0; jj < jnum; jj++) {
+          const int joriginal = jlist[jj];
+          const int j = joriginal & NEIGHMASK;
+          int addme = 1;
+          if (j < nlocal) {
+            if (i > j) addme = 0;
+          } else {
+            if (x[j].z < ztmp) addme = 0;
+            if (x[j].z == ztmp) {
+              if (x[j].y < ytmp) addme = 0;
+              if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
+            }
+          }
+          if (addme)
+            neighptr[n++] = joriginal;
+        }
+      } else {
+        #if defined(LMP_SIMD_COMPILER)
+        #pragma vector aligned
+        #pragma ivdep
+        #endif
+        for (int jj = 0; jj < jnum; jj++) {
+          const int joriginal = jlist[jj];
+          const int j = joriginal & NEIGHMASK;
+          int addme = 1;
+          if (j < nlocal) {
+            if (i > j) addme = 0;
+          } else {
+            if (fabs(x[j].z-ztmp) > delta) {
+              if (x[j].z < ztmp) addme = 0;
+            } else if (fabs(x[j].y-ytmp) > delta) {
+              if (x[j].y < ytmp) addme = 0;
+            } else {
+              if (x[j].x < xtmp) addme = 0;
+            }
+          }
+          if (addme)
+            neighptr[n++] = joriginal;
+        }
+      }
+
+      ilist[ii] = i;
+      firstneigh[i] = neighptr;
+      numneigh[i] = n;
+
+      int pad_end = n;
+      IP_PRE_neighbor_pad(pad_end, 0);
+      #if defined(LMP_SIMD_COMPILER)
+      #pragma vector aligned
+      #pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
+              avg=INTEL_COMPILE_WIDTH/2
+      #endif
+      for ( ; n < pad_end; n++)
+        neighptr[n] = e_nall;
+
+      ipage.vgot(n);
+      if (ipage.status())
+        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    }
+  }
+  list->inum = inum_full;
+}
+
+/* ----------------------------------------------------------------------
+   build half list from full 3-body list
+   half list is already stored as first part of 3-body list
+------------------------------------------------------------------------- */
+
+template <class flt_t>
+void NPairHalffullNewtonIntel::build_t3(NeighList *list, int *numhalf)
+{
+  const int inum_full = list->listfull->inum;
+  const int e_nall = atom->nlocal + atom->nghost;
+  int * _noalias const ilist = list->ilist;
+  int * _noalias const numneigh = list->numneigh;
+  int ** _noalias const firstneigh = list->firstneigh;
+  const int * _noalias const ilist_full = list->listfull->ilist;
+  const int * _noalias const numneigh_full = numhalf;
+  const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh;  // NOLINT
+
+  int packthreads = 1;
+  if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads;
+
+  #if defined(_OPENMP)
+  #pragma omp parallel if (packthreads > 1)
+  #endif
+  {
+    int tid, ifrom, ito;
+    IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, packthreads);
+
+    // each thread has its own page allocator
+    MyPage<int> &ipage = list->ipage[tid];
+    ipage.reset();
+
+    // loop over parent full list
+    for (int ii = ifrom; ii < ito; ii++) {
+      int n = 0;
+      int *neighptr = ipage.vget();
+
+      const int i = ilist_full[ii];
+
+      // loop over full neighbor list
+
+      const int * _noalias const jlist = firstneigh_full[i];
+      const int jnum = numneigh_full[ii];
+
+      #if defined(LMP_SIMD_COMPILER)
+      #pragma vector aligned
+      #pragma ivdep
+      #endif
+      for (int jj = 0; jj < jnum; jj++) {
+        const int joriginal = jlist[jj];
+        neighptr[n++] = joriginal;
+      }
+
+      ilist[ii] = i;
+      firstneigh[i] = neighptr;
+      numneigh[i] = n;
+
+      int pad_end = n;
+      IP_PRE_neighbor_pad(pad_end, 0);
+      #if defined(LMP_SIMD_COMPILER)
+      #pragma vector aligned
+      #pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
+              avg=INTEL_COMPILE_WIDTH/2
+      #endif
+      for ( ; n < pad_end; n++)
+        neighptr[n] = e_nall;
+
+      ipage.vgot(n);
+      if (ipage.status())
+        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    }
+  }
+  list->inum = inum_full;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void NPairHalffullNewtonIntel::build(NeighList *list)
+{
+  if (_fix->three_body_neighbor() == 0 || domain->triclinic) {
+    if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
+      build_t(list, _fix->get_mixed_buffers());
+    else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+      build_t(list, _fix->get_double_buffers());
+    else
+      build_t(list, _fix->get_single_buffers());
+  } else {
+    int *nhalf, *cnum;
+    if (_fix->precision() == FixIntel::PREC_MODE_MIXED) {
+      _fix->get_mixed_buffers()->get_list_data3(list->listfull, nhalf, cnum);
+      build_t3<float>(list, nhalf);
+    } else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
+      _fix->get_double_buffers()->get_list_data3(list->listfull, nhalf, cnum);
+      build_t3<double>(list, nhalf);
+    } else {
+      _fix->get_single_buffers()->get_list_data3(list->listfull, nhalf, cnum);
+      build_t3<float>(list, nhalf);
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
 NPairHalffullTrimNewtonIntel::NPairHalffullTrimNewtonIntel(LAMMPS *lmp) : NPair(lmp) {
   _fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
   if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
diff --git a/src/INTEL/npair_halffull_intel.h b/src/INTEL/npair_halffull_intel.h
new file mode 100644
index 0000000000..a1f9adbbc4
--- /dev/null
+++ b/src/INTEL/npair_halffull_intel.h
@@ -0,0 +1,128 @@
+// clang-format off
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: W. Michael Brown (Intel)
+------------------------------------------------------------------------- */
+
+// For Newton off, only used for hybrid to generate list for non-intel style.
+// Use standard routines.
+
+#ifdef NPAIR_CLASS
+// clang-format off
+NPairStyle(halffull/newton/intel,
+           NPairHalffullNewtonIntel,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_TRI| NP_INTEL);
+
+NPairStyle(halffull/newton/skip/intel,
+           NPairHalffullNewtonIntel,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL);
+
+NPairStyle(halffull/newtoff/intel,
+           NPairHalffullNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_INTEL);
+
+NPairStyle(halffull/newtoff/skip/intel,
+           NPairHalffullNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL);
+
+NPairStyle(halffull/newtoff/ghost/intel,
+           NPairHalffullNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_GHOST | NP_INTEL);
+
+NPairStyle(halffull/newtoff/skip/ghost/intel,
+           NPairHalffullNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_INTEL);
+
+
+NPairStyle(halffull/trim/newton/intel,
+           NPairHalffullTrimNewtonIntel,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_TRI| NP_TRIM | NP_INTEL);
+
+NPairStyle(halffull/trim/newton/skip/intel,
+           NPairHalffullTrimNewtonIntel,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
+
+NPairStyle(halffull/trim/newtoff/intel,
+           NPairHalffullTrimNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_TRIM | NP_INTEL);
+
+NPairStyle(halffull/trim/newtoff/skip/intel,
+           NPairHalffullTrimNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
+
+NPairStyle(halffull/trim/newtoff/ghost/intel,
+           NPairHalffullTrimNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_INTEL);
+
+NPairStyle(halffull/trim/newtoff/skip/ghost/intel,
+           NPairHalffullTrimNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_TRIM | NP_INTEL);
+// clang-format on
+#else
+
+#ifndef LMP_NPAIR_HALFFULL_INTEL_H
+#define LMP_NPAIR_HALFFULL_INTEL_H
+
+#include "fix_intel.h"
+#include "npair.h"
+
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
+namespace LAMMPS_NS {
+
+class NPairHalffullNewtonIntel : public NPair {
+ public:
+  NPairHalffullNewtonIntel(class LAMMPS *);
+  void build(class NeighList *) override;
+
+ protected:
+  FixIntel *_fix;
+
+  template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
+
+  template <class flt_t> void build_t3(NeighList *, int *);
+};
+
+class NPairHalffullTrimNewtonIntel : public NPair {
+ public:
+  NPairHalffullTrimNewtonIntel(class LAMMPS *);
+  void build(class NeighList *) override;
+
+ protected:
+  FixIntel *_fix;
+
+  template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
+
+  template <class flt_t, class acc_t> void build_t3(NeighList *, int *, IntelBuffers<flt_t, acc_t> *);
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/INTEL/npair_halffull_newtoff_intel.h b/src/INTEL/npair_halffull_newtoff_intel.h
deleted file mode 100644
index f77ddb74d6..0000000000
--- a/src/INTEL/npair_halffull_newtoff_intel.h
+++ /dev/null
@@ -1,44 +0,0 @@
-// clang-format off
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing author: W. Michael Brown (Intel)
-------------------------------------------------------------------------- */
-
-// Only used for hybrid to generate list for non-intel style. Use
-// standard routines.
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(halffull/newtoff/intel,
-           NPairHalffullNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_INTEL);
-
-NPairStyle(halffull/newtoff/skip/intel,
-           NPairHalffullNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL);
-
-NPairStyle(halffull/newtoff/ghost/intel,
-           NPairHalffullNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_GHOST | NP_INTEL);
-
-NPairStyle(halffull/newtoff/skip/ghost/intel,
-           NPairHalffullNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_INTEL);
-// clang-format on
-#endif
diff --git a/src/INTEL/npair_halffull_newton_intel.cpp b/src/INTEL/npair_halffull_newton_intel.cpp
deleted file mode 100644
index adcf2527ab..0000000000
--- a/src/INTEL/npair_halffull_newton_intel.cpp
+++ /dev/null
@@ -1,256 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing author: W. Michael Brown (Intel)
-------------------------------------------------------------------------- */
-
-#include "npair_halffull_newton_intel.h"
-
-#include "atom.h"
-#include "comm.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "modify.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalffullNewtonIntel::NPairHalffullNewtonIntel(LAMMPS *lmp) : NPair(lmp) {
-  _fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
-  if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
-}
-
-/* ----------------------------------------------------------------------
-   build half list from full list
-   pair stored once if i,j are both owned and i < j
-   if j is ghost, only store if j coords are "above and to the right" of i
-   works if full list is a skip list
-------------------------------------------------------------------------- */
-
-template <class flt_t, class acc_t>
-void NPairHalffullNewtonIntel::build_t(NeighList *list,
-                                       IntelBuffers<flt_t,acc_t> *buffers)
-{
-  const int inum_full = list->listfull->inum;
-  const int nlocal = atom->nlocal;
-  const int e_nall = nlocal + atom->nghost;
-  const ATOM_T * _noalias const x = buffers->get_x();
-  int * _noalias const ilist = list->ilist;
-  int * _noalias const numneigh = list->numneigh;
-  int ** _noalias const firstneigh = list->firstneigh;
-  const int * _noalias const ilist_full = list->listfull->ilist;
-  const int * _noalias const numneigh_full = list->listfull->numneigh;
-  const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh;  // NOLINT
-
-  const double delta = 0.01 * force->angstrom;
-  const int triclinic = domain->triclinic;
-
-  #if defined(_OPENMP)
-  #pragma omp parallel
-  #endif
-  {
-    int tid, ifrom, ito;
-    IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, comm->nthreads);
-
-    // each thread has its own page allocator
-    MyPage<int> &ipage = list->ipage[tid];
-    ipage.reset();
-
-    // loop over parent full list
-    for (int ii = ifrom; ii < ito; ii++) {
-      int n = 0;
-      int *neighptr = ipage.vget();
-
-      const int i = ilist_full[ii];
-      const flt_t xtmp = x[i].x;
-      const flt_t ytmp = x[i].y;
-      const flt_t ztmp = x[i].z;
-
-      // loop over full neighbor list
-
-      const int * _noalias const jlist = firstneigh_full[i];
-      const int jnum = numneigh_full[i];
-
-      if (!triclinic) {
-        #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
-        #pragma ivdep
-        #endif
-        for (int jj = 0; jj < jnum; jj++) {
-          const int joriginal = jlist[jj];
-          const int j = joriginal & NEIGHMASK;
-          int addme = 1;
-          if (j < nlocal) {
-            if (i > j) addme = 0;
-          } else {
-            if (x[j].z < ztmp) addme = 0;
-            if (x[j].z == ztmp) {
-              if (x[j].y < ytmp) addme = 0;
-              if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
-            }
-          }
-          if (addme)
-            neighptr[n++] = joriginal;
-        }
-      } else {
-        #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
-        #pragma ivdep
-        #endif
-        for (int jj = 0; jj < jnum; jj++) {
-          const int joriginal = jlist[jj];
-          const int j = joriginal & NEIGHMASK;
-          int addme = 1;
-          if (j < nlocal) {
-            if (i > j) addme = 0;
-          } else {
-            if (fabs(x[j].z-ztmp) > delta) {
-              if (x[j].z < ztmp) addme = 0;
-            } else if (fabs(x[j].y-ytmp) > delta) {
-              if (x[j].y < ytmp) addme = 0;
-            } else {
-              if (x[j].x < xtmp) addme = 0;
-            }
-          }
-          if (addme)
-            neighptr[n++] = joriginal;
-        }
-      }
-
-      ilist[ii] = i;
-      firstneigh[i] = neighptr;
-      numneigh[i] = n;
-
-      int pad_end = n;
-      IP_PRE_neighbor_pad(pad_end, 0);
-      #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned
-      #pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
-              avg=INTEL_COMPILE_WIDTH/2
-      #endif
-      for ( ; n < pad_end; n++)
-        neighptr[n] = e_nall;
-
-      ipage.vgot(n);
-      if (ipage.status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-    }
-  }
-  list->inum = inum_full;
-}
-
-/* ----------------------------------------------------------------------
-   build half list from full 3-body list
-   half list is already stored as first part of 3-body list
-------------------------------------------------------------------------- */
-
-template <class flt_t>
-void NPairHalffullNewtonIntel::build_t3(NeighList *list, int *numhalf)
-{
-  const int inum_full = list->listfull->inum;
-  const int e_nall = atom->nlocal + atom->nghost;
-  int * _noalias const ilist = list->ilist;
-  int * _noalias const numneigh = list->numneigh;
-  int ** _noalias const firstneigh = list->firstneigh;
-  const int * _noalias const ilist_full = list->listfull->ilist;
-  const int * _noalias const numneigh_full = numhalf;
-  const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh;  // NOLINT
-
-  int packthreads = 1;
-  if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads;
-
-  #if defined(_OPENMP)
-  #pragma omp parallel if (packthreads > 1)
-  #endif
-  {
-    int tid, ifrom, ito;
-    IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, packthreads);
-
-    // each thread has its own page allocator
-    MyPage<int> &ipage = list->ipage[tid];
-    ipage.reset();
-
-    // loop over parent full list
-    for (int ii = ifrom; ii < ito; ii++) {
-      int n = 0;
-      int *neighptr = ipage.vget();
-
-      const int i = ilist_full[ii];
-
-      // loop over full neighbor list
-
-      const int * _noalias const jlist = firstneigh_full[i];
-      const int jnum = numneigh_full[ii];
-
-      #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned
-      #pragma ivdep
-      #endif
-      for (int jj = 0; jj < jnum; jj++) {
-        const int joriginal = jlist[jj];
-        neighptr[n++] = joriginal;
-      }
-
-      ilist[ii] = i;
-      firstneigh[i] = neighptr;
-      numneigh[i] = n;
-
-      int pad_end = n;
-      IP_PRE_neighbor_pad(pad_end, 0);
-      #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned
-      #pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
-              avg=INTEL_COMPILE_WIDTH/2
-      #endif
-      for ( ; n < pad_end; n++)
-        neighptr[n] = e_nall;
-
-      ipage.vgot(n);
-      if (ipage.status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-    }
-  }
-  list->inum = inum_full;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void NPairHalffullNewtonIntel::build(NeighList *list)
-{
-  if (_fix->three_body_neighbor() == 0 || domain->triclinic) {
-    if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
-      build_t(list, _fix->get_mixed_buffers());
-    else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
-      build_t(list, _fix->get_double_buffers());
-    else
-      build_t(list, _fix->get_single_buffers());
-  } else {
-    int *nhalf, *cnum;
-    if (_fix->precision() == FixIntel::PREC_MODE_MIXED) {
-      _fix->get_mixed_buffers()->get_list_data3(list->listfull, nhalf, cnum);
-      build_t3<float>(list, nhalf);
-    } else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-      _fix->get_double_buffers()->get_list_data3(list->listfull, nhalf, cnum);
-      build_t3<double>(list, nhalf);
-    } else {
-      _fix->get_single_buffers()->get_list_data3(list->listfull, nhalf, cnum);
-      build_t3<float>(list, nhalf);
-    }
-  }
-}
diff --git a/src/INTEL/npair_halffull_newton_intel.h b/src/INTEL/npair_halffull_newton_intel.h
deleted file mode 100644
index 149983d08e..0000000000
--- a/src/INTEL/npair_halffull_newton_intel.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// clang-format off
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing author: W. Michael Brown (Intel)
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(halffull/newton/intel,
-           NPairHalffullNewtonIntel,
-           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI| NP_INTEL);
-
-NPairStyle(halffull/newton/skip/intel,
-           NPairHalffullNewtonIntel,
-           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_SKIP | NP_INTEL);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALFFULL_NEWTON_INTEL_H
-#define LMP_NPAIR_HALFFULL_NEWTON_INTEL_H
-
-#include "fix_intel.h"
-#include "npair.h"
-
-#if defined(_OPENMP)
-#include <omp.h>
-#endif
-
-namespace LAMMPS_NS {
-
-class NPairHalffullNewtonIntel : public NPair {
- public:
-  NPairHalffullNewtonIntel(class LAMMPS *);
-  void build(class NeighList *) override;
-
- protected:
-  FixIntel *_fix;
-
-  template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
-
-  template <class flt_t> void build_t3(NeighList *, int *);
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/INTEL/npair_halffull_trim_newtoff_intel.h b/src/INTEL/npair_halffull_trim_newtoff_intel.h
deleted file mode 100644
index 5e8b01cd09..0000000000
--- a/src/INTEL/npair_halffull_trim_newtoff_intel.h
+++ /dev/null
@@ -1,44 +0,0 @@
-// clang-format off
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing author: Stan Moore (SNL)
-------------------------------------------------------------------------- */
-
-// Only used for hybrid to generate list for non-intel style. Use
-// standard routines.
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(halffull/trim/newtoff/intel,
-           NPairHalffullTrimNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_INTEL);
-
-NPairStyle(halffull/trim/newtoff/skip/intel,
-           NPairHalffullTrimNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_SKIP | NP_INTEL);
-
-NPairStyle(halffull/trim/newtoff/ghost/intel,
-           NPairHalffullTrimNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_GHOST | NP_INTEL);
-
-NPairStyle(halffull/trim/newtoff/skip/ghost/intel,
-           NPairHalffullTrimNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_SKIP | NP_GHOST | NP_INTEL);
-// clang-format on
-#endif
diff --git a/src/INTEL/npair_halffull_trim_newton_intel.h b/src/INTEL/npair_halffull_trim_newton_intel.h
deleted file mode 100644
index dfce63e93d..0000000000
--- a/src/INTEL/npair_halffull_trim_newton_intel.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// clang-format off
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing author: Stan Moore (SNL)
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(halffull/trim/newton/intel,
-           NPairHalffullTrimNewtonIntel,
-           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI| NP_TRIM | NP_INTEL);
-
-NPairStyle(halffull/trim/newton/skip/intel,
-           NPairHalffullTrimNewtonIntel,
-           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
-           NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALFFULL_TRIM_NEWTON_INTEL_H
-#define LMP_NPAIR_HALFFULL_TRIM_NEWTON_INTEL_H
-
-#include "fix_intel.h"
-#include "npair.h"
-
-#if defined(_OPENMP)
-#include <omp.h>
-#endif
-
-namespace LAMMPS_NS {
-
-class NPairHalffullTrimNewtonIntel : public NPair {
- public:
-  NPairHalffullTrimNewtonIntel(class LAMMPS *);
-  void build(class NeighList *) override;
-
- protected:
-  FixIntel *_fix;
-
-  template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
-
-  template <class flt_t, class acc_t> void build_t3(NeighList *, int *, IntelBuffers<flt_t, acc_t> *);
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/INTEL/npair_skip_intel.cpp b/src/INTEL/npair_skip_intel.cpp
index b023955dd9..8840f7ee43 100644
--- a/src/INTEL/npair_skip_intel.cpp
+++ b/src/INTEL/npair_skip_intel.cpp
@@ -13,7 +13,7 @@
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-   Contributing author: W. Michael Brown (Intel)
+   Contributing author: W. Michael Brown (Intel), Stan Moore (SNL)
 ------------------------------------------------------------------------- */
 
 #include "npair_skip_intel.h"
@@ -224,3 +224,244 @@ void NPairSkipIntel::build(NeighList *list)
     }
   }
 }
+
+/* ---------------------------------------------------------------------- */
+
+NPairSkipTrimIntel::NPairSkipTrimIntel(LAMMPS *lmp) : NPair(lmp) {
+  _fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
+  if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
+  _inum_starts = new int[comm->nthreads];
+  _inum_counts = new int[comm->nthreads];
+  _full_props = nullptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+NPairSkipTrimIntel::~NPairSkipTrimIntel() {
+  delete []_inum_starts;
+  delete []_inum_counts;
+  delete[] _full_props;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void NPairSkipTrimIntel::copy_neighbor_info()
+{
+  NPair::copy_neighbor_info();
+  // Only need to set _full_props once; npair object deleted for changes
+  if (_full_props) return;
+  _full_props = new int[neighbor->nrequest];
+  for (int i = 0; i < neighbor->nrequest; i++)
+    _full_props[i] = neighbor->requests[i]->full;
+}
+
+/* ----------------------------------------------------------------------
+   build skip list for subset of types from parent list
+   works for half and full lists
+   works for owned (non-ghost) list, also for ghost list
+   iskip and ijskip flag which atom types and type pairs to skip
+   if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
+------------------------------------------------------------------------- */
+
+template<class flt_t, class acc_t, int THREE>
+void NPairSkipTrimIntel::build_t(NeighList *list, int *numhalf, int *cnumneigh,
+                             int *numhalf_skip, IntelBuffers<flt_t,acc_t> *buffers)
+{
+  const int nlocal = atom->nlocal;
+  const int e_nall = nlocal + atom->nghost;
+  const ATOM_T * _noalias const x = buffers->get_x();
+  const int * _noalias const type = atom->type;
+  int * _noalias const ilist = list->ilist;
+  int * _noalias const numneigh = list->numneigh;
+  int ** _noalias const firstneigh = (int ** const)list->firstneigh;  // NOLINT
+  const int * _noalias const ilist_skip = list->listskip->ilist;
+  const int * _noalias const numneigh_skip = list->listskip->numneigh;
+  const int ** _noalias const firstneigh_skip = (const int ** const)list->listskip->firstneigh;  // NOLINT
+  const int * _noalias const iskip = list->iskip;
+  const int **  _noalias const ijskip = (const int ** const)list->ijskip;  // NOLINT
+
+  const flt_t cutsq_custom = cutoff_custom * cutoff_custom;
+  int num_skip = list->listskip->inum;
+  if (list->ghost) num_skip += list->listskip->gnum;
+
+  int packthreads;
+  if (comm->nthreads > INTEL_HTHREADS && THREE==0)
+    packthreads = comm->nthreads;
+  else
+    packthreads = 1;
+
+  #if defined(_OPENMP)
+  #pragma omp parallel if (packthreads > 1)
+  #endif
+  {
+    int tid, ifrom, ito;
+    IP_PRE_omp_range_id(ifrom, ito, tid, num_skip, packthreads);
+
+    // each thread has its own page allocator
+    MyPage<int> &ipage = list->ipage[tid];
+    ipage.reset();
+
+    int my_inum = ifrom;
+    _inum_starts[tid] = ifrom;
+
+    // loop over parent full list
+    for (int ii = ifrom; ii < ito; ii++) {
+      const int i = ilist_skip[ii];
+      const int itype = type[i];
+      if (iskip[itype]) continue;
+
+      const flt_t xtmp = x[i].x;
+      const flt_t ytmp = x[i].y;
+      const flt_t ztmp = x[i].z;
+
+      int n = 0;
+      int *neighptr = ipage.vget();
+
+      // loop over parent non-skip list
+
+      const int * _noalias const jlist = firstneigh_skip[i];
+      const int jnum = numneigh_skip[i];
+
+      if (THREE) {
+        const int jnumhalf = numhalf_skip[ii];
+        for (int jj = 0; jj < jnumhalf; jj++) {
+          const int joriginal = jlist[jj];
+          const int j = joriginal & NEIGHMASK;
+
+          int addme = 1;
+          if (ijskip[itype][type[j]]) addme = 0;
+
+          // trim to shorter cutoff
+
+          const flt_t delx = xtmp - x[j].x;
+          const flt_t dely = ytmp - x[j].y;
+          const flt_t delz = ztmp - x[j].z;
+          const flt_t rsq = delx * delx + dely * dely + delz * delz;
+          if (rsq > cutsq_custom) addme = 0;
+
+          if (addme)
+            neighptr[n++] = joriginal;
+        }
+        numhalf[my_inum] = n;
+
+        for (int jj = jnumhalf; jj < jnum; jj++) {
+          const int joriginal = jlist[jj];
+          const int j = joriginal & NEIGHMASK;
+
+          int addme = 1;
+          if (ijskip[itype][type[j]]) addme = 0;
+
+          // trim to shorter cutoff
+
+          const flt_t delx = xtmp - x[j].x;
+          const flt_t dely = ytmp - x[j].y;
+          const flt_t delz = ztmp - x[j].z;
+          const flt_t rsq = delx * delx + dely * dely + delz * delz;
+          if (rsq > cutsq_custom) addme = 0;
+
+          if (addme)
+            neighptr[n++] = joriginal;
+        }
+      } else {
+        #if defined(LMP_SIMD_COMPILER)
+        #pragma vector aligned
+        #pragma ivdep
+        #endif
+        for (int jj = 0; jj < jnum; jj++) {
+          const int joriginal = jlist[jj];
+          const int j = joriginal & NEIGHMASK;
+
+          int addme = 1;
+          if (ijskip[itype][type[j]]) addme = 0;
+
+          // trim to shorter cutoff
+
+          const flt_t delx = xtmp - x[j].x;
+          const flt_t dely = ytmp - x[j].y;
+          const flt_t delz = ztmp - x[j].z;
+          const flt_t rsq = delx * delx + dely * dely + delz * delz;
+          if (rsq > cutsq_custom) addme = 0;
+
+          if (addme)
+            neighptr[n++] = joriginal;
+        }
+      }
+
+      ilist[my_inum++] = i;
+      firstneigh[i] = neighptr;
+      numneigh[i] = n;
+
+      int pad_end = n;
+      IP_PRE_neighbor_pad(pad_end, 0);
+      #if defined(LMP_SIMD_COMPILER)
+      #pragma vector aligned
+      #pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
+              avg=INTEL_COMPILE_WIDTH/2
+      #endif
+      for ( ; n < pad_end; n++)
+        neighptr[n] = e_nall;
+
+      ipage.vgot(n);
+      if (ipage.status())
+        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    }
+
+    int last_inum = 0, loop_end;
+    _inum_counts[tid] = my_inum;
+  }
+  int inum = _inum_counts[0];
+  for (int tid = 1; tid < packthreads; tid++) {
+    for (int i = _inum_starts[tid]; i < _inum_counts[tid]; i++) {
+      if (THREE) numhalf[inum] = numhalf[i];
+      ilist[inum++] = ilist[i];
+    }
+  }
+  list->inum = inum;
+
+  if (THREE && num_skip > 0) {
+    int * const list_start = firstneigh[ilist[0]];
+    for (int ii = 0; ii < inum; ii++) {
+      int i = ilist[ii];
+      cnumneigh[ii] = static_cast<int>(firstneigh[i] - list_start);
+    }
+  }
+  if (list->ghost) {
+    int num = 0;
+    int my_inum = list->inum;
+    for (int i = 0; i < my_inum; i++)
+      if (ilist[i] < nlocal) num++;
+      else break;
+    list->inum = num;
+    list->gnum = my_inum - num;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void NPairSkipTrimIntel::build(NeighList *list)
+{
+  if (_fix->three_body_neighbor()==0 ||
+      _full_props[list->listskip->index] == 0) {
+    if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
+      build_t<float,double,0>(list, nullptr, nullptr, nullptr, _fix->get_mixed_buffers());
+    else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+      build_t<double,double,0>(list, nullptr, nullptr, nullptr, _fix->get_double_buffers());
+    else
+      build_t<float,float,0>(list, nullptr, nullptr, nullptr, _fix->get_single_buffers());
+  } else {
+    int *nhalf, *cnumneigh, *nhalf_skip, *u;
+    if (_fix->precision() == FixIntel::PREC_MODE_MIXED) {
+      _fix->get_mixed_buffers()->get_list_data3(list->listskip,nhalf_skip,u);
+      _fix->get_mixed_buffers()->grow_data3(list, nhalf, cnumneigh);
+      build_t<float,double,1>(list, nhalf, cnumneigh, nhalf_skip, _fix->get_mixed_buffers());
+    } else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
+      _fix->get_double_buffers()->get_list_data3(list->listskip,nhalf_skip,u);
+      _fix->get_double_buffers()->grow_data3(list, nhalf, cnumneigh);
+      build_t<double,double,1>(list, nhalf, cnumneigh, nhalf_skip, _fix->get_double_buffers());
+    } else {
+      _fix->get_single_buffers()->get_list_data3(list->listskip,nhalf_skip,u);
+      _fix->get_single_buffers()->grow_data3(list,nhalf,cnumneigh);
+      build_t<float,float,1>(list, nhalf, cnumneigh, nhalf_skip, _fix->get_single_buffers());
+    }
+  }
+}
diff --git a/src/INTEL/npair_skip_intel.h b/src/INTEL/npair_skip_intel.h
index d38173895a..993f64bf96 100644
--- a/src/INTEL/npair_skip_intel.h
+++ b/src/INTEL/npair_skip_intel.h
@@ -25,6 +25,18 @@ NPairStyle(skip/ghost/intel,
            NP_SKIP | NP_HALF | NP_FULL |
            NP_NSQ | NP_BIN | NP_MULTI |
            NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_GHOST | NP_INTEL);
+
+NPairStyle(skip/trim/intel,
+           NPairSkipTrimIntel,
+           NP_SKIP | NP_HALF | NP_FULL |
+           NP_NSQ | NP_BIN | NP_MULTI |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_INTEL);
+
+NPairStyle(skip/trim/ghost/intel,
+           NPairSkipTrimIntel,
+           NP_SKIP | NP_HALF | NP_FULL |
+           NP_NSQ | NP_BIN | NP_MULTI |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_GHOST | NP_INTEL);
 // clang-format on
 #else
 
@@ -55,6 +67,22 @@ class NPairSkipIntel : public NPair {
   void build_t(NeighList *, int *numhalf, int *cnumneigh, int *numhalf_skip);
 };
 
+class NPairSkipTrimIntel : public NPair {
+ public:
+  NPairSkipTrimIntel(class LAMMPS *);
+  ~NPairSkipTrimIntel() override;
+  void copy_neighbor_info() override;
+  void build(class NeighList *) override;
+
+ protected:
+  FixIntel *_fix;
+  int *_inum_starts, *_inum_counts, *_full_props;
+
+  template <class flt_t, class acc_t, int THREE>
+  void build_t(NeighList *, int *numhalf, int *cnumneigh, int *numhalf_skip,
+               IntelBuffers<flt_t, acc_t> *);
+};
+
 }    // namespace LAMMPS_NS
 
 #endif
diff --git a/src/INTEL/npair_skip_trim_intel.cpp b/src/INTEL/npair_skip_trim_intel.cpp
deleted file mode 100644
index e16e1bc413..0000000000
--- a/src/INTEL/npair_skip_trim_intel.cpp
+++ /dev/null
@@ -1,271 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-   Contributing author: Stan Moore (SNL)
-------------------------------------------------------------------------- */
-
-#include "npair_skip_trim_intel.h"
-
-#include "atom.h"
-#include "comm.h"
-#include "error.h"
-#include "modify.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "neigh_request.h"
-#include "neighbor.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairSkipTrimIntel::NPairSkipTrimIntel(LAMMPS *lmp) : NPair(lmp) {
-  _fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
-  if (!_fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
-  _inum_starts = new int[comm->nthreads];
-  _inum_counts = new int[comm->nthreads];
-  _full_props = nullptr;
-}
-
-/* ---------------------------------------------------------------------- */
-
-NPairSkipTrimIntel::~NPairSkipTrimIntel() {
-  delete []_inum_starts;
-  delete []_inum_counts;
-  delete[] _full_props;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void NPairSkipTrimIntel::copy_neighbor_info()
-{
-  NPair::copy_neighbor_info();
-  // Only need to set _full_props once; npair object deleted for changes
-  if (_full_props) return;
-  _full_props = new int[neighbor->nrequest];
-  for (int i = 0; i < neighbor->nrequest; i++)
-    _full_props[i] = neighbor->requests[i]->full;
-}
-
-/* ----------------------------------------------------------------------
-   build skip list for subset of types from parent list
-   works for half and full lists
-   works for owned (non-ghost) list, also for ghost list
-   iskip and ijskip flag which atom types and type pairs to skip
-   if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
-------------------------------------------------------------------------- */
-
-template<class flt_t, class acc_t, int THREE>
-void NPairSkipTrimIntel::build_t(NeighList *list, int *numhalf, int *cnumneigh,
-                             int *numhalf_skip, IntelBuffers<flt_t,acc_t> *buffers)
-{
-  const int nlocal = atom->nlocal;
-  const int e_nall = nlocal + atom->nghost;
-  const ATOM_T * _noalias const x = buffers->get_x();
-  const int * _noalias const type = atom->type;
-  int * _noalias const ilist = list->ilist;
-  int * _noalias const numneigh = list->numneigh;
-  int ** _noalias const firstneigh = (int ** const)list->firstneigh;  // NOLINT
-  const int * _noalias const ilist_skip = list->listskip->ilist;
-  const int * _noalias const numneigh_skip = list->listskip->numneigh;
-  const int ** _noalias const firstneigh_skip = (const int ** const)list->listskip->firstneigh;  // NOLINT
-  const int * _noalias const iskip = list->iskip;
-  const int **  _noalias const ijskip = (const int ** const)list->ijskip;  // NOLINT
-
-  const flt_t cutsq_custom = cutoff_custom * cutoff_custom;
-  int num_skip = list->listskip->inum;
-  if (list->ghost) num_skip += list->listskip->gnum;
-
-  int packthreads;
-  if (comm->nthreads > INTEL_HTHREADS && THREE==0)
-    packthreads = comm->nthreads;
-  else
-    packthreads = 1;
-
-  #if defined(_OPENMP)
-  #pragma omp parallel if (packthreads > 1)
-  #endif
-  {
-    int tid, ifrom, ito;
-    IP_PRE_omp_range_id(ifrom, ito, tid, num_skip, packthreads);
-
-    // each thread has its own page allocator
-    MyPage<int> &ipage = list->ipage[tid];
-    ipage.reset();
-
-    int my_inum = ifrom;
-    _inum_starts[tid] = ifrom;
-
-    // loop over parent full list
-    for (int ii = ifrom; ii < ito; ii++) {
-      const int i = ilist_skip[ii];
-      const int itype = type[i];
-      if (iskip[itype]) continue;
-
-      const flt_t xtmp = x[i].x;
-      const flt_t ytmp = x[i].y;
-      const flt_t ztmp = x[i].z;
-
-      int n = 0;
-      int *neighptr = ipage.vget();
-
-      // loop over parent non-skip list
-
-      const int * _noalias const jlist = firstneigh_skip[i];
-      const int jnum = numneigh_skip[i];
-
-      if (THREE) {
-        const int jnumhalf = numhalf_skip[ii];
-        for (int jj = 0; jj < jnumhalf; jj++) {
-          const int joriginal = jlist[jj];
-          const int j = joriginal & NEIGHMASK;
-
-          int addme = 1;
-          if (ijskip[itype][type[j]]) addme = 0;
-
-          // trim to shorter cutoff
-
-          const flt_t delx = xtmp - x[j].x;
-          const flt_t dely = ytmp - x[j].y;
-          const flt_t delz = ztmp - x[j].z;
-          const flt_t rsq = delx * delx + dely * dely + delz * delz;
-          if (rsq > cutsq_custom) addme = 0;
-
-          if (addme)
-            neighptr[n++] = joriginal;
-        }
-        numhalf[my_inum] = n;
-
-        for (int jj = jnumhalf; jj < jnum; jj++) {
-          const int joriginal = jlist[jj];
-          const int j = joriginal & NEIGHMASK;
-
-          int addme = 1;
-          if (ijskip[itype][type[j]]) addme = 0;
-
-          // trim to shorter cutoff
-
-          const flt_t delx = xtmp - x[j].x;
-          const flt_t dely = ytmp - x[j].y;
-          const flt_t delz = ztmp - x[j].z;
-          const flt_t rsq = delx * delx + dely * dely + delz * delz;
-          if (rsq > cutsq_custom) addme = 0;
-
-          if (addme)
-            neighptr[n++] = joriginal;
-        }
-      } else {
-        #if defined(LMP_SIMD_COMPILER)
-        #pragma vector aligned
-        #pragma ivdep
-        #endif
-        for (int jj = 0; jj < jnum; jj++) {
-          const int joriginal = jlist[jj];
-          const int j = joriginal & NEIGHMASK;
-
-          int addme = 1;
-          if (ijskip[itype][type[j]]) addme = 0;
-
-          // trim to shorter cutoff
-
-          const flt_t delx = xtmp - x[j].x;
-          const flt_t dely = ytmp - x[j].y;
-          const flt_t delz = ztmp - x[j].z;
-          const flt_t rsq = delx * delx + dely * dely + delz * delz;
-          if (rsq > cutsq_custom) addme = 0;
-
-          if (addme)
-            neighptr[n++] = joriginal;
-        }
-      }
-
-      ilist[my_inum++] = i;
-      firstneigh[i] = neighptr;
-      numneigh[i] = n;
-
-      int pad_end = n;
-      IP_PRE_neighbor_pad(pad_end, 0);
-      #if defined(LMP_SIMD_COMPILER)
-      #pragma vector aligned
-      #pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
-              avg=INTEL_COMPILE_WIDTH/2
-      #endif
-      for ( ; n < pad_end; n++)
-        neighptr[n] = e_nall;
-
-      ipage.vgot(n);
-      if (ipage.status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-    }
-
-    int last_inum = 0, loop_end;
-    _inum_counts[tid] = my_inum;
-  }
-  int inum = _inum_counts[0];
-  for (int tid = 1; tid < packthreads; tid++) {
-    for (int i = _inum_starts[tid]; i < _inum_counts[tid]; i++) {
-      if (THREE) numhalf[inum] = numhalf[i];
-      ilist[inum++] = ilist[i];
-    }
-  }
-  list->inum = inum;
-
-  if (THREE && num_skip > 0) {
-    int * const list_start = firstneigh[ilist[0]];
-    for (int ii = 0; ii < inum; ii++) {
-      int i = ilist[ii];
-      cnumneigh[ii] = static_cast<int>(firstneigh[i] - list_start);
-    }
-  }
-  if (list->ghost) {
-    int num = 0;
-    int my_inum = list->inum;
-    for (int i = 0; i < my_inum; i++)
-      if (ilist[i] < nlocal) num++;
-      else break;
-    list->inum = num;
-    list->gnum = my_inum - num;
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-void NPairSkipTrimIntel::build(NeighList *list)
-{
-  if (_fix->three_body_neighbor()==0 ||
-      _full_props[list->listskip->index] == 0) {
-    if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
-      build_t<float,double,0>(list, nullptr, nullptr, nullptr, _fix->get_mixed_buffers());
-    else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
-      build_t<double,double,0>(list, nullptr, nullptr, nullptr, _fix->get_double_buffers());
-    else
-      build_t<float,float,0>(list, nullptr, nullptr, nullptr, _fix->get_single_buffers());
-  } else {
-    int *nhalf, *cnumneigh, *nhalf_skip, *u;
-    if (_fix->precision() == FixIntel::PREC_MODE_MIXED) {
-      _fix->get_mixed_buffers()->get_list_data3(list->listskip,nhalf_skip,u);
-      _fix->get_mixed_buffers()->grow_data3(list, nhalf, cnumneigh);
-      build_t<float,double,1>(list, nhalf, cnumneigh, nhalf_skip, _fix->get_mixed_buffers());
-    } else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-      _fix->get_double_buffers()->get_list_data3(list->listskip,nhalf_skip,u);
-      _fix->get_double_buffers()->grow_data3(list, nhalf, cnumneigh);
-      build_t<double,double,1>(list, nhalf, cnumneigh, nhalf_skip, _fix->get_double_buffers());
-    } else {
-      _fix->get_single_buffers()->get_list_data3(list->listskip,nhalf_skip,u);
-      _fix->get_single_buffers()->grow_data3(list,nhalf,cnumneigh);
-      build_t<float,float,1>(list, nhalf, cnumneigh, nhalf_skip, _fix->get_single_buffers());
-    }
-  }
-}
diff --git a/src/INTEL/npair_skip_trim_intel.h b/src/INTEL/npair_skip_trim_intel.h
deleted file mode 100644
index f0018e5df4..0000000000
--- a/src/INTEL/npair_skip_trim_intel.h
+++ /dev/null
@@ -1,62 +0,0 @@
-// clang-format off
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(skip/trim/intel,
-           NPairSkipTrimIntel,
-           NP_SKIP | NP_HALF | NP_FULL |
-           NP_NSQ | NP_BIN | NP_MULTI |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_INTEL);
-
-NPairStyle(skip/trim/ghost/intel,
-           NPairSkipTrimIntel,
-           NP_SKIP | NP_HALF | NP_FULL |
-           NP_NSQ | NP_BIN | NP_MULTI |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_GHOST | NP_INTEL);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_SKIP_TRIM_INTEL_H
-#define LMP_NPAIR_SKIP_TRIM_INTEL_H
-
-#include "fix_intel.h"
-#include "npair.h"
-
-#if defined(_OPENMP)
-#include <omp.h>
-#endif
-
-namespace LAMMPS_NS {
-
-class NPairSkipTrimIntel : public NPair {
- public:
-  NPairSkipTrimIntel(class LAMMPS *);
-  ~NPairSkipTrimIntel() override;
-  void copy_neighbor_info() override;
-  void build(class NeighList *) override;
-
- protected:
-  FixIntel *_fix;
-  int *_inum_starts, *_inum_counts, *_full_props;
-
-  template <class flt_t, class acc_t, int THREE>
-  void build_t(NeighList *, int *numhalf, int *cnumneigh, int *numhalf_skip,
-               IntelBuffers<flt_t, acc_t> *);
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/INTEL/nstencil_bin_intel.cpp b/src/INTEL/nstencil_bin_intel.cpp
new file mode 100644
index 0000000000..426fcec33c
--- /dev/null
+++ b/src/INTEL/nstencil_bin_intel.cpp
@@ -0,0 +1,70 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "nstencil_bin_intel.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int DIM_3D, int TRI>
+NStencilBinIntel<HALF, DIM_3D, TRI>::NStencilBinIntel(LAMMPS *lmp) : NStencil(lmp) {}
+
+/* ----------------------------------------------------------------------
+   create stencil based on bin geometry and cutoff
+------------------------------------------------------------------------- */
+
+template<int HALF, int DIM_3D, int TRI>
+void NStencilBinIntel<HALF, DIM_3D, TRI>::create()
+{
+  int i, j, k;
+
+  // For half stencils, only the upper plane is needed
+  int sy_min = sy;
+  int sz_min = sz;
+  if ((!TRI) && HALF && (!DIM_3D)) sy_min = 0;
+  if ((!TRI) && HALF && DIM_3D) sz_min = 0;
+
+  nstencil = 0;
+
+  // For Intel, half and ortho stencils do not include central bin
+  // as, historically, this was never included in a stencil.
+  // Non-Intel npair classes were updated to account for this change,
+  // but the Intel npair classes have not yet been updated
+  // if (HALF && (!TRI)) stencil[nstencil++] = 0;
+
+  for (k = -sz_min; k <= sz; k++) {
+    for (j = -sy_min; j <= sy; j++) {
+      for (i = -sx; i <= sx; i++) {
+
+        // Now only include "upper right" bins for half and ortho stencils
+        if (HALF && (!DIM_3D) && (!TRI))
+          if (! (j > 0 || (j == 0 && i > 0))) continue;
+        if (HALF && DIM_3D && (!TRI))
+          if (! (k > 0 || j > 0 || (j == 0 && i > 0))) continue;
+
+        if (bin_distance(i, j, k) < cutneighmaxsq)
+          stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
+      }
+    }
+  }
+}
+
+namespace LAMMPS_NS {
+template class NStencilBinIntel<0,0,0>;
+template class NStencilBinIntel<0,1,0>;
+template class NStencilBinIntel<1,0,0>;
+template class NStencilBinIntel<1,0,1>;
+template class NStencilBinIntel<1,1,0>;
+template class NStencilBinIntel<1,1,1>;
+}
diff --git a/src/INTEL/nstencil_bin_intel.h b/src/INTEL/nstencil_bin_intel.h
new file mode 100644
index 0000000000..e377db5fe5
--- /dev/null
+++ b/src/INTEL/nstencil_bin_intel.h
@@ -0,0 +1,65 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NSTENCIL_CLASS
+// clang-format off
+typedef NStencilBinIntel<0, 0, 0> NStencilFullBin2dIntel;
+NStencilStyle(full/bin/2d/intel,
+              NStencilFullBin2dIntel,
+              NS_FULL | NS_BIN | NS_2D | NS_ORTHO | NS_TRI | NS_INTEL);
+
+typedef NStencilBinIntel<0, 1, 0> NStencilFullBin3dIntel;
+NStencilStyle(full/bin/3d/intel,
+              NStencilFullBin3dIntel,
+              NS_FULL | NS_BIN | NS_3D | NS_ORTHO | NS_TRI | NS_INTEL);
+
+typedef NStencilBinIntel<1, 0, 0> NStencilHalfBin2dIntel;
+NStencilStyle(half/bin/2d/intel,
+              NStencilHalfBin2dIntel,
+              NS_HALF | NS_BIN | NS_2D | NS_ORTHO | NS_INTEL);
+
+typedef NStencilBinIntel<1, 0, 1> NStencilHalfBin2dTriIntel;
+NStencilStyle(half/bin/2d/tri/intel,
+              NStencilHalfBin2dTriIntel,
+              NS_HALF | NS_BIN | NS_2D | NS_TRI | NS_INTEL);
+
+typedef NStencilBinIntel<1, 1, 0> NStencilHalfBin3dIntel;
+NStencilStyle(half/bin/3d/intel,
+              NStencilHalfBin3dIntel,
+              NS_HALF | NS_BIN | NS_3D | NS_ORTHO | NS_INTEL);
+
+typedef NStencilBinIntel<1, 1, 1> NStencilHalfBin3dTriIntel;
+NStencilStyle(half/bin/3d/tri/intel,
+              NStencilHalfBin3dTriIntel,
+              NS_HALF | NS_BIN | NS_3D | NS_TRI | NS_INTEL);
+// clang-format on
+#else
+
+#ifndef LMP_NSTENCIL_BIN_INTEL_H
+#define LMP_NSTENCIL_BIN_INTEL_H
+
+#include "nstencil.h"
+
+namespace LAMMPS_NS {
+
+template<int HALF, int DIM_3D, int TRI>
+class NStencilBinIntel : public NStencil {
+ public:
+  NStencilBinIntel(class LAMMPS *);
+  void create() override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/nstencil_half_bin_3d_tri.cpp b/src/INTEL/nstencil_ghost_bin_intel.cpp
similarity index 62%
rename from src/nstencil_half_bin_3d_tri.cpp
rename to src/INTEL/nstencil_ghost_bin_intel.cpp
index 72bef7fb76..23d4930cc0 100644
--- a/src/nstencil_half_bin_3d_tri.cpp
+++ b/src/INTEL/nstencil_ghost_bin_intel.cpp
@@ -11,34 +11,44 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#include "nstencil_half_bin_3d_tri.h"
+#include "nstencil_ghost_bin_intel.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-NStencilHalfBin3dTri::NStencilHalfBin3dTri(LAMMPS *lmp) : NStencil(lmp) {}
+template<int DIM_3D>
+NStencilGhostBinIntel<DIM_3D>::NStencilGhostBinIntel(LAMMPS *lmp) : NStencil(lmp)
+{
+  xyzflag = 1;
+}
 
 /* ----------------------------------------------------------------------
    create stencil based on bin geometry and cutoff
 ------------------------------------------------------------------------- */
 
-void NStencilHalfBin3dTri::create()
+template<int DIM_3D>
+void NStencilGhostBinIntel<DIM_3D>::create()
 {
   int i, j, k;
 
-  // for triclinic, need to use full stencil in all dims
-  //   not a half stencil in z
-  // b/c transforming orthog -> lambda -> orthog for ghost atoms
-  //   with an added PBC offset can shift all 3 coords by epsilon
-  // thus for an I/J owned/ghost pair, the xyz coords
-  //   and bin assignments can be different on I proc vs J proc
-
   nstencil = 0;
 
-  for (k = -sz; k <= sz; k++)
-    for (j = -sy; j <= sy; j++)
-      for (i = -sx; i <= sx; i++)
-        if (bin_distance(i, j, k) < cutneighmaxsq)
+  for (k = -sz; k <= sz; k++) {
+    for (j = -sy; j <= sy; j++) {
+      for (i = -sx; i <= sx; i++) {
+        if (bin_distance(i, j, k) < cutneighmaxsq) {
+          stencilxyz[nstencil][0] = i;
+          stencilxyz[nstencil][1] = j;
+          stencilxyz[nstencil][2] = k;
           stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
+        }
+      }
+    }
+  }
+}
+
+namespace LAMMPS_NS {
+template class NStencilGhostBinIntel<0>;
+template class NStencilGhostBinIntel<1>;
 }
diff --git a/src/nstencil_full_ghost_bin_3d.h b/src/INTEL/nstencil_ghost_bin_intel.h
similarity index 60%
rename from src/nstencil_full_ghost_bin_3d.h
rename to src/INTEL/nstencil_ghost_bin_intel.h
index 5fea44f48e..f4ae9f9804 100644
--- a/src/nstencil_full_ghost_bin_3d.h
+++ b/src/INTEL/nstencil_ghost_bin_intel.h
@@ -13,22 +13,29 @@
 
 #ifdef NSTENCIL_CLASS
 // clang-format off
-NStencilStyle(full/ghost/bin/3d,
-              NStencilFullGhostBin3d,
-              NS_FULL | NS_GHOST | NS_BIN | NS_3D | NS_ORTHO | NS_TRI);
+typedef NStencilGhostBinIntel<0> NStencilFullGhostBin2dIntel;
+NStencilStyle(full/ghost/bin/2d/intel,
+              NStencilFullGhostBin2dIntel,
+              NS_FULL | NS_GHOST | NS_BIN | NS_2D | NS_ORTHO | NS_TRI | NS_INTEL);
+
+typedef NStencilGhostBinIntel<1> NStencilFullGhostBin3dIntel;
+NStencilStyle(full/ghost/bin/3d/intel,
+              NStencilFullGhostBin3dIntel,
+              NS_FULL | NS_GHOST | NS_BIN | NS_3D | NS_ORTHO | NS_TRI | NS_INTEL);
 // clang-format on
 #else
 
-#ifndef LMP_NSTENCIL_FULL_GHOST_BIN_3D_H
-#define LMP_NSTENCIL_FULL_GHOST_BIN_3D_H
+#ifndef LMP_NSTENCIL_GHOST_BIN_INTEL_H
+#define LMP_NSTENCIL_GHOST_BIN_INTEL_H
 
 #include "nstencil.h"
 
 namespace LAMMPS_NS {
 
-class NStencilFullGhostBin3d : public NStencil {
+template<int DIM_3D>
+class NStencilGhostBinIntel : public NStencil {
  public:
-  NStencilFullGhostBin3d(class LAMMPS *);
+  NStencilGhostBinIntel(class LAMMPS *);
   void create() override;
 };
 
diff --git a/src/INTEL/pair_airebo_intel.cpp b/src/INTEL/pair_airebo_intel.cpp
index 7bc2b3edb8..8d2475604b 100644
--- a/src/INTEL/pair_airebo_intel.cpp
+++ b/src/INTEL/pair_airebo_intel.cpp
@@ -633,9 +633,8 @@ namespace overloaded {
     compared to original code.
    ---------------------------------------------------------------------- */
 
-#define CARBON 0
-#define HYDROGEN 1
-#define TOL 1.0e-9
+enum { CARBON, HYDROGEN };
+static constexpr double TOL = 1.0e-9;
 
 template<typename T>
 inline T fmin_nonan(T a, T b) {
diff --git a/src/INTEL/pair_eam_intel.cpp b/src/INTEL/pair_eam_intel.cpp
index 9c5d6da5e5..03e268bb68 100644
--- a/src/INTEL/pair_eam_intel.cpp
+++ b/src/INTEL/pair_eam_intel.cpp
@@ -34,7 +34,7 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 #define FC_PACKED1_T typename ForceConst<flt_t>::fc_packed1
 #define FC_PACKED2_T typename ForceConst<flt_t>::fc_packed2
diff --git a/src/INTEL/pair_snap_intel.cpp b/src/INTEL/pair_snap_intel.cpp
index d91f0adc36..c9a4ed3d5a 100644
--- a/src/INTEL/pair_snap_intel.cpp
+++ b/src/INTEL/pair_snap_intel.cpp
@@ -34,8 +34,8 @@
 using namespace LAMMPS_NS;
 using namespace ip_simd;
 
-#define MAXLINE 1024
-#define MAXWORD 3
+static constexpr int MAXLINE = 1024;
+static constexpr int MAXWORD = 3;
 
 /* ---------------------------------------------------------------------- */
 
@@ -445,7 +445,8 @@ void PairSNAPIntel::read_files(char *coefffilename, char *paramfilename)
                                    coefffilename, utils::getsyserror());
   }
 
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
   int nwords = 0;
   while (nwords == 0) {
diff --git a/src/INTEL/pair_sw_intel.cpp b/src/INTEL/pair_sw_intel.cpp
index fa62f499de..61c25692e9 100644
--- a/src/INTEL/pair_sw_intel.cpp
+++ b/src/INTEL/pair_sw_intel.cpp
@@ -52,8 +52,8 @@ using namespace LAMMPS_NS;
 #define FC_PACKED2_T typename ForceConst<flt_t>::fc_packed2
 #define FC_PACKED3_T typename ForceConst<flt_t>::fc_packed3
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/INTEL/pppm_disp_intel.cpp b/src/INTEL/pppm_disp_intel.cpp
index 6dac9fad99..01981f3152 100644
--- a/src/INTEL/pppm_disp_intel.cpp
+++ b/src/INTEL/pppm_disp_intel.cpp
@@ -39,11 +39,11 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define MAXORDER   7
-#define OFFSET 16384
-#define SMALL 0.00001
-#define LARGE 10000.0
-#define EPS_HOC 1.0e-7
+static constexpr int MAXORDER =   7;
+static constexpr int OFFSET = 16384;
+static constexpr double SMALL = 0.00001;
+static constexpr double LARGE = 10000.0;
+static constexpr double EPS_HOC = 1.0e-7;
 
 enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER};
 enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE};
@@ -53,13 +53,8 @@ enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM,
      FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE,
      FORWARD_AD_PERATOM_NONE};
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 /* ---------------------------------------------------------------------- */
 
@@ -268,23 +263,23 @@ void PPPMDispIntel::compute(int eflag, int vflag)
     //perform calculations for coulomb interactions only
 
     if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
-      particle_map<float,double>(delxinv, delyinv, delzinv, shift, part2grid,
-                                 nupper, nlower, nxlo_out, nylo_out, nzlo_out,
-                                 nxhi_out, nyhi_out, nzhi_out,
-                                 fix->get_mixed_buffers());
-      make_rho_c<float,double>(fix->get_mixed_buffers());
+      particle_map_intel<float,double>(delxinv, delyinv, delzinv, shift, part2grid,
+                                       nupper, nlower, nxlo_out, nylo_out, nzlo_out,
+                                       nxhi_out, nyhi_out, nzhi_out,
+                                       fix->get_mixed_buffers());
+      make_rho_c_intel<float,double>(fix->get_mixed_buffers());
     } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-      particle_map<double,double>(delxinv, delyinv, delzinv, shift, part2grid,
-                                  nupper, nlower, nxlo_out, nylo_out,
-                                  nzlo_out, nxhi_out, nyhi_out, nzhi_out,
-                                  fix->get_double_buffers());
-      make_rho_c<double,double>(fix->get_double_buffers());
+      particle_map_intel<double,double>(delxinv, delyinv, delzinv, shift, part2grid,
+                                        nupper, nlower, nxlo_out, nylo_out,
+                                        nzlo_out, nxhi_out, nyhi_out, nzhi_out,
+                                        fix->get_double_buffers());
+      make_rho_c_intel<double,double>(fix->get_double_buffers());
     } else {
-      particle_map<float,float>(delxinv, delyinv, delzinv, shift, part2grid,
-                                nupper, nlower, nxlo_out, nylo_out, nzlo_out,
-                                nxhi_out, nyhi_out, nzhi_out,
-                                fix->get_single_buffers());
-      make_rho_c<float,float>(fix->get_single_buffers());
+      particle_map_intel<float,float>(delxinv, delyinv, delzinv, shift, part2grid,
+                                      nupper, nlower, nxlo_out, nylo_out, nzlo_out,
+                                      nxhi_out, nyhi_out, nzhi_out,
+                                      fix->get_single_buffers());
+      make_rho_c_intel<float,float>(fix->get_single_buffers());
     }
 
     gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_SCALAR),
@@ -305,11 +300,11 @@ void PPPMDispIntel::compute(int eflag, int vflag)
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
 
       if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
-        fieldforce_c_ad<float,double>(fix->get_mixed_buffers());
+        fieldforce_c_ad_intel<float,double>(fix->get_mixed_buffers());
       } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-        fieldforce_c_ad<double,double>(fix->get_double_buffers());
+        fieldforce_c_ad_intel<double,double>(fix->get_double_buffers());
       } else {
-        fieldforce_c_ad<float,float>(fix->get_single_buffers());
+        fieldforce_c_ad_intel<float,float>(fix->get_single_buffers());
       }
 
       if (vflag_atom)
@@ -330,11 +325,11 @@ void PPPMDispIntel::compute(int eflag, int vflag)
                        gc_buf1,gc_buf2,MPI_FFT_SCALAR);
 
       if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
-        fieldforce_c_ik<float,double>(fix->get_mixed_buffers());
+        fieldforce_c_ik_intel<float,double>(fix->get_mixed_buffers());
       } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-        fieldforce_c_ik<double,double>(fix->get_double_buffers());
+        fieldforce_c_ik_intel<double,double>(fix->get_double_buffers());
       } else {
-        fieldforce_c_ik<float,float>(fix->get_single_buffers());
+        fieldforce_c_ik_intel<float,float>(fix->get_single_buffers());
       }
 
       if (evflag_atom)
@@ -349,26 +344,26 @@ void PPPMDispIntel::compute(int eflag, int vflag)
     //perform calculations for geometric mixing
 
     if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
-      particle_map<float,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
-                                 part2grid_6, nupper_6, nlower_6, nxlo_out_6,
-                                 nylo_out_6, nzlo_out_6, nxhi_out_6,
-                                 nyhi_out_6, nzhi_out_6,
-                                 fix->get_mixed_buffers());
-      make_rho_g<float,double>(fix->get_mixed_buffers());
+      particle_map_intel<float,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
+                                       part2grid_6, nupper_6, nlower_6, nxlo_out_6,
+                                       nylo_out_6, nzlo_out_6, nxhi_out_6,
+                                       nyhi_out_6, nzhi_out_6,
+                                       fix->get_mixed_buffers());
+      make_rho_g_intel<float,double>(fix->get_mixed_buffers());
     } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-      particle_map<double,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
-                                  part2grid_6, nupper_6, nlower_6, nxlo_out_6,
-                                  nylo_out_6, nzlo_out_6, nxhi_out_6,
-                                  nyhi_out_6, nzhi_out_6,
-                                  fix->get_double_buffers());
-      make_rho_g<double,double>(fix->get_double_buffers());
+      particle_map_intel<double,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
+                                        part2grid_6, nupper_6, nlower_6, nxlo_out_6,
+                                        nylo_out_6, nzlo_out_6, nxhi_out_6,
+                                        nyhi_out_6, nzhi_out_6,
+                                        fix->get_double_buffers());
+      make_rho_g_intel<double,double>(fix->get_double_buffers());
     } else {
-      particle_map<float,float>(delxinv_6, delyinv_6, delzinv_6, shift_6,
-                                part2grid_6, nupper_6, nlower_6, nxlo_out_6,
-                                nylo_out_6, nzlo_out_6, nxhi_out_6,
-                                nyhi_out_6, nzhi_out_6,
-                                fix->get_single_buffers());
-      make_rho_g<float,float>(fix->get_single_buffers());
+      particle_map_intel<float,float>(delxinv_6, delyinv_6, delzinv_6, shift_6,
+                                      part2grid_6, nupper_6, nlower_6, nxlo_out_6,
+                                      nylo_out_6, nzlo_out_6, nxhi_out_6,
+                                      nyhi_out_6, nzhi_out_6,
+                                      fix->get_single_buffers());
+      make_rho_g_intel<float,float>(fix->get_single_buffers());
     }
 
     gc6->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO_G,1,sizeof(FFT_SCALAR),
@@ -390,11 +385,11 @@ void PPPMDispIntel::compute(int eflag, int vflag)
                         gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
 
       if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
-        fieldforce_g_ad<float,double>(fix->get_mixed_buffers());
+        fieldforce_g_ad_intel<float,double>(fix->get_mixed_buffers());
       } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-        fieldforce_g_ad<double,double>(fix->get_double_buffers());
+        fieldforce_g_ad_intel<double,double>(fix->get_double_buffers());
       } else {
-        fieldforce_g_ad<float,float>(fix->get_single_buffers());
+        fieldforce_g_ad_intel<float,float>(fix->get_single_buffers());
       }
 
       if (vflag_atom)
@@ -415,11 +410,11 @@ void PPPMDispIntel::compute(int eflag, int vflag)
                         gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
 
       if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
-        fieldforce_g_ik<float,double>(fix->get_mixed_buffers());
+        fieldforce_g_ik_intel<float,double>(fix->get_mixed_buffers());
       } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-        fieldforce_g_ik<double,double>(fix->get_double_buffers());
+        fieldforce_g_ik_intel<double,double>(fix->get_double_buffers());
       } else {
-        fieldforce_g_ik<float,float>(fix->get_single_buffers());
+        fieldforce_g_ik_intel<float,float>(fix->get_single_buffers());
       }
 
       if (evflag_atom)
@@ -434,26 +429,26 @@ void PPPMDispIntel::compute(int eflag, int vflag)
     //perform calculations for arithmetic mixing
 
     if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
-      particle_map<float,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
-                                 part2grid_6, nupper_6, nlower_6,
-                                 nxlo_out_6, nylo_out_6, nzlo_out_6,
-                                 nxhi_out_6, nyhi_out_6, nzhi_out_6,
-                                 fix->get_mixed_buffers());
-      make_rho_a<float,double>(fix->get_mixed_buffers());
+      particle_map_intel<float,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
+                                       part2grid_6, nupper_6, nlower_6,
+                                       nxlo_out_6, nylo_out_6, nzlo_out_6,
+                                       nxhi_out_6, nyhi_out_6, nzhi_out_6,
+                                       fix->get_mixed_buffers());
+      make_rho_a_intel<float,double>(fix->get_mixed_buffers());
     } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-      particle_map<double,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
-                                  part2grid_6, nupper_6, nlower_6, nxlo_out_6,
-                                  nylo_out_6, nzlo_out_6, nxhi_out_6,
-                                  nyhi_out_6, nzhi_out_6,
-                                  fix->get_double_buffers());
-      make_rho_a<double,double>(fix->get_double_buffers());
+      particle_map_intel<double,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
+                                        part2grid_6, nupper_6, nlower_6, nxlo_out_6,
+                                        nylo_out_6, nzlo_out_6, nxhi_out_6,
+                                        nyhi_out_6, nzhi_out_6,
+                                        fix->get_double_buffers());
+      make_rho_a_intel<double,double>(fix->get_double_buffers());
     } else {
-      particle_map<float,float>(delxinv_6, delyinv_6, delzinv_6, shift_6,
-                                part2grid_6, nupper_6, nlower_6, nxlo_out_6,
-                                nylo_out_6, nzlo_out_6, nxhi_out_6,
-                                nyhi_out_6, nzhi_out_6,
-                                fix->get_single_buffers());
-      make_rho_a<float,float>(fix->get_single_buffers());
+      particle_map_intel<float,float>(delxinv_6, delyinv_6, delzinv_6, shift_6,
+                                      part2grid_6, nupper_6, nlower_6, nxlo_out_6,
+                                      nylo_out_6, nzlo_out_6, nxhi_out_6,
+                                      nyhi_out_6, nzhi_out_6,
+                                      fix->get_single_buffers());
+      make_rho_a_intel<float,float>(fix->get_single_buffers());
     }
 
     gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO_A,7,sizeof(FFT_SCALAR),
@@ -486,11 +481,11 @@ void PPPMDispIntel::compute(int eflag, int vflag)
                         gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
 
       if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
-        fieldforce_a_ad<float,double>(fix->get_mixed_buffers());
+        fieldforce_a_ad_intel<float,double>(fix->get_mixed_buffers());
       } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-        fieldforce_a_ad<double,double>(fix->get_double_buffers());
+        fieldforce_a_ad_intel<double,double>(fix->get_double_buffers());
       } else {
-        fieldforce_a_ad<float,float>(fix->get_single_buffers());
+        fieldforce_a_ad_intel<float,float>(fix->get_single_buffers());
       }
 
       if (evflag_atom)
@@ -529,11 +524,11 @@ void PPPMDispIntel::compute(int eflag, int vflag)
                         gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
 
       if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
-        fieldforce_a_ik<float,double>(fix->get_mixed_buffers());
+        fieldforce_a_ik_intel<float,double>(fix->get_mixed_buffers());
       } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-        fieldforce_a_ik<double,double>(fix->get_double_buffers());
+        fieldforce_a_ik_intel<double,double>(fix->get_double_buffers());
       } else {
-        fieldforce_a_ik<float,float>(fix->get_single_buffers());
+        fieldforce_a_ik_intel<float,float>(fix->get_single_buffers());
       }
 
       if (evflag_atom)
@@ -549,26 +544,26 @@ void PPPMDispIntel::compute(int eflag, int vflag)
     // perform calculations if no mixing rule applies
 
     if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
-      particle_map<float,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
-                                 part2grid_6, nupper_6, nlower_6, nxlo_out_6,
-                                 nylo_out_6, nzlo_out_6, nxhi_out_6,
-                                 nyhi_out_6, nzhi_out_6,
-                                 fix->get_mixed_buffers());
-      make_rho_none<float,double>(fix->get_mixed_buffers());
+      particle_map_intel<float,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
+                                       part2grid_6, nupper_6, nlower_6, nxlo_out_6,
+                                       nylo_out_6, nzlo_out_6, nxhi_out_6,
+                                       nyhi_out_6, nzhi_out_6,
+                                       fix->get_mixed_buffers());
+      make_rho_none_intel<float,double>(fix->get_mixed_buffers());
     } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-      particle_map<double,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
-                                  part2grid_6, nupper_6, nlower_6, nxlo_out_6,
-                                  nylo_out_6, nzlo_out_6, nxhi_out_6,
-                                  nyhi_out_6, nzhi_out_6,
-                                  fix->get_double_buffers());
-      make_rho_none<double,double>(fix->get_double_buffers());
+      particle_map_intel<double,double>(delxinv_6, delyinv_6, delzinv_6, shift_6,
+                                        part2grid_6, nupper_6, nlower_6, nxlo_out_6,
+                                        nylo_out_6, nzlo_out_6, nxhi_out_6,
+                                        nyhi_out_6, nzhi_out_6,
+                                        fix->get_double_buffers());
+      make_rho_none_intel<double,double>(fix->get_double_buffers());
     } else {
-      particle_map<float,float>(delxinv_6, delyinv_6, delzinv_6, shift_6,
-                                part2grid_6, nupper_6, nlower_6, nxlo_out_6,
-                                nylo_out_6, nzlo_out_6, nxhi_out_6,
-                                nyhi_out_6, nzhi_out_6,
-                                fix->get_single_buffers());
-      make_rho_none<float,float>(fix->get_single_buffers());
+      particle_map_intel<float,float>(delxinv_6, delyinv_6, delzinv_6, shift_6,
+                                      part2grid_6, nupper_6, nlower_6, nxlo_out_6,
+                                      nylo_out_6, nzlo_out_6, nxhi_out_6,
+                                      nyhi_out_6, nzhi_out_6,
+                                      fix->get_single_buffers());
+      make_rho_none_intel<float,float>(fix->get_single_buffers());
     }
 
     gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO_NONE,1,sizeof(FFT_SCALAR),
@@ -591,11 +586,11 @@ void PPPMDispIntel::compute(int eflag, int vflag)
                         gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
 
       if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
-        fieldforce_none_ad<float,double>(fix->get_mixed_buffers());
+        fieldforce_none_ad_intel<float,double>(fix->get_mixed_buffers());
       } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-        fieldforce_none_ad<double,double>(fix->get_double_buffers());
+        fieldforce_none_ad_intel<double,double>(fix->get_double_buffers());
       } else {
-        fieldforce_none_ad<float,float>(fix->get_single_buffers());
+        fieldforce_none_ad_intel<float,float>(fix->get_single_buffers());
       }
 
       if (vflag_atom)
@@ -620,11 +615,11 @@ void PPPMDispIntel::compute(int eflag, int vflag)
                         gc6_buf1,gc6_buf2,MPI_FFT_SCALAR);
 
       if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
-        fieldforce_none_ik<float,double>(fix->get_mixed_buffers());
+        fieldforce_none_ik_intel<float,double>(fix->get_mixed_buffers());
       } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
-        fieldforce_none_ik<double,double>(fix->get_double_buffers());
+        fieldforce_none_ik_intel<double,double>(fix->get_double_buffers());
       } else {
-        fieldforce_none_ik<float,float>(fix->get_single_buffers());
+        fieldforce_none_ik_intel<float,float>(fix->get_single_buffers());
       }
 
       if (evflag_atom)
@@ -730,11 +725,11 @@ void PPPMDispIntel::compute(int eflag, int vflag)
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t>
-void PPPMDispIntel::particle_map(double delx, double dely, double delz,
-                                 double sft, int** p2g, int nup, int nlow,
-                                 int nxlo, int nylo, int nzlo,
-                                 int nxhi, int nyhi, int nzhi,
-                                 IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::particle_map_intel(double delx, double dely, double delz,
+                                       double sft, int** p2g, int nup, int nlow,
+                                       int nxlo, int nylo, int nzlo,
+                                       int nxhi, int nyhi, int nzhi,
+                                       IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
   int nlocal = atom->nlocal;
   int nthr = comm->nthreads;
@@ -805,7 +800,7 @@ void PPPMDispIntel::particle_map(double delx, double dely, double delz,
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t, int use_table>
-void PPPMDispIntel::make_rho_c(IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::make_rho_c_intel(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
   // clear 3d density array
 
@@ -968,7 +963,7 @@ void PPPMDispIntel::make_rho_c(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t, int use_table>
-void PPPMDispIntel::make_rho_g(IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::make_rho_g_intel(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
   // clear 3d density array
 
@@ -1134,7 +1129,7 @@ void PPPMDispIntel::make_rho_g(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t, int use_table>
-void PPPMDispIntel::make_rho_a(IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::make_rho_a_intel(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
   // clear 3d density array
 
@@ -1268,7 +1263,7 @@ void PPPMDispIntel::make_rho_a(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t, int use_table>
-void PPPMDispIntel::make_rho_none(IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::make_rho_none_intel(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
 
   FFT_SCALAR * _noalias global_density = &(density_brick_none[0][nzlo_out_6][nylo_out_6][nxlo_out_6]);
@@ -1428,7 +1423,7 @@ void PPPMDispIntel::make_rho_none(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t, int use_table>
-void PPPMDispIntel::fieldforce_c_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::fieldforce_c_ik_intel(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
 
   // loop over my charges, interpolate electric field from nearby grid points
@@ -1587,7 +1582,7 @@ void PPPMDispIntel::fieldforce_c_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t, int use_table>
-void PPPMDispIntel::fieldforce_c_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::fieldforce_c_ad_intel(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
 
   // loop over my charges, interpolate electric field from nearby grid points
@@ -1808,7 +1803,7 @@ void PPPMDispIntel::fieldforce_c_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t, int use_table>
-void PPPMDispIntel::fieldforce_g_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::fieldforce_g_ik_intel(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
 
   // loop over my charges, interpolate electric field from nearby grid points
@@ -1964,7 +1959,7 @@ void PPPMDispIntel::fieldforce_g_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t, int use_table>
-void PPPMDispIntel::fieldforce_g_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::fieldforce_g_ad_intel(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
 
   // loop over my charges, interpolate electric field from nearby grid points
@@ -2180,7 +2175,7 @@ void PPPMDispIntel::fieldforce_g_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t, int use_table>
-void PPPMDispIntel::fieldforce_a_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::fieldforce_a_ik_intel(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
 
   // loop over my charges, interpolate electric field from nearby grid points
@@ -2405,7 +2400,7 @@ void PPPMDispIntel::fieldforce_a_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t, int use_table>
-void PPPMDispIntel::fieldforce_a_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::fieldforce_a_ad_intel(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
 
   // loop over my charges, interpolate electric field from nearby grid points
@@ -2733,7 +2728,7 @@ void PPPMDispIntel::fieldforce_a_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t, int use_table>
-void PPPMDispIntel::fieldforce_none_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::fieldforce_none_ik_intel(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
 
   // loop over my charges, interpolate electric field from nearby grid points
@@ -2906,7 +2901,7 @@ void PPPMDispIntel::fieldforce_none_ik(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 ------------------------------------------------------------------------- */
 
 template<class flt_t, class acc_t, int use_table>
-void PPPMDispIntel::fieldforce_none_ad(IntelBuffers<flt_t,acc_t> * /*buffers*/)
+void PPPMDispIntel::fieldforce_none_ad_intel(IntelBuffers<flt_t,acc_t> * /*buffers*/)
 {
   // loop over my charges, interpolate electric field from nearby grid points
   // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
diff --git a/src/INTEL/pppm_disp_intel.h b/src/INTEL/pppm_disp_intel.h
index 20c59c00e8..11fdd05638 100644
--- a/src/INTEL/pppm_disp_intel.h
+++ b/src/INTEL/pppm_disp_intel.h
@@ -86,137 +86,137 @@ class PPPMDispIntel : public PPPMDisp {
 #endif
 
   template <class flt_t, class acc_t>
-  void particle_map(double, double, double, double, int **, int, int, int, int, int, int, int, int,
-                    IntelBuffers<flt_t, acc_t> *buffers);
+  void particle_map_intel(double, double, double, double, int **, int, int, int, int, int, int, int, int,
+                          IntelBuffers<flt_t, acc_t> *buffers);
 
   template <class flt_t, class acc_t, int use_table>
-  void make_rho_c(IntelBuffers<flt_t, acc_t> *buffers);
-  template <class flt_t, class acc_t> void make_rho_c(IntelBuffers<flt_t, acc_t> *buffers)
+  void make_rho_c_intel(IntelBuffers<flt_t, acc_t> *buffers);
+  template <class flt_t, class acc_t> void make_rho_c_intel(IntelBuffers<flt_t, acc_t> *buffers)
   {
     if (_use_table == 1) {
-      make_rho_c<flt_t, acc_t, 1>(buffers);
+      make_rho_c_intel<flt_t, acc_t, 1>(buffers);
     } else {
-      make_rho_c<flt_t, acc_t, 0>(buffers);
+      make_rho_c_intel<flt_t, acc_t, 0>(buffers);
     }
   }
 
   template <class flt_t, class acc_t, int use_table>
-  void make_rho_g(IntelBuffers<flt_t, acc_t> *buffers);
-  template <class flt_t, class acc_t> void make_rho_g(IntelBuffers<flt_t, acc_t> *buffers)
+  void make_rho_g_intel(IntelBuffers<flt_t, acc_t> *buffers);
+  template <class flt_t, class acc_t> void make_rho_g_intel(IntelBuffers<flt_t, acc_t> *buffers)
   {
     if (_use_table == 1) {
-      make_rho_g<flt_t, acc_t, 1>(buffers);
+      make_rho_g_intel<flt_t, acc_t, 1>(buffers);
     } else {
-      make_rho_g<flt_t, acc_t, 0>(buffers);
+      make_rho_g_intel<flt_t, acc_t, 0>(buffers);
     }
   }
 
   template <class flt_t, class acc_t, int use_table>
-  void make_rho_a(IntelBuffers<flt_t, acc_t> *buffers);
-  template <class flt_t, class acc_t> void make_rho_a(IntelBuffers<flt_t, acc_t> *buffers)
+  void make_rho_a_intel(IntelBuffers<flt_t, acc_t> *buffers);
+  template <class flt_t, class acc_t> void make_rho_a_intel(IntelBuffers<flt_t, acc_t> *buffers)
   {
     if (_use_table == 1) {
-      make_rho_a<flt_t, acc_t, 1>(buffers);
+      make_rho_a_intel<flt_t, acc_t, 1>(buffers);
     } else {
-      make_rho_a<flt_t, acc_t, 0>(buffers);
+      make_rho_a_intel<flt_t, acc_t, 0>(buffers);
     }
   }
 
   template <class flt_t, class acc_t, int use_table>
-  void make_rho_none(IntelBuffers<flt_t, acc_t> *buffers);
-  template <class flt_t, class acc_t> void make_rho_none(IntelBuffers<flt_t, acc_t> *buffers)
+  void make_rho_none_intel(IntelBuffers<flt_t, acc_t> *buffers);
+  template <class flt_t, class acc_t> void make_rho_none_intel(IntelBuffers<flt_t, acc_t> *buffers)
   {
     if (_use_table == 1) {
-      make_rho_none<flt_t, acc_t, 1>(buffers);
+      make_rho_none_intel<flt_t, acc_t, 1>(buffers);
     } else {
-      make_rho_none<flt_t, acc_t, 0>(buffers);
+      make_rho_none_intel<flt_t, acc_t, 0>(buffers);
     }
   }
 
   template <class flt_t, class acc_t, int use_table>
-  void fieldforce_c_ik(IntelBuffers<flt_t, acc_t> *buffers);
-  template <class flt_t, class acc_t> void fieldforce_c_ik(IntelBuffers<flt_t, acc_t> *buffers)
+  void fieldforce_c_ik_intel(IntelBuffers<flt_t, acc_t> *buffers);
+  template <class flt_t, class acc_t> void fieldforce_c_ik_intel(IntelBuffers<flt_t, acc_t> *buffers)
   {
     if (_use_table == 1) {
-      fieldforce_c_ik<flt_t, acc_t, 1>(buffers);
+      fieldforce_c_ik_intel<flt_t, acc_t, 1>(buffers);
     } else {
-      fieldforce_c_ik<flt_t, acc_t, 0>(buffers);
+      fieldforce_c_ik_intel<flt_t, acc_t, 0>(buffers);
     }
   }
 
   template <class flt_t, class acc_t, int use_table>
-  void fieldforce_c_ad(IntelBuffers<flt_t, acc_t> *buffers);
-  template <class flt_t, class acc_t> void fieldforce_c_ad(IntelBuffers<flt_t, acc_t> *buffers)
+  void fieldforce_c_ad_intel(IntelBuffers<flt_t, acc_t> *buffers);
+  template <class flt_t, class acc_t> void fieldforce_c_ad_intel(IntelBuffers<flt_t, acc_t> *buffers)
   {
     if (_use_table == 1) {
-      fieldforce_c_ad<flt_t, acc_t, 1>(buffers);
+      fieldforce_c_ad_intel<flt_t, acc_t, 1>(buffers);
     } else {
-      fieldforce_c_ad<flt_t, acc_t, 0>(buffers);
+      fieldforce_c_ad_intel<flt_t, acc_t, 0>(buffers);
     }
   }
 
   template <class flt_t, class acc_t, int use_table>
-  void fieldforce_g_ik(IntelBuffers<flt_t, acc_t> *buffers);
-  template <class flt_t, class acc_t> void fieldforce_g_ik(IntelBuffers<flt_t, acc_t> *buffers)
+  void fieldforce_g_ik_intel(IntelBuffers<flt_t, acc_t> *buffers);
+  template <class flt_t, class acc_t> void fieldforce_g_ik_intel(IntelBuffers<flt_t, acc_t> *buffers)
   {
     if (_use_table == 1) {
-      fieldforce_g_ik<flt_t, acc_t, 1>(buffers);
+      fieldforce_g_ik_intel<flt_t, acc_t, 1>(buffers);
     } else {
-      fieldforce_g_ik<flt_t, acc_t, 0>(buffers);
+      fieldforce_g_ik_intel<flt_t, acc_t, 0>(buffers);
     }
   }
 
   template <class flt_t, class acc_t, int use_table>
-  void fieldforce_g_ad(IntelBuffers<flt_t, acc_t> *buffers);
-  template <class flt_t, class acc_t> void fieldforce_g_ad(IntelBuffers<flt_t, acc_t> *buffers)
+  void fieldforce_g_ad_intel(IntelBuffers<flt_t, acc_t> *buffers);
+  template <class flt_t, class acc_t> void fieldforce_g_ad_intel(IntelBuffers<flt_t, acc_t> *buffers)
   {
     if (_use_table == 1) {
-      fieldforce_g_ad<flt_t, acc_t, 1>(buffers);
+      fieldforce_g_ad_intel<flt_t, acc_t, 1>(buffers);
     } else {
-      fieldforce_g_ad<flt_t, acc_t, 0>(buffers);
+      fieldforce_g_ad_intel<flt_t, acc_t, 0>(buffers);
     }
   }
 
   template <class flt_t, class acc_t, int use_table>
-  void fieldforce_a_ik(IntelBuffers<flt_t, acc_t> *buffers);
-  template <class flt_t, class acc_t> void fieldforce_a_ik(IntelBuffers<flt_t, acc_t> *buffers)
+  void fieldforce_a_ik_intel(IntelBuffers<flt_t, acc_t> *buffers);
+  template <class flt_t, class acc_t> void fieldforce_a_ik_intel(IntelBuffers<flt_t, acc_t> *buffers)
   {
     if (_use_table == 1) {
-      fieldforce_a_ik<flt_t, acc_t, 1>(buffers);
+      fieldforce_a_ik_intel<flt_t, acc_t, 1>(buffers);
     } else {
-      fieldforce_a_ik<flt_t, acc_t, 0>(buffers);
+      fieldforce_a_ik_intel<flt_t, acc_t, 0>(buffers);
     }
   }
 
   template <class flt_t, class acc_t, int use_table>
-  void fieldforce_a_ad(IntelBuffers<flt_t, acc_t> *buffers);
-  template <class flt_t, class acc_t> void fieldforce_a_ad(IntelBuffers<flt_t, acc_t> *buffers)
+  void fieldforce_a_ad_intel(IntelBuffers<flt_t, acc_t> *buffers);
+  template <class flt_t, class acc_t> void fieldforce_a_ad_intel(IntelBuffers<flt_t, acc_t> *buffers)
   {
     if (_use_table == 1) {
-      fieldforce_a_ad<flt_t, acc_t, 1>(buffers);
+      fieldforce_a_ad_intel<flt_t, acc_t, 1>(buffers);
     } else {
-      fieldforce_a_ad<flt_t, acc_t, 0>(buffers);
+      fieldforce_a_ad_intel<flt_t, acc_t, 0>(buffers);
     }
   }
   template <class flt_t, class acc_t, int use_table>
-  void fieldforce_none_ik(IntelBuffers<flt_t, acc_t> *buffers);
-  template <class flt_t, class acc_t> void fieldforce_none_ik(IntelBuffers<flt_t, acc_t> *buffers)
+  void fieldforce_none_ik_intel(IntelBuffers<flt_t, acc_t> *buffers);
+  template <class flt_t, class acc_t> void fieldforce_none_ik_intel(IntelBuffers<flt_t, acc_t> *buffers)
   {
     if (_use_table == 1) {
-      fieldforce_none_ik<flt_t, acc_t, 1>(buffers);
+      fieldforce_none_ik_intel<flt_t, acc_t, 1>(buffers);
     } else {
-      fieldforce_none_ik<flt_t, acc_t, 0>(buffers);
+      fieldforce_none_ik_intel<flt_t, acc_t, 0>(buffers);
     }
   }
 
   template <class flt_t, class acc_t, int use_table>
-  void fieldforce_none_ad(IntelBuffers<flt_t, acc_t> *buffers);
-  template <class flt_t, class acc_t> void fieldforce_none_ad(IntelBuffers<flt_t, acc_t> *buffers)
+  void fieldforce_none_ad_intel(IntelBuffers<flt_t, acc_t> *buffers);
+  template <class flt_t, class acc_t> void fieldforce_none_ad_intel(IntelBuffers<flt_t, acc_t> *buffers)
   {
     if (_use_table == 1) {
-      fieldforce_none_ad<flt_t, acc_t, 1>(buffers);
+      fieldforce_none_ad_intel<flt_t, acc_t, 1>(buffers);
     } else {
-      fieldforce_none_ad<flt_t, acc_t, 0>(buffers);
+      fieldforce_none_ad_intel<flt_t, acc_t, 0>(buffers);
     }
   }
 
diff --git a/src/INTEL/pppm_electrode_intel.cpp b/src/INTEL/pppm_electrode_intel.cpp
index 5cb62dc5d2..9f3c57b50e 100644
--- a/src/INTEL/pppm_electrode_intel.cpp
+++ b/src/INTEL/pppm_electrode_intel.cpp
@@ -48,23 +48,18 @@
 using namespace LAMMPS_NS;
 using namespace std;
 
-#define MAXORDER 7
-#define OFFSET 16384
-#define LARGE 10000.0
-#define SMALL 0.00001
-#define EPS_HOC 1.0e-7
+static constexpr int MAXORDER = 7;
+static constexpr int OFFSET = 16384;
+static constexpr double LARGE = 10000.0;
+static constexpr double SMALL = 0.00001;
+static constexpr double EPS_HOC = 1.0e-7;
 
 enum { REVERSE_RHO };
 enum { FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM };
 enum : bool { ELECTRODE = true, ELECTROLYTE = false };
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF 1.0f
-#else
-#define ZEROF 0.0
-#define ONEF 1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF = 1.0;
 
 static const char cite_pppm_electrode[] =
     "kspace_style pppm/electrode command:\n\n"
@@ -420,7 +415,9 @@ void PPPMElectrodeIntel::project_psi(IntelBuffers<flt_t, acc_t> *buffers, double
 #endif
   {
     int *mask = atom->mask;
-    const flt_t scaleinv = 1.0 / (nx_pppm * ny_pppm * nz_pppm);
+
+    const bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
+    const flt_t scaleinv = 1.0 / ngridtotal;
 
     const flt_t lo0 = boxlo[0];
     const flt_t lo1 = boxlo[1];
diff --git a/src/INTEL/pppm_intel.cpp b/src/INTEL/pppm_intel.cpp
index f67b3a89b3..b72a7efd11 100644
--- a/src/INTEL/pppm_intel.cpp
+++ b/src/INTEL/pppm_intel.cpp
@@ -41,22 +41,17 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define MAXORDER 7
-#define OFFSET 16384
-#define LARGE 10000.0
-#define SMALL 0.00001
-#define EPS_HOC 1.0e-7
+static constexpr int MAXORDER = 7;
+static constexpr int OFFSET = 16384;
+static constexpr double LARGE = 10000.0;
+static constexpr double SMALL = 0.00001;
+static constexpr double EPS_HOC = 1.0e-7;
 
 enum{REVERSE_RHO};
 enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 /* ---------------------------------------------------------------------- */
 
@@ -156,8 +151,6 @@ void PPPMIntel::compute(int eflag, int vflag)
 
 void PPPMIntel::compute_first(int eflag, int vflag)
 {
-  int i,j;
-
   // set energy/virial flags
   // invoke allocate_peratom() if needed for first time
 
@@ -465,7 +458,6 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers)
     const flt_t xi = delxinv;
     const flt_t yi = delyinv;
     const flt_t zi = delzinv;
-    const flt_t fshift = shift;
     const flt_t fshiftone = shiftone;
     const flt_t fdelvolinv = delvolinv;
 
diff --git a/src/INTERLAYER/pair_aip_water_2dm.cpp b/src/INTERLAYER/pair_aip_water_2dm.cpp
index 6e2bf7228d..655fc9f695 100644
--- a/src/INTERLAYER/pair_aip_water_2dm.cpp
+++ b/src/INTERLAYER/pair_aip_water_2dm.cpp
@@ -29,9 +29,9 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
-#define DELTA 4
-#define PGDELTA 1
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
+static constexpr int PGDELTA = 1;
 
 static const char cite_aip_water[] =
     "aip/water/2dm potential doi/10.1021/acs.jpcc.2c08464\n"
diff --git a/src/INTERLAYER/pair_drip.cpp b/src/INTERLAYER/pair_drip.cpp
index 90773b4034..e9a820d5db 100644
--- a/src/INTERLAYER/pair_drip.cpp
+++ b/src/INTERLAYER/pair_drip.cpp
@@ -36,9 +36,9 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
-#define DELTA 4
-#define HALF 0.5
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
+static constexpr double HALF = 0.5;
 
 // inline functions
 static inline double dot(double const *x, double const *y)
diff --git a/src/INTERLAYER/pair_ilp_graphene_hbn.cpp b/src/INTERLAYER/pair_ilp_graphene_hbn.cpp
index 69896d7c0b..a3e3a833c3 100644
--- a/src/INTERLAYER/pair_ilp_graphene_hbn.cpp
+++ b/src/INTERLAYER/pair_ilp_graphene_hbn.cpp
@@ -39,8 +39,8 @@
 using namespace LAMMPS_NS;
 using namespace InterLayer;
 
-#define DELTA 4
-#define PGDELTA 1
+static constexpr int DELTA = 4;
+static constexpr int PGDELTA = 1;
 
 static const char cite_ilp[] =
     "ilp/graphene/hbn potential doi:10.1021/acs.nanolett.8b02848\n"
diff --git a/src/INTERLAYER/pair_ilp_tmd.cpp b/src/INTERLAYER/pair_ilp_tmd.cpp
index 8b08de39c0..73f89803c2 100644
--- a/src/INTERLAYER/pair_ilp_tmd.cpp
+++ b/src/INTERLAYER/pair_ilp_tmd.cpp
@@ -210,7 +210,7 @@ void PairILPTMD::calc_FRep(int eflag, int /* vflag */)
           delki[1] = x[k][1] - x[i][1];
           delki[2] = x[k][2] - x[i][2];
           if (evflag)
-            ev_tally_xyz(k, j, nlocal, newton_pair, 0.0, 0.0, fk[0], fk[1], fk[2], delki[0],
+            ev_tally_xyz(k, i, nlocal, newton_pair, 0.0, 0.0, fk[0], fk[1], fk[2], delki[0],
                          delki[1], delki[2]);
         }
 
diff --git a/src/INTERLAYER/pair_kolmogorov_crespi_full.cpp b/src/INTERLAYER/pair_kolmogorov_crespi_full.cpp
index b497ae3568..64e71f22fc 100644
--- a/src/INTERLAYER/pair_kolmogorov_crespi_full.cpp
+++ b/src/INTERLAYER/pair_kolmogorov_crespi_full.cpp
@@ -40,9 +40,9 @@
 using namespace LAMMPS_NS;
 using namespace InterLayer;
 
-#define MAXLINE 1024
-#define DELTA 4
-#define PGDELTA 1
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
+static constexpr int PGDELTA = 1;
 
 static const char cite_kc[] =
     "kolmogorov/crespi/full potential doi:10.1021/acs.nanolett.8b02848\n"
@@ -590,7 +590,7 @@ void PairKolmogorovCrespiFull::calc_FRep(int eflag, int /* vflag */)
           delki[1] = x[k][1] - x[i][1];
           delki[2] = x[k][2] - x[i][2];
           if (evflag)
-            ev_tally_xyz(k, j, nlocal, newton_pair, 0.0, 0.0, fk[0], fk[1], fk[2], delki[0],
+            ev_tally_xyz(k, i, nlocal, newton_pair, 0.0, 0.0, fk[0], fk[1], fk[2], delki[0],
                          delki[1], delki[2]);
         }
 
diff --git a/src/INTERLAYER/pair_kolmogorov_crespi_z.cpp b/src/INTERLAYER/pair_kolmogorov_crespi_z.cpp
index d8f0d798e4..4ccbd28c02 100644
--- a/src/INTERLAYER/pair_kolmogorov_crespi_z.cpp
+++ b/src/INTERLAYER/pair_kolmogorov_crespi_z.cpp
@@ -37,8 +37,8 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/INTERLAYER/pair_lebedeva_z.cpp b/src/INTERLAYER/pair_lebedeva_z.cpp
index b38900ad14..d7b13ef821 100644
--- a/src/INTERLAYER/pair_lebedeva_z.cpp
+++ b/src/INTERLAYER/pair_lebedeva_z.cpp
@@ -39,8 +39,8 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/INTERLAYER/pair_saip_metal.cpp b/src/INTERLAYER/pair_saip_metal.cpp
index bd327391a4..c6c107c980 100644
--- a/src/INTERLAYER/pair_saip_metal.cpp
+++ b/src/INTERLAYER/pair_saip_metal.cpp
@@ -33,9 +33,9 @@
 using namespace LAMMPS_NS;
 using namespace InterLayer;
 
-#define MAXLINE 1024
-#define DELTA 4
-#define PGDELTA 1
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
+static constexpr int PGDELTA = 1;
 
 static const char cite_saip[] =
     "saip/metal potential: doi:10.1021/acs.jctc.1c00622\n\n"
diff --git a/src/KIM/kim_interactions.cpp b/src/KIM/kim_interactions.cpp
index 1f4f84e648..c0ec0ee28e 100644
--- a/src/KIM/kim_interactions.cpp
+++ b/src/KIM/kim_interactions.cpp
@@ -70,6 +70,8 @@
 #include "modify.h"
 #include "update.h"
 
+#include "fmt/ranges.h"
+
 #include <cstring>
 #include <vector>
 
@@ -79,7 +81,7 @@ extern "C" {
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 /* ---------------------------------------------------------------------- */
 
@@ -279,7 +281,8 @@ void KimInteractions::KIM_SET_TYPE_PARAMETERS(const std::string &input_line) con
     if (fp == nullptr) error->one(FLERR, "Parameter file {} not found", filename);
   }
 
-  char line[MAXLINE], *ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int n, eof = 0;
 
   while (true) {
diff --git a/src/KIM/kim_param.cpp b/src/KIM/kim_param.cpp
index f72df81989..c50474fe67 100644
--- a/src/KIM/kim_param.cpp
+++ b/src/KIM/kim_param.cpp
@@ -68,6 +68,8 @@
 #include "pair_kim.h"
 #include "variable.h"
 
+#include "fmt/ranges.h"
+
 #include <cstdlib>
 #include <cstring>
 #include <vector>
diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh
index 489efc55a0..462c0cbe57 100755
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@@ -106,6 +106,8 @@ action compute_temp_kokkos.cpp
 action compute_temp_kokkos.h
 action dihedral_charmm_kokkos.cpp dihedral_charmm.cpp
 action dihedral_charmm_kokkos.h dihedral_charmm.h
+action dihedral_charmmfsw_kokkos.cpp dihedral_charmmfsw.cpp
+action dihedral_charmmfsw_kokkos.h dihedral_charmmfsw.h
 action dihedral_class2_kokkos.cpp dihedral_class2.cpp
 action dihedral_class2_kokkos.h dihedral_class2.h
 action dihedral_harmonic_kokkos.cpp dihedral_harmonic.cpp
@@ -165,6 +167,8 @@ action fix_qeq_reaxff_kokkos.cpp fix_qeq_reaxff.cpp
 action fix_qeq_reaxff_kokkos.h fix_qeq_reaxff.h
 action fix_reaxff_bonds_kokkos.cpp fix_reaxff_bonds.cpp
 action fix_reaxff_bonds_kokkos.h fix_reaxff_bonds.h
+action compute_reaxff_atom_kokkos.cpp compute_reaxff_atom.cpp
+action compute_reaxff_atom_kokkos.h compute_reaxff_atom.h
 action fix_reaxff_species_kokkos.cpp fix_reaxff_species.cpp
 action fix_reaxff_species_kokkos.h fix_reaxff_species.h
 action fix_rx_kokkos.cpp fix_rx.cpp
@@ -177,6 +181,10 @@ action fix_shardlow_kokkos.cpp fix_shardlow.cpp
 action fix_shardlow_kokkos.h fix_shardlow.h
 action fix_spring_self_kokkos.cpp
 action fix_spring_self_kokkos.h
+action fix_temp_berendsen_kokkos.cpp
+action fix_temp_berendsen_kokkos.h
+action fix_temp_rescale_kokkos.cpp
+action fix_temp_rescale_kokkos.h
 action fix_viscous_kokkos.cpp
 action fix_viscous_kokkos.h
 action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp
@@ -304,6 +312,8 @@ action pair_lj_charmm_coul_charmm_kokkos.cpp pair_lj_charmm_coul_charmm.cpp
 action pair_lj_charmm_coul_charmm_kokkos.h pair_lj_charmm_coul_charmm.h
 action pair_lj_charmm_coul_long_kokkos.cpp pair_lj_charmm_coul_long.cpp
 action pair_lj_charmm_coul_long_kokkos.h pair_lj_charmm_coul_long.h
+action pair_lj_charmmfsw_coul_long_kokkos.cpp pair_lj_charmmfsw_coul_long.cpp
+action pair_lj_charmmfsw_coul_long_kokkos.h pair_lj_charmmfsw_coul_long.h
 action pair_lj_class2_coul_cut_kokkos.cpp pair_lj_class2_coul_cut.cpp
 action pair_lj_class2_coul_cut_kokkos.h pair_lj_class2_coul_cut.h
 action pair_lj_class2_coul_long_kokkos.cpp pair_lj_class2_coul_long.cpp
diff --git a/src/KOKKOS/angle_charmm_kokkos.cpp b/src/KOKKOS/angle_charmm_kokkos.cpp
index 8b41a93451..666002686c 100644
--- a/src/KOKKOS/angle_charmm_kokkos.cpp
+++ b/src/KOKKOS/angle_charmm_kokkos.cpp
@@ -31,7 +31,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/angle_class2_kokkos.cpp b/src/KOKKOS/angle_class2_kokkos.cpp
index 8f77ab4c94..e831ae2283 100644
--- a/src/KOKKOS/angle_class2_kokkos.cpp
+++ b/src/KOKKOS/angle_class2_kokkos.cpp
@@ -31,7 +31,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/angle_cosine_kokkos.cpp b/src/KOKKOS/angle_cosine_kokkos.cpp
index 189a156866..ec20c8fbda 100644
--- a/src/KOKKOS/angle_cosine_kokkos.cpp
+++ b/src/KOKKOS/angle_cosine_kokkos.cpp
@@ -31,7 +31,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/angle_harmonic_kokkos.cpp b/src/KOKKOS/angle_harmonic_kokkos.cpp
index 1d8ada4bd9..d7be418326 100644
--- a/src/KOKKOS/angle_harmonic_kokkos.cpp
+++ b/src/KOKKOS/angle_harmonic_kokkos.cpp
@@ -31,7 +31,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp
index bc393b29d8..af1816c1d3 100644
--- a/src/KOKKOS/atom_kokkos.cpp
+++ b/src/KOKKOS/atom_kokkos.cpp
@@ -25,12 +25,15 @@
 #include "kokkos_base.h"
 #include "modify.h"
 #include "fix.h"
+#include "fix_property_atom_kokkos.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp)
+AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp),
+mapBinner(1, 0.0, 1.0), // no default constructor, these values are not used
+mapSorter(d_tag_sorted, 0, 1, mapBinner, true)
 {
   avecKK = nullptr;
 
@@ -297,15 +300,17 @@ void AtomKokkos::grow(unsigned int mask)
    return index in ivector or dvector of its location
 ------------------------------------------------------------------------- */
 
-int AtomKokkos::add_custom(const char *name, int flag, int cols)
+int AtomKokkos::add_custom(const char *name, int flag, int cols, int ghost)
 {
-  int index;
+  int index = -1;
 
   if (flag == 0 && cols == 0) {
     index = nivector;
     nivector++;
     ivname = (char **) memory->srealloc(ivname, nivector * sizeof(char *), "atom:ivname");
     ivname[index] = utils::strdup(name);
+    ivghost = (int *) memory->srealloc(ivghost,nivector * sizeof(int),"atom:ivghost");
+    ivghost[index] = ghost;
     ivector = (int **) memory->srealloc(ivector, nivector * sizeof(int *), "atom:ivector");
     memory->create(ivector[index], nmax, "atom:ivector");
 
@@ -314,6 +319,8 @@ int AtomKokkos::add_custom(const char *name, int flag, int cols)
     ndvector++;
     dvname = (char **) memory->srealloc(dvname, ndvector * sizeof(char *), "atom:dvname");
     dvname[index] = utils::strdup(name);
+    dvghost = (int *) memory->srealloc(dvghost, ndvector * sizeof(int), "atom:dvghost");
+    dvghost[index] = ghost;
     dvector = (double **) memory->srealloc(dvector, ndvector * sizeof(double *), "atom:dvector");
     this->sync(Device, DVECTOR_MASK);
     memoryKK->grow_kokkos(k_dvector, dvector, ndvector, nmax, "atom:dvector");
@@ -324,6 +331,8 @@ int AtomKokkos::add_custom(const char *name, int flag, int cols)
     niarray++;
     ianame = (char **) memory->srealloc(ianame, niarray * sizeof(char *), "atom:ianame");
     ianame[index] = utils::strdup(name);
+    iaghost = (int *) memory->srealloc(iaghost, niarray * sizeof(int), "atom:iaghost");
+    iaghost[index] = ghost;
     iarray = (int ***) memory->srealloc(iarray, niarray * sizeof(int **), "atom:iarray");
     memory->create(iarray[index], nmax, cols, "atom:iarray");
 
@@ -335,6 +344,8 @@ int AtomKokkos::add_custom(const char *name, int flag, int cols)
     ndarray++;
     daname = (char **) memory->srealloc(daname, ndarray * sizeof(char *), "atom:daname");
     daname[index] = utils::strdup(name);
+    daghost = (int *) memory->srealloc(daghost, ndarray * sizeof(int), "atom:daghost");
+    daghost[index] = ghost;
     darray = (double ***) memory->srealloc(darray, ndarray * sizeof(double **), "atom:darray");
     memory->create(darray[index], nmax, cols, "atom:darray");
 
@@ -342,6 +353,9 @@ int AtomKokkos::add_custom(const char *name, int flag, int cols)
     dcols[index] = cols;
   }
 
+  if (index < 0)
+    error->all(FLERR,"Invalid call to AtomKokkos::add_custom()");
+
   return index;
 }
 
diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h
index 21a9aeebbd..db132bce69 100644
--- a/src/KOKKOS/atom_kokkos.h
+++ b/src/KOKKOS/atom_kokkos.h
@@ -14,7 +14,6 @@
 
 #include "atom.h"               // IWYU pragma: export
 #include "kokkos_type.h"
-#include "fix_property_atom_kokkos.h"
 
 #include <Kokkos_Sort.hpp>
 
@@ -27,7 +26,7 @@ class AtomKokkos : public Atom {
  public:
   bool sort_classic;
   int nprop_atom;
-  FixPropertyAtomKokkos** fix_prop_atom;
+  class FixPropertyAtomKokkos **fix_prop_atom;
 
   DAT::tdual_tagint_1d k_tag;
   DAT::tdual_int_1d k_type, k_mask;
@@ -104,7 +103,8 @@ class AtomKokkos : public Atom {
 
   using MapKeyViewType = decltype(d_tag_sorted);
   using BinOpMap = Kokkos::BinOp1D<MapKeyViewType>;
-  Kokkos::BinSort<MapKeyViewType, BinOpMap> Sorter;
+  BinOpMap mapBinner;
+  Kokkos::BinSort<MapKeyViewType, BinOpMap> mapSorter;
 
   class AtomVecKokkos* avecKK;
 
@@ -154,7 +154,7 @@ class AtomKokkos : public Atom {
   void sync_overlapping_device(const ExecutionSpace space, unsigned int mask);
   void sort() override;
   virtual void grow(unsigned int mask);
-  int add_custom(const char *, int, int) override;
+  int add_custom(const char *, int, int, int border = 0) override;
   void remove_custom(int, int, int) override;
   virtual void deallocate_topology();
  private:
diff --git a/src/KOKKOS/atom_map_kokkos.cpp b/src/KOKKOS/atom_map_kokkos.cpp
index 06516e4142..828eb7edea 100644
--- a/src/KOKKOS/atom_map_kokkos.cpp
+++ b/src/KOKKOS/atom_map_kokkos.cpp
@@ -25,7 +25,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EXTRA 1000
+static constexpr int EXTRA = 1000;
 
 /* ----------------------------------------------------------------------
    allocate and initialize array or hash table for global -> local map
@@ -146,7 +146,7 @@ void AtomKokkos::map_set()
   int nmax = atom->nmax;
 
   int realloc_flag = 0;
-  if (d_tag_sorted.extent(0) < nmax) {
+  if (!d_tag_sorted.data() || (int)d_tag_sorted.extent(0) < nmax) {
     MemKK::realloc_kokkos(d_tag_sorted,"atom:tag_sorted",nmax);
     MemKK::realloc_kokkos(d_i_sorted,"atom:i_sorted",nmax);
     realloc_flag = 1;
@@ -179,25 +179,25 @@ void AtomKokkos::map_set()
   using MapKeyViewType = decltype(d_tag_sorted);
   using BinOpMap = Kokkos::BinOp1D<MapKeyViewType>;
 
-  auto binner = BinOpMap(nall, min, max);
+  mapBinner = BinOpMap(nall, min, max);
 
-  if (!Sorter.bin_offsets.data() || realloc_flag) {
-    Sorter = Kokkos::BinSort<MapKeyViewType, BinOpMap>(d_tag_sorted, 0, nall, binner, true);
-    MemKK::realloc_kokkos(Sorter.bin_count_atomic,"Kokkos::SortImpl::BinSortFunctor::bin_count",nmax+1);
-    Kokkos::deep_copy(Sorter.bin_count_atomic,0);
-    Sorter.bin_count_const = Sorter.bin_count_atomic;
-    MemKK::realloc_kokkos(Sorter.bin_offsets,"Kokkos::SortImpl::BinSortFunctor::bin_offsets",nmax+1);
-    MemKK::realloc_kokkos(Sorter.sort_order,"Kokkos::SortImpl::BinSortFunctor::sort_order",nmax);
+  if (realloc_flag) {
+    mapSorter = Kokkos::BinSort<MapKeyViewType, BinOpMap>(d_tag_sorted, 0, nall, mapBinner, true);
+    MemKK::realloc_kokkos(mapSorter.bin_count_atomic,"Kokkos::SortImpl::BinSortFunctor::bin_count",nmax+1);
+    Kokkos::deep_copy(mapSorter.bin_count_atomic,0);
+    mapSorter.bin_count_const = mapSorter.bin_count_atomic;
+    MemKK::realloc_kokkos(mapSorter.bin_offsets,"Kokkos::SortImpl::BinSortFunctor::bin_offsets",nmax+1);
+    MemKK::realloc_kokkos(mapSorter.sort_order,"Kokkos::SortImpl::BinSortFunctor::sort_order",nmax);
   } else {
-    Kokkos::deep_copy(Sorter.bin_count_atomic,0);
-    Sorter.bin_op = binner;
-    Sorter.range_begin = 0;
-    Sorter.range_end = nall;
+    Kokkos::deep_copy(mapSorter.bin_count_atomic,0);
+    mapSorter.bin_op = mapBinner;
+    mapSorter.range_begin = 0;
+    mapSorter.range_end = nall;
   }
 
-  Sorter.create_permute_vector(LMPDeviceType());
-  Sorter.sort(LMPDeviceType(), d_tag_sorted, 0, nall);
-  Sorter.sort(LMPDeviceType(), d_i_sorted, 0, nall);
+  mapSorter.create_permute_vector(LMPDeviceType());
+  mapSorter.sort(LMPDeviceType(), d_tag_sorted, 0, nall);
+  mapSorter.sort(LMPDeviceType(), d_i_sorted, 0, nall);
 
   auto d_map_array = k_map_array.d_view;
   auto d_map_hash = k_map_hash.d_view;
@@ -273,6 +273,7 @@ void AtomKokkos::map_set()
     error->one(FLERR,"Failed to insert into Kokkos hash atom map");
 
   k_sametag.modify_device();
+  k_sametag.sync_host();
 
   if (map_style == MAP_ARRAY)
     k_map_array.modify_device();
diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp
index dd6be164c0..418c2d629d 100644
--- a/src/KOKKOS/atom_vec_angle_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp
@@ -680,7 +680,6 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
       const typename AT::tdual_xfloat_2d buf,
       typename AT::tdual_int_1d sendlist,
       typename AT::tdual_int_1d copylist):
-    _size_exchange(atom->avecKK->size_exchange),
     _x(atom->k_x.view<DeviceType>()),
     _v(atom->k_v.view<DeviceType>()),
     _tag(atom->k_tag.view<DeviceType>()),
@@ -716,7 +715,8 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
     _angle_atom2w(atom->k_angle_atom2.view<DeviceType>()),
     _angle_atom3w(atom->k_angle_atom3.view<DeviceType>()),
     _sendlist(sendlist.template view<DeviceType>()),
-    _copylist(copylist.template view<DeviceType>()) {
+    _copylist(copylist.template view<DeviceType>()),
+    _size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                              buf.template view<DeviceType>().extent(1))/_size_exchange;
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
@@ -858,7 +858,6 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor {
       const typename AT::tdual_xfloat_2d buf,
       typename AT::tdual_int_1d nlocal,
       int dim, X_FLOAT lo, X_FLOAT hi):
-    _size_exchange(atom->avecKK->size_exchange),
     _x(atom->k_x.view<DeviceType>()),
     _v(atom->k_v.view<DeviceType>()),
     _tag(atom->k_tag.view<DeviceType>()),
@@ -876,8 +875,8 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor {
     _angle_atom1(atom->k_angle_atom1.view<DeviceType>()),
     _angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
     _angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
-    _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-    _lo(lo),_hi(hi) {
+    _nlocal(nlocal.template view<DeviceType>()),
+    _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                              buf.template view<DeviceType>().extent(1))/_size_exchange;
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
@@ -927,7 +926,7 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor {
 
 int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
                                                int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
-                                               DAT::tdual_int_1d &k_indices)
+                                               DAT::tdual_int_1d &/*k_indices*/)
 {
   while (nlocal + nrecv/size_exchange >= nmax) grow(0);
 
diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
index 1ea8377a68..973ad2f7f2 100644
--- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp
@@ -294,7 +294,6 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
     const typename AT::tdual_xfloat_2d buf,
     typename AT::tdual_int_1d sendlist,
     typename AT::tdual_int_1d copylist):
-    _size_exchange(atom->avecKK->size_exchange),
     _x(atom->k_x.view<DeviceType>()),
     _v(atom->k_v.view<DeviceType>()),
     _tag(atom->k_tag.view<DeviceType>()),
@@ -308,7 +307,8 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
     _maskw(atom->k_mask.view<DeviceType>()),
     _imagew(atom->k_image.view<DeviceType>()),
     _sendlist(sendlist.template view<DeviceType>()),
-    _copylist(copylist.template view<DeviceType>()) {
+    _copylist(copylist.template view<DeviceType>()),
+    _size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
 
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
@@ -392,16 +392,15 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
     typename AT::tdual_int_1d nlocal,
     typename AT::tdual_int_1d indices,
     int dim, X_FLOAT lo, X_FLOAT hi):
-      _size_exchange(atom->avecKK->size_exchange),
       _x(atom->k_x.view<DeviceType>()),
       _v(atom->k_v.view<DeviceType>()),
       _tag(atom->k_tag.view<DeviceType>()),
       _type(atom->k_type.view<DeviceType>()),
       _mask(atom->k_mask.view<DeviceType>()),
       _image(atom->k_image.view<DeviceType>()),
-      _indices(indices.template view<DeviceType>()),
-      _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-      _lo(lo),_hi(hi) {
+      _nlocal(nlocal.template view<DeviceType>()),
+      _indices(indices.template view<DeviceType>()),_dim(dim),
+      _lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) {
         const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                                  buf.template view<DeviceType>().extent(1))/_size_exchange;
         buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp
index c45bdedf38..a4fd9ca1b5 100644
--- a/src/KOKKOS/atom_vec_bond_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp
@@ -352,7 +352,6 @@ struct AtomVecBondKokkos_PackExchangeFunctor {
     const typename AT::tdual_xfloat_2d buf,
     typename AT::tdual_int_1d sendlist,
     typename AT::tdual_int_1d copylist):
-      _size_exchange(atom->avecKK->size_exchange),
       _x(atom->k_x.view<DeviceType>()),
       _v(atom->k_v.view<DeviceType>()),
       _tag(atom->k_tag.view<DeviceType>()),
@@ -378,7 +377,8 @@ struct AtomVecBondKokkos_PackExchangeFunctor {
       _bond_typew(atom->k_bond_type.view<DeviceType>()),
       _bond_atomw(atom->k_bond_atom.view<DeviceType>()),
       _sendlist(sendlist.template view<DeviceType>()),
-      _copylist(copylist.template view<DeviceType>()) {
+      _copylist(copylist.template view<DeviceType>()),
+      _size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                              buf.template view<DeviceType>().extent(1))/_size_exchange;
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
@@ -503,7 +503,6 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor {
     typename AT::tdual_int_1d nlocal,
     typename AT::tdual_int_1d indices,
     int dim, X_FLOAT lo, X_FLOAT hi):
-      _size_exchange(atom->avecKK->size_exchange),
       _x(atom->k_x.view<DeviceType>()),
       _v(atom->k_v.view<DeviceType>()),
       _tag(atom->k_tag.view<DeviceType>()),
@@ -516,9 +515,9 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor {
       _num_bond(atom->k_num_bond.view<DeviceType>()),
       _bond_type(atom->k_bond_type.view<DeviceType>()),
       _bond_atom(atom->k_bond_atom.view<DeviceType>()),
+      _nlocal(nlocal.template view<DeviceType>()),
       _indices(indices.template view<DeviceType>()),
-      _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-      _lo(lo),_hi(hi) {
+      _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) {
         const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                                  buf.template view<DeviceType>().extent(1))/_size_exchange;
         buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp
index 22fc63ff91..4fa814f1ac 100644
--- a/src/KOKKOS/atom_vec_charge_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp
@@ -366,7 +366,6 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
       const typename AT::tdual_xfloat_2d buf,
       typename AT::tdual_int_1d sendlist,
       typename AT::tdual_int_1d copylist):
-    _size_exchange(atom->avecKK->size_exchange),
     _x(atom->k_x.view<DeviceType>()),
     _v(atom->k_v.view<DeviceType>()),
     _tag(atom->k_tag.view<DeviceType>()),
@@ -382,7 +381,8 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
     _imagew(atom->k_image.view<DeviceType>()),
     _qw(atom->k_q.view<DeviceType>()),
     _sendlist(sendlist.template view<DeviceType>()),
-    _copylist(copylist.template view<DeviceType>()) {
+    _copylist(copylist.template view<DeviceType>()),
+    _size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                              buf.template view<DeviceType>().extent(1))/_size_exchange;
 
@@ -474,17 +474,16 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor {
     typename AT::tdual_int_1d nlocal,
     typename AT::tdual_int_1d indices,
     int dim, X_FLOAT lo, X_FLOAT hi):
-      _size_exchange(atom->avecKK->size_exchange),
       _x(atom->k_x.view<DeviceType>()),
       _v(atom->k_v.view<DeviceType>()),
       _tag(atom->k_tag.view<DeviceType>()),
       _type(atom->k_type.view<DeviceType>()),
       _mask(atom->k_mask.view<DeviceType>()),
       _image(atom->k_image.view<DeviceType>()),
-      _indices(indices.template view<DeviceType>()),
       _q(atom->k_q.view<DeviceType>()),
-      _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-      _lo(lo),_hi(hi) {
+      _nlocal(nlocal.template view<DeviceType>()),
+      _indices(indices.template view<DeviceType>()),_dim(dim),
+      _lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
 
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
diff --git a/src/KOKKOS/atom_vec_dipole_kokkos.cpp b/src/KOKKOS/atom_vec_dipole_kokkos.cpp
index ad06570cdc..ecc0f3b497 100644
--- a/src/KOKKOS/atom_vec_dipole_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_dipole_kokkos.cpp
@@ -398,7 +398,6 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor {
       const typename AT::tdual_xfloat_2d buf,
       typename AT::tdual_int_1d sendlist,
       typename AT::tdual_int_1d copylist):
-    _size_exchange(atom->avecKK->size_exchange),
     _x(atom->k_x.view<DeviceType>()),
     _v(atom->k_v.view<DeviceType>()),
     _tag(atom->k_tag.view<DeviceType>()),
@@ -416,7 +415,8 @@ struct AtomVecDipoleKokkos_PackExchangeFunctor {
     _qw(atom->k_q.view<DeviceType>()),
     _muw(atom->k_mu.view<DeviceType>()),
     _sendlist(sendlist.template view<DeviceType>()),
-    _copylist(copylist.template view<DeviceType>()) {
+    _copylist(copylist.template view<DeviceType>()),
+    _size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                              buf.template view<DeviceType>().extent(1))/_size_exchange;
 
@@ -515,7 +515,6 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor {
       const typename AT::tdual_xfloat_2d buf,
       typename AT::tdual_int_1d nlocal,
       int dim, X_FLOAT lo, X_FLOAT hi):
-      _size_exchange(atom->avecKK->size_exchange),
       _x(atom->k_x.view<DeviceType>()),
       _v(atom->k_v.view<DeviceType>()),
       _tag(atom->k_tag.view<DeviceType>()),
@@ -524,8 +523,8 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor {
       _image(atom->k_image.view<DeviceType>()),
       _q(atom->k_q.view<DeviceType>()),
       _mu(atom->k_mu.view<DeviceType>()),
-      _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-      _lo(lo),_hi(hi) {
+      _nlocal(nlocal.template view<DeviceType>()),
+      _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
 
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
@@ -557,8 +556,8 @@ struct AtomVecDipoleKokkos_UnpackExchangeFunctor {
 
 /* ---------------------------------------------------------------------- */
 int AtomVecDipoleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
-                                               int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
-                                               DAT::tdual_int_1d &k_indices)
+                                                int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
+                                                DAT::tdual_int_1d &/*k_indices*/)
 {
   if (space == Host) {
     k_count.h_view(0) = nlocal;
diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp
index c3430b9f6e..70aedcc931 100644
--- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp
@@ -746,7 +746,6 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
       const typename AT::tdual_xfloat_2d buf,
       typename AT::tdual_int_1d sendlist,
       typename AT::tdual_int_1d copylist):
-                _size_exchange(atom->avecKK->size_exchange),
                 _x(atom->k_x.view<DeviceType>()),
                 _v(atom->k_v.view<DeviceType>()),
                 _tag(atom->k_tag.view<DeviceType>()),
@@ -772,7 +771,8 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
                 _uCGw(atom->k_uCG.view<DeviceType>()),
                 _uCGneww(atom->k_uCGnew.view<DeviceType>()),
                 _sendlist(sendlist.template view<DeviceType>()),
-                _copylist(copylist.template view<DeviceType>()) {
+                _copylist(copylist.template view<DeviceType>()),
+                _size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
 
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
@@ -875,15 +875,14 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
       const typename AT::tdual_xfloat_2d buf,
       typename AT::tdual_int_1d nlocal,
       int dim, X_FLOAT lo, X_FLOAT hi):
-                _size_exchange(atom->avecKK->size_exchange),
                 _x(atom->k_x.view<DeviceType>()),
                 _v(atom->k_v.view<DeviceType>()),
                 _tag(atom->k_tag.view<DeviceType>()),
                 _type(atom->k_type.view<DeviceType>()),
                 _mask(atom->k_mask.view<DeviceType>()),
                 _image(atom->k_image.view<DeviceType>()),
-                _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-                _lo(lo),_hi(hi) {
+                _nlocal(nlocal.template view<DeviceType>()),
+                _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
 
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
@@ -917,7 +916,7 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
 /* ---------------------------------------------------------------------- */
 int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
                                              int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
-                                             DAT::tdual_int_1d &k_indices)
+                                             DAT::tdual_int_1d &/*k_indices*/)
 {
   while (nlocal + nrecv/size_exchange >= nmax) grow(0);
 
diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp
index 829ebc75e6..732078a627 100644
--- a/src/KOKKOS/atom_vec_full_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_full_kokkos.cpp
@@ -501,7 +501,6 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
       const typename AT::tdual_xfloat_2d buf,
       typename AT::tdual_int_1d sendlist,
       typename AT::tdual_int_1d copylist):
-    _size_exchange(atom->avecKK->size_exchange),
     _x(atom->k_x.view<DeviceType>()),
     _v(atom->k_v.view<DeviceType>()),
     _tag(atom->k_tag.view<DeviceType>()),
@@ -563,7 +562,8 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
     _improper_atom3w(atom->k_improper_atom3.view<DeviceType>()),
     _improper_atom4w(atom->k_improper_atom4.view<DeviceType>()),
     _sendlist(sendlist.template view<DeviceType>()),
-    _copylist(copylist.template view<DeviceType>()) {
+    _copylist(copylist.template view<DeviceType>()),
+    _size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                              buf.template view<DeviceType>().extent(1))/_size_exchange;
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
@@ -755,14 +755,12 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor {
     typename AT::tdual_int_1d nlocal,
     typename AT::tdual_int_1d indices,
     int dim, X_FLOAT lo, X_FLOAT hi):
-      _size_exchange(atom->avecKK->size_exchange),
       _x(atom->k_x.view<DeviceType>()),
       _v(atom->k_v.view<DeviceType>()),
       _tag(atom->k_tag.view<DeviceType>()),
       _type(atom->k_type.view<DeviceType>()),
       _mask(atom->k_mask.view<DeviceType>()),
       _image(atom->k_image.view<DeviceType>()),
-      _indices(indices.template view<DeviceType>()),
       _q(atom->k_q.view<DeviceType>()),
       _molecule(atom->k_molecule.view<DeviceType>()),
       _nspecial(atom->k_nspecial.view<DeviceType>()),
@@ -787,9 +785,9 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor {
       _improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
       _improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
       _improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
-      _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-      _lo(lo),_hi(hi) {
-
+      _nlocal(nlocal.template view<DeviceType>()),
+      _indices(indices.template view<DeviceType>()),
+      _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                              buf.template view<DeviceType>().extent(1))/_size_exchange;
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp
index 4e01ab5794..08bcaaef74 100644
--- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp
@@ -66,7 +66,7 @@ void AtomVecHybridKokkos::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorte
 int AtomVecHybridKokkos::pack_comm_kokkos(const int &/*n*/, const DAT::tdual_int_2d &/*k_sendlist*/,
                                           const int & /*iswap*/,
                                           const DAT::tdual_xfloat_2d &/*buf*/,
-                                          const int &/*pbc_flag*/, const int pbc[])
+                                          const int &/*pbc_flag*/, const int /*pbc*/[])
 {
   error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm");
   return 0;
@@ -80,7 +80,7 @@ void AtomVecHybridKokkos::unpack_comm_kokkos(const int &/*n*/, const int &/*nfir
 
 int AtomVecHybridKokkos::pack_comm_self(const int &/*n*/, const DAT::tdual_int_2d &/*list*/,
                                         const int & /*iswap*/, const int /*nfirst*/,
-                                        const int &/*pbc_flag*/, const int pbc[])
+                                        const int &/*pbc_flag*/, const int /*pbc*/[])
 {
   error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm");
   return 0;
@@ -113,7 +113,7 @@ int AtomVecHybridKokkos::pack_exchange_kokkos(const int &/*nsend*/,DAT::tdual_xf
 int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d & /*k_buf*/, int /*nrecv*/,
                                                 int /*nlocal*/, int /*dim*/, X_FLOAT /*lo*/,
                                                 X_FLOAT /*hi*/, ExecutionSpace /*space*/,
-                                                DAT::tdual_int_1d &k_indices)
+                                                DAT::tdual_int_1d &/*k_indices*/)
 {
   error->all(FLERR,"AtomVecHybridKokkos doesn't yet support threaded comm");
   return 0;
diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp
index 471dd0ad58..ec98ff9239 100644
--- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp
@@ -762,7 +762,6 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor {
     const typename AT::tdual_xfloat_2d buf,
     typename AT::tdual_int_1d sendlist,
     typename AT::tdual_int_1d copylist):
-      _size_exchange(atom->avecKK->size_exchange),
       _x(atom->k_x.view<DeviceType>()),
       _v(atom->k_v.view<DeviceType>()),
       _tag(atom->k_tag.view<DeviceType>()),
@@ -822,7 +821,8 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor {
       _improper_atom3w(atom->k_improper_atom3.view<DeviceType>()),
       _improper_atom4w(atom->k_improper_atom4.view<DeviceType>()),
       _sendlist(sendlist.template view<DeviceType>()),
-      _copylist(copylist.template view<DeviceType>()) {
+      _copylist(copylist.template view<DeviceType>()),
+      _size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                              buf.template view<DeviceType>().extent(1))/_size_exchange;
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
@@ -1010,7 +1010,6 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor {
     typename AT::tdual_int_1d nlocal,
     typename AT::tdual_int_1d indices,
     int dim, X_FLOAT lo, X_FLOAT hi):
-      _size_exchange(atom->avecKK->size_exchange),
       _x(atom->k_x.view<DeviceType>()),
       _v(atom->k_v.view<DeviceType>()),
       _tag(atom->k_tag.view<DeviceType>()),
@@ -1040,9 +1039,9 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor {
       _improper_atom2(atom->k_improper_atom2.view<DeviceType>()),
       _improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
       _improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
+      _nlocal(nlocal.template view<DeviceType>()),
       _indices(indices.template view<DeviceType>()),
-      _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-      _lo(lo),_hi(hi) {
+      _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) {
         const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                                  buf.template view<DeviceType>().extent(1))/_size_exchange;
         buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp
index 5a1c2beee3..3dfb5143cd 100644
--- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp
@@ -1448,7 +1448,6 @@ struct AtomVecSphereKokkos_PackExchangeFunctor {
     const typename AT::tdual_xfloat_2d buf,
     typename AT::tdual_int_1d sendlist,
     typename AT::tdual_int_1d copylist):
-    _size_exchange(atom->avecKK->size_exchange),
     _x(atom->k_x.view<DeviceType>()),
     _v(atom->k_v.view<DeviceType>()),
     _tag(atom->k_tag.view<DeviceType>()),
@@ -1468,7 +1467,8 @@ struct AtomVecSphereKokkos_PackExchangeFunctor {
     _rmassw(atom->k_rmass.view<DeviceType>()),
     _omegaw(atom->k_omega.view<DeviceType>()),
     _sendlist(sendlist.template view<DeviceType>()),
-    _copylist(copylist.template view<DeviceType>()) {
+    _copylist(copylist.template view<DeviceType>()),
+    _size_exchange(atom->avecKK->size_exchange) {
     const int maxsend = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
 
     _buf = typename AT::t_xfloat_2d_um(buf.template view<DeviceType>().data(),maxsend,_size_exchange);
@@ -1572,7 +1572,6 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor {
     typename AT::tdual_int_1d nlocal,
     typename AT::tdual_int_1d indices,
     int dim, X_FLOAT lo, X_FLOAT hi):
-      _size_exchange(atom->avecKK->size_exchange),
       _x(atom->k_x.view<DeviceType>()),
       _v(atom->k_v.view<DeviceType>()),
       _tag(atom->k_tag.view<DeviceType>()),
@@ -1584,9 +1583,7 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor {
       _omega(atom->k_omega.view<DeviceType>()),
       _nlocal(nlocal.template view<DeviceType>()),
       _indices(indices.template view<DeviceType>()),
-      _dim(dim),
-      _lo(lo),_hi(hi)
-  {
+      _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) {
     const size_t size_exchange = 16;
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/size_exchange;
 
diff --git a/src/KOKKOS/atom_vec_spin_kokkos.cpp b/src/KOKKOS/atom_vec_spin_kokkos.cpp
index d2dd3a05ab..c6ee7c1d3b 100644
--- a/src/KOKKOS/atom_vec_spin_kokkos.cpp
+++ b/src/KOKKOS/atom_vec_spin_kokkos.cpp
@@ -35,7 +35,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10
+static constexpr int DELTA = 10;
 
 /* ---------------------------------------------------------------------- */
 
@@ -410,7 +410,6 @@ struct AtomVecSpinKokkos_PackExchangeFunctor {
       const typename AT::tdual_xfloat_2d buf,
       typename AT::tdual_int_1d sendlist,
       typename AT::tdual_int_1d copylist):
-    _size_exchange(atom->avecKK->size_exchange),
     _x(atom->k_x.view<DeviceType>()),
     _v(atom->k_v.view<DeviceType>()),
     _tag(atom->k_tag.view<DeviceType>()),
@@ -426,7 +425,8 @@ struct AtomVecSpinKokkos_PackExchangeFunctor {
     _imagew(atom->k_image.view<DeviceType>()),
     _spw(atom->k_sp.view<DeviceType>()),
     _sendlist(sendlist.template view<DeviceType>()),
-    _copylist(copylist.template view<DeviceType>()) {
+    _copylist(copylist.template view<DeviceType>()),
+    _size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
                              buf.template view<DeviceType>().extent(1))/_size_exchange;
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
@@ -521,7 +521,6 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor {
       const typename AT::tdual_xfloat_2d buf,
       typename AT::tdual_int_1d nlocal,
       int dim, X_FLOAT lo, X_FLOAT hi):
-      _size_exchange(atom->avecKK->size_exchange),
       _x(atom->k_x.view<DeviceType>()),
       _v(atom->k_v.view<DeviceType>()),
       _tag(atom->k_tag.view<DeviceType>()),
@@ -529,8 +528,8 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor {
       _mask(atom->k_mask.view<DeviceType>()),
       _image(atom->k_image.view<DeviceType>()),
       _sp(atom->k_sp.view<DeviceType>()),
-      _nlocal(nlocal.template view<DeviceType>()),_dim(dim),
-      _lo(lo),_hi(hi) {
+      _nlocal(nlocal.template view<DeviceType>()),
+      _dim(dim),_lo(lo),_hi(hi),_size_exchange(atom->avecKK->size_exchange) {
     const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/_size_exchange;
 
     buffer_view<DeviceType>(_buf,buf,maxsendlist,_size_exchange);
@@ -563,7 +562,7 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor {
 
 int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int nrecv, int nlocal,
                                               int dim, X_FLOAT lo, X_FLOAT hi, ExecutionSpace space,
-                                              DAT::tdual_int_1d &k_indices)
+                                              DAT::tdual_int_1d &/*k_indices*/)
 {
   while (nlocal + nrecv/size_exchange >= nmax) grow(0);
 
@@ -592,7 +591,7 @@ int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int n
    include f b/c this is invoked from within SPIN pair styles
 ------------------------------------------------------------------------- */
 
-void AtomVecSpinKokkos::force_clear(int n, size_t nbytes)
+void AtomVecSpinKokkos::force_clear(int /*n*/, size_t nbytes)
 {
   int nzero = (double)nbytes/sizeof(double);
 
diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp
index b586dca7a5..4d527cc16b 100644
--- a/src/KOKKOS/comm_kokkos.cpp
+++ b/src/KOKKOS/comm_kokkos.cpp
@@ -36,9 +36,9 @@
 
 using namespace LAMMPS_NS;
 
-#define BUFFACTOR 1.5
-#define BUFMIN 10000
-#define BUFEXTRA 1000
+static constexpr double BUFFACTOR = 1.5;
+static constexpr int BUFMIN = 10000;
+static constexpr int BUFEXTRA = 1000;
 
 /* ----------------------------------------------------------------------
    setup MPI and allocate buffer space
@@ -864,7 +864,7 @@ void CommKokkos::exchange_device()
         if (nrecv) {
 
           if (atom->nextra_grow) {
-            if (k_indices.extent(0) < nrecv/data_size)
+            if ((int) k_indices.extent(0) < nrecv/data_size)
               MemoryKokkos::realloc_kokkos(k_indices,"comm:indices",nrecv/data_size);
           } else if (k_indices.h_view.data())
            k_indices = DAT::tdual_int_1d();
@@ -931,6 +931,7 @@ void CommKokkos::exchange_device()
             if (nextrarecv) {
               kkbase->unpack_exchange_kokkos(
                 k_buf_recv,k_indices,nrecv/data_size,
+                nrecv1/data_size,nextrarecv1,
                 ExecutionSpaceFromDevice<DeviceType>::space);
               DeviceType().fence();
             }
diff --git a/src/KOKKOS/comm_tiled_kokkos.cpp b/src/KOKKOS/comm_tiled_kokkos.cpp
index e3286a73f5..4549f53f70 100644
--- a/src/KOKKOS/comm_tiled_kokkos.cpp
+++ b/src/KOKKOS/comm_tiled_kokkos.cpp
@@ -20,11 +20,10 @@
 
 using namespace LAMMPS_NS;
 
-#define BUFFACTOR 1.5
-#define BUFFACTOR 1.5
-#define BUFMIN 1000
-#define BUFEXTRA 1000
-#define EPSILON 1.0e-6
+static constexpr double BUFFACTOR = 1.5;
+static constexpr int BUFMIN = 1000;
+static constexpr int BUFEXTRA = 1000;
+static constexpr double EPSILON = 1.0e-6;
 
 #define DELTA_PROCS 16
 
diff --git a/src/KOKKOS/compute_reaxff_atom_kokkos.cpp b/src/KOKKOS/compute_reaxff_atom_kokkos.cpp
new file mode 100644
index 0000000000..3f6c9242d4
--- /dev/null
+++ b/src/KOKKOS/compute_reaxff_atom_kokkos.cpp
@@ -0,0 +1,195 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Richard Berger (LANL)
+------------------------------------------------------------------------- */
+
+#include "compute_reaxff_atom_kokkos.h"
+#include "atom.h"
+#include "molecule.h"
+#include "update.h"
+#include "force.h"
+#include "memory.h"
+#include "error.h"
+#include "neigh_list.h"
+
+#include "memory_kokkos.h"
+#include "pair_reaxff_kokkos.h"
+#include "reaxff_api.h"
+
+using namespace LAMMPS_NS;
+using namespace ReaxFF;
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+ComputeReaxFFAtomKokkos<DeviceType>::ComputeReaxFFAtomKokkos(LAMMPS *lmp, int narg, char **arg) :
+  ComputeReaxFFAtom(lmp, narg, arg),
+  nbuf(-1), buf(nullptr)
+{
+  kokkosable = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+ComputeReaxFFAtomKokkos<DeviceType>::~ComputeReaxFFAtomKokkos()
+{
+  memoryKK->destroy_kokkos(k_buf, buf);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void ComputeReaxFFAtomKokkos<DeviceType>::init()
+{
+  ComputeReaxFFAtom::init();
+
+  if (!reaxff || !reaxff->kokkosable) {
+    error->all(FLERR,"Cannot use compute reaxff/atom/kk without "
+                     "pair_style reaxff/kk");
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+template<class DeviceType>
+void ComputeReaxFFAtomKokkos<DeviceType>::compute_bonds()
+{
+  if (atom->nlocal > nlocal) {
+    memory->destroy(array_atom);
+    nlocal = atom->nlocal;
+    memory->create(array_atom, nlocal, 3, "reaxff/atom:array_atom");
+  }
+
+  // retrieve bond information from kokkos pair style. the data potentially
+  // lives on device. it is copied into buf on the host in a condensed format
+  // compute_local and compute_atom then expand the data from this buffer into
+  // appropiate arrays for consumption by others (e.g. dump local, dump custom
+  // or library interface)
+
+  int maxnumbonds = 0;
+  if (reaxff->execution_space == Device)
+    device_pair()->FindBond(maxnumbonds, groupbit);
+  else
+    host_pair()->FindBond(maxnumbonds, groupbit);
+
+  nbuf = ((store_bonds ? maxnumbonds*2 : 0) + 3)*nlocal;
+
+  if (!buf || ((int)k_buf.extent(0) < nbuf)) {
+    memoryKK->destroy_kokkos(k_buf, buf);
+    memoryKK->create_kokkos(k_buf, buf, nbuf, "reaxff/atom:buf");
+  }
+
+  // Pass information to buffer, will sync to host
+
+  int nbuf_local;
+  if (reaxff->execution_space == Device)
+    device_pair()->PackReducedBondBuffer(k_buf, nbuf_local, store_bonds);
+  else
+    host_pair()->PackReducedBondBuffer(k_buf, nbuf_local, store_bonds);
+
+  // Extract number of bonds from buffer
+
+  nbonds = 0;
+  int j = 0;
+  for (int i = 0; i < nlocal; i++) {
+    int numbonds = static_cast<int>(buf[j+2]);
+    nbonds += numbonds;
+    j += (store_bonds ? 2*numbonds : 0) + 3;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void ComputeReaxFFAtomKokkos<DeviceType>::compute_local()
+{
+  invoked_local = update->ntimestep;
+
+  if (invoked_bonds < update->ntimestep)
+    compute_bonds();
+
+  if (nbonds > prev_nbonds) {
+    // grow array_local
+    memory->destroy(array_local);
+    memory->create(array_local, nbonds, 3, "reaxff/atom:array_local");
+    prev_nbonds = nbonds;
+  }
+
+  size_local_rows = nbonds;
+
+  // extract local bond information from buffer
+
+  int b = 0;
+  int j = 0;
+  auto tag = atom->tag;
+
+  for (int i = 0; i < nlocal; ++i) {
+    const int numbonds = static_cast<int>(buf[j+2]);
+    const int neigh_offset = j + 3;
+    const int bo_offset = neigh_offset + numbonds;
+    for (int k = 0; k < numbonds; k++) {
+      auto bond = array_local[b++];
+      bond[0] = tag[i];
+      bond[1] = static_cast<tagint> (buf[neigh_offset+k]);
+      bond[2] = buf[bo_offset+k];
+    }
+    j += 2*numbonds + 3;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void ComputeReaxFFAtomKokkos<DeviceType>::compute_peratom()
+{
+  invoked_peratom = update->ntimestep;
+
+  if (invoked_bonds < update->ntimestep)
+    compute_bonds();
+
+  // extract peratom bond information from buffer
+
+  int j = 0;
+  for (int i = 0; i < nlocal; ++i) {
+    auto ptr = array_atom[i];
+    int numbonds = static_cast<int>(buf[j+2]);
+    ptr[0] = buf[j]; // sbo
+    ptr[1] = buf[j+1]; // nlp
+    ptr[2] = numbonds;
+    j += (store_bonds ? 2*numbonds : 0) + 3;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local data
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double ComputeReaxFFAtomKokkos<DeviceType>::memory_usage()
+{
+  double bytes = (double)(nlocal*3) * sizeof(double);
+  if (store_bonds)
+    bytes += (double)(nbonds*3) * sizeof(double);
+  bytes += (double)(nbuf > 0 ? nbuf * sizeof(double) : 0);
+  return bytes;
+}
+
+namespace LAMMPS_NS {
+template class ComputeReaxFFAtomKokkos<LMPDeviceType>;
+#ifdef LMP_KOKKOS_GPU
+template class ComputeReaxFFAtomKokkos<LMPHostType>;
+#endif
+}
diff --git a/src/KOKKOS/compute_reaxff_atom_kokkos.h b/src/KOKKOS/compute_reaxff_atom_kokkos.h
new file mode 100644
index 0000000000..7037c7e308
--- /dev/null
+++ b/src/KOKKOS/compute_reaxff_atom_kokkos.h
@@ -0,0 +1,66 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Richard Berger (LANL)
+------------------------------------------------------------------------- */
+
+#ifdef COMPUTE_CLASS
+// clang-format off
+ComputeStyle(reaxff/atom/kk,ComputeReaxFFAtomKokkos<LMPDeviceType>);
+ComputeStyle(reaxff/atom/kk/device,ComputeReaxFFAtomKokkos<LMPDeviceType>);
+ComputeStyle(reaxff/atom/kk/host,ComputeReaxFFAtomKokkos<LMPHostType>);
+// clang-format on
+#else
+
+#ifndef LMP_COMPUTE_REAXFF_BONDS_KOKKOS_H
+#define LMP_COMPUTE_REAXFF_BONDS_KOKKOS_H
+
+#include "compute_reaxff_atom.h"
+#include "pair_reaxff_kokkos.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+template<class DeviceType>
+class ComputeReaxFFAtomKokkos : public ComputeReaxFFAtom {
+ public:
+  using device_type = DeviceType;
+  using AT = ArrayTypes<DeviceType>;
+
+  ComputeReaxFFAtomKokkos(class LAMMPS *, int, char **);
+  ~ComputeReaxFFAtomKokkos() override;
+  void init() override;
+  void compute_local() override;
+  void compute_peratom() override;
+  void compute_bonds() override;
+  double memory_usage() override;
+
+ private:
+  int nbuf;
+  double *buf;
+  typename AT::tdual_float_1d k_buf;
+
+  auto device_pair() {
+    return static_cast<PairReaxFFKokkos<LMPDeviceType>*>(reaxff);
+  }
+
+  auto host_pair() {
+    return static_cast<PairReaxFFKokkos<LMPHostType>*>(reaxff);
+  }
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/KOKKOS/dihedral_charmm_kokkos.cpp b/src/KOKKOS/dihedral_charmm_kokkos.cpp
index 70a74a4846..b385ec7f01 100644
--- a/src/KOKKOS/dihedral_charmm_kokkos.cpp
+++ b/src/KOKKOS/dihedral_charmm_kokkos.cpp
@@ -33,7 +33,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define TOLERANCE 0.05
+static constexpr double TOLERANCE = 0.05;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp
new file mode 100644
index 0000000000..aeb9b022a7
--- /dev/null
+++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.cpp
@@ -0,0 +1,815 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+
+   Contributing author: Mitch Murphy (alphataubio)
+
+   Based on serial dihedral_charmmfsw.cpp lj-fsw sections (force-switched)
+   provided by Robert Meissner and Lucio Colombi Ciacchi of Bremen
+   University, Germany, with additional assistance from
+   Robert A. Latour, Clemson University.
+
+------------------------------------------------------------------------- */
+
+#include "dihedral_charmmfsw_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "error.h"
+#include "force.h"
+#include "kokkos.h"
+#include "math_const.h"
+#include "memory_kokkos.h"
+#include "neighbor_kokkos.h"
+#include "pair.h"
+
+#include <cmath>
+
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+static constexpr double TOLERANCE = 0.05;
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+DihedralCharmmfswKokkos<DeviceType>::DihedralCharmmfswKokkos(LAMMPS *lmp) : DihedralCharmmfsw(lmp)
+{
+  atomKK = (AtomKokkos *) atom;
+  neighborKK = (NeighborKokkos *) neighbor;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_read = X_MASK | F_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK | TYPE_MASK;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+
+  k_warning_flag = Kokkos::DualView<int,DeviceType>("Dihedral:warning_flag");
+  d_warning_flag = k_warning_flag.template view<DeviceType>();
+  h_warning_flag = k_warning_flag.h_view;
+
+  centroidstressflag = CENTROID_NOTAVAIL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+DihedralCharmmfswKokkos<DeviceType>::~DihedralCharmmfswKokkos()
+{
+  if (!copymode) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void DihedralCharmmfswKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (lmp->kokkos->neighflag == FULL)
+    error->all(FLERR,"Dihedral_style charmm/kk requires half neighbor list");
+
+  ev_init(eflag,vflag,0);
+
+  // ensure pair->ev_tally() will use 1-4 virial contribution
+
+  if (weightflag && vflag_global == VIRIAL_FDOTR)
+    force->pair->vflag_either = force->pair->vflag_global = 1;
+
+  // reallocate per-atom arrays if necessary
+
+  if (eflag_atom) {
+    //if(k_eatom.extent(0)<maxeatom) { // won't work without adding zero functor
+      memoryKK->destroy_kokkos(k_eatom,eatom);
+      memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom");
+      d_eatom = k_eatom.template view<KKDeviceType>();
+      k_eatom_pair = Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType>("dihedral:eatom_pair",maxeatom);
+      d_eatom_pair = k_eatom_pair.template view<KKDeviceType>();
+    //}
+  }
+  if (vflag_atom) {
+    //if(k_vatom.extent(0)<maxvatom) { // won't work without adding zero functor
+      memoryKK->destroy_kokkos(k_vatom,vatom);
+      memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"dihedral:vatom");
+      d_vatom = k_vatom.template view<KKDeviceType>();
+      k_vatom_pair = Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType>("dihedral:vatom_pair",maxvatom);
+      d_vatom_pair = k_vatom_pair.template view<KKDeviceType>();
+    //}
+  }
+
+  x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  q = atomKK->k_q.view<DeviceType>();
+  atomtype = atomKK->k_type.view<DeviceType>();
+  neighborKK->k_dihedrallist.template sync<DeviceType>();
+  dihedrallist = neighborKK->k_dihedrallist.view<DeviceType>();
+  int ndihedrallist = neighborKK->ndihedrallist;
+  nlocal = atom->nlocal;
+  newton_bond = force->newton_bond;
+  qqrd2e = force->qqrd2e;
+
+  h_warning_flag() = 0;
+  k_warning_flag.template modify<LMPHostType>();
+  k_warning_flag.template sync<DeviceType>();
+
+  copymode = 1;
+
+  // loop over neighbors of my atoms
+
+  EVM_FLOAT evm;
+
+  if (evflag) {
+    if (newton_bond) {
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagDihedralCharmmfswCompute<1,1> >(0,ndihedrallist),*this,evm);
+    } else {
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagDihedralCharmmfswCompute<0,1> >(0,ndihedrallist),*this,evm);
+    }
+  } else {
+    if (newton_bond) {
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralCharmmfswCompute<1,0> >(0,ndihedrallist),*this);
+    } else {
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralCharmmfswCompute<0,0> >(0,ndihedrallist),*this);
+    }
+  }
+
+  // error check
+
+  k_warning_flag.template modify<DeviceType>();
+  k_warning_flag.template sync<LMPHostType>();
+  if (h_warning_flag())
+    error->warning(FLERR,"Dihedral problem");
+
+  if (eflag_global) {
+    energy += evm.emol;
+    force->pair->eng_vdwl += evm.evdwl;
+    force->pair->eng_coul += evm.ecoul;
+  }
+  if (vflag_global) {
+    virial[0] += evm.v[0];
+    virial[1] += evm.v[1];
+    virial[2] += evm.v[2];
+    virial[3] += evm.v[3];
+    virial[4] += evm.v[4];
+    virial[5] += evm.v[5];
+
+    force->pair->virial[0] += evm.vp[0];
+    force->pair->virial[1] += evm.vp[1];
+    force->pair->virial[2] += evm.vp[2];
+    force->pair->virial[3] += evm.vp[3];
+    force->pair->virial[4] += evm.vp[4];
+    force->pair->virial[5] += evm.vp[5];
+  }
+
+  // don't yet have dualviews for eatom and vatom in pair_kokkos,
+  //  so need to manually copy these to pair style
+
+  int n = nlocal;
+  if (newton_bond) n += atom->nghost;
+
+  if (eflag_atom) {
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+
+    k_eatom_pair.template modify<DeviceType>();
+    k_eatom_pair.template sync<LMPHostType>();
+    for (int i = 0; i < n; i++)
+      force->pair->eatom[i] += k_eatom_pair.h_view(i);
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+
+    k_vatom_pair.template modify<DeviceType>();
+    k_vatom_pair.template sync<LMPHostType>();
+    for (int i = 0; i < n; i++) {
+      force->pair->vatom[i][0] += k_vatom_pair.h_view(i,0);
+      force->pair->vatom[i][1] += k_vatom_pair.h_view(i,1);
+      force->pair->vatom[i][2] += k_vatom_pair.h_view(i,2);
+      force->pair->vatom[i][3] += k_vatom_pair.h_view(i,3);
+      force->pair->vatom[i][4] += k_vatom_pair.h_view(i,4);
+      force->pair->vatom[i][5] += k_vatom_pair.h_view(i,5);
+    }
+  }
+
+  copymode = 0;
+}
+
+template<class DeviceType>
+template<int NEWTON_BOND, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void DihedralCharmmfswKokkos<DeviceType>::operator()(TagDihedralCharmmfswCompute<NEWTON_BOND,EVFLAG>, const int &n, EVM_FLOAT& evm) const {
+
+  // The f array is atomic
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,typename KKDevice<DeviceType>::value,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > a_f = f;
+
+  const int i1 = dihedrallist(n,0);
+  const int i2 = dihedrallist(n,1);
+  const int i3 = dihedrallist(n,2);
+  const int i4 = dihedrallist(n,3);
+  const int type = dihedrallist(n,4);
+
+  // 1st bond
+
+  const F_FLOAT vb1x = x(i1,0) - x(i2,0);
+  const F_FLOAT vb1y = x(i1,1) - x(i2,1);
+  const F_FLOAT vb1z = x(i1,2) - x(i2,2);
+
+  // 2nd bond
+
+  const F_FLOAT vb2x = x(i3,0) - x(i2,0);
+  const F_FLOAT vb2y = x(i3,1) - x(i2,1);
+  const F_FLOAT vb2z = x(i3,2) - x(i2,2);
+
+  const F_FLOAT vb2xm = -vb2x;
+  const F_FLOAT vb2ym = -vb2y;
+  const F_FLOAT vb2zm = -vb2z;
+
+  // 3rd bond
+
+  const F_FLOAT vb3x = x(i4,0) - x(i3,0);
+  const F_FLOAT vb3y = x(i4,1) - x(i3,1);
+  const F_FLOAT vb3z = x(i4,2) - x(i3,2);
+
+  const F_FLOAT ax = vb1y*vb2zm - vb1z*vb2ym;
+  const F_FLOAT ay = vb1z*vb2xm - vb1x*vb2zm;
+  const F_FLOAT az = vb1x*vb2ym - vb1y*vb2xm;
+  const F_FLOAT bx = vb3y*vb2zm - vb3z*vb2ym;
+  const F_FLOAT by = vb3z*vb2xm - vb3x*vb2zm;
+  const F_FLOAT bz = vb3x*vb2ym - vb3y*vb2xm;
+
+  const F_FLOAT rasq = ax*ax + ay*ay + az*az;
+  const F_FLOAT rbsq = bx*bx + by*by + bz*bz;
+  const F_FLOAT rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
+  const F_FLOAT rg = sqrt(rgsq);
+
+  F_FLOAT rginv,ra2inv,rb2inv;
+  rginv = ra2inv = rb2inv = 0.0;
+  if (rg > 0) rginv = 1.0/rg;
+  if (rasq > 0) ra2inv = 1.0/rasq;
+  if (rbsq > 0) rb2inv = 1.0/rbsq;
+  const F_FLOAT rabinv = sqrt(ra2inv*rb2inv);
+
+  F_FLOAT c = (ax*bx + ay*by + az*bz)*rabinv;
+  F_FLOAT s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
+
+    // error check
+
+  if ((c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) && !d_warning_flag())
+    d_warning_flag() = 1;
+
+  if (c > 1.0) c = 1.0;
+  if (c < -1.0) c = -1.0;
+
+  const int m = d_multiplicity[type];
+  F_FLOAT p = 1.0;
+  F_FLOAT ddf1,df1;
+  ddf1 = df1 = 0.0;
+
+  for (int i = 0; i < m; i++) {
+    ddf1 = p*c - df1*s;
+    df1 = p*s + df1*c;
+    p = ddf1;
+  }
+
+  p = p*d_cos_shift[type] + df1*d_sin_shift[type];
+  df1 = df1*d_cos_shift[type] - ddf1*d_sin_shift[type];
+  df1 *= -m;
+  p += 1.0;
+
+  if (m == 0) {
+    p = 1.0 + d_cos_shift[type];
+    df1 = 0.0;
+  }
+
+  E_FLOAT edihedral = 0.0;
+  if (eflag) edihedral = d_k[type] * p;
+
+  const F_FLOAT fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;
+  const F_FLOAT hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm;
+  const F_FLOAT fga = fg*ra2inv*rginv;
+  const F_FLOAT hgb = hg*rb2inv*rginv;
+  const F_FLOAT gaa = -ra2inv*rg;
+  const F_FLOAT gbb = rb2inv*rg;
+
+  const F_FLOAT dtfx = gaa*ax;
+  const F_FLOAT dtfy = gaa*ay;
+  const F_FLOAT dtfz = gaa*az;
+  const F_FLOAT dtgx = fga*ax - hgb*bx;
+  const F_FLOAT dtgy = fga*ay - hgb*by;
+  const F_FLOAT dtgz = fga*az - hgb*bz;
+  const F_FLOAT dthx = gbb*bx;
+  const F_FLOAT dthy = gbb*by;
+  const F_FLOAT dthz = gbb*bz;
+
+  const F_FLOAT df = -d_k[type] * df1;
+
+  const F_FLOAT sx2 = df*dtgx;
+  const F_FLOAT sy2 = df*dtgy;
+  const F_FLOAT sz2 = df*dtgz;
+
+  F_FLOAT f1[3],f2[3],f3[3],f4[3];
+  f1[0] = df*dtfx;
+  f1[1] = df*dtfy;
+  f1[2] = df*dtfz;
+
+  f2[0] = sx2 - f1[0];
+  f2[1] = sy2 - f1[1];
+  f2[2] = sz2 - f1[2];
+
+  f4[0] = df*dthx;
+  f4[1] = df*dthy;
+  f4[2] = df*dthz;
+
+  f3[0] = -sx2 - f4[0];
+  f3[1] = -sy2 - f4[1];
+  f3[2] = -sz2 - f4[2];
+
+  // apply force to each of 4 atoms
+
+  if (NEWTON_BOND || i1 < nlocal) {
+    a_f(i1,0) += f1[0];
+    a_f(i1,1) += f1[1];
+    a_f(i1,2) += f1[2];
+  }
+
+  if (NEWTON_BOND || i2 < nlocal) {
+    a_f(i2,0) += f2[0];
+    a_f(i2,1) += f2[1];
+    a_f(i2,2) += f2[2];
+  }
+
+  if (NEWTON_BOND || i3 < nlocal) {
+    a_f(i3,0) += f3[0];
+    a_f(i3,1) += f3[1];
+    a_f(i3,2) += f3[2];
+  }
+
+  if (NEWTON_BOND || i4 < nlocal) {
+    a_f(i4,0) += f4[0];
+    a_f(i4,1) += f4[1];
+    a_f(i4,2) += f4[2];
+  }
+
+  if (EVFLAG)
+    ev_tally(evm,i1,i2,i3,i4,edihedral,f1,f3,f4,
+             vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z);
+
+  // 1-4 LJ and Coulomb interactions
+  // tally energy/virial in pair, using newton_bond as newton flag
+
+  if (d_weight[type] > 0.0) {
+    const int itype = atomtype[i1];
+    const int jtype = atomtype[i4];
+
+    const F_FLOAT delx = x(i1,0) - x(i4,0);
+    const F_FLOAT dely = x(i1,1) - x(i4,1);
+    const F_FLOAT delz = x(i1,2) - x(i4,2);
+    const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
+    const F_FLOAT r2inv = 1.0/rsq;
+    const F_FLOAT r6inv = r2inv*r2inv*r2inv;
+
+    F_FLOAT forcecoul;
+    if (implicit) forcecoul = qqrd2e * q[i1]*q[i4]*r2inv;
+    else forcecoul = qqrd2e * q[i1]*q[i4]*sqrt(r2inv);
+    const F_FLOAT forcelj = r6inv * (d_lj14_1(itype,jtype)*r6inv - d_lj14_2(itype,jtype));
+    const F_FLOAT fpair = d_weight[type] * (forcelj+forcecoul)*r2inv;
+
+    const F_FLOAT r = sqrt(rsq);
+    F_FLOAT ecoul = 0.0;
+    F_FLOAT evdwl = 0.0;
+    F_FLOAT evdwl14_12, evdwl14_6;
+    if (eflag) {
+      if (dihedflag)
+        ecoul = d_weight[type] * forcecoul;
+      else
+        ecoul = d_weight[type] * qqrd2e * q[i1] * q[i4] *
+          (sqrt(r2inv) + r * cut_coulinv14 * cut_coulinv14 - 2.0 * cut_coulinv14);
+      evdwl14_12 = r6inv * d_lj14_3(itype,jtype) * r6inv -
+        d_lj14_3(itype,jtype) * cut_lj_inner6inv * cut_lj6inv;
+      evdwl14_6 =
+        -d_lj14_4(itype,jtype) * r6inv + d_lj14_4(itype,jtype) * cut_lj_inner3inv * cut_lj3inv;
+      evdwl = evdwl14_12 + evdwl14_6;
+      evdwl *= d_weight[type];
+    }
+
+    if (newton_bond || i1 < nlocal) {
+      a_f(i1,0) += delx*fpair;
+      a_f(i1,1) += dely*fpair;
+      a_f(i1,2) += delz*fpair;
+    }
+    if (newton_bond || i4 < nlocal) {
+      a_f(i4,0) -= delx*fpair;
+      a_f(i4,1) -= dely*fpair;
+      a_f(i4,2) -= delz*fpair;
+    }
+
+    if (EVFLAG) ev_tally(evm,i1,i4,evdwl,ecoul,fpair,delx,dely,delz);
+  }
+}
+
+template<class DeviceType>
+template<int NEWTON_BOND, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void DihedralCharmmfswKokkos<DeviceType>::operator()(TagDihedralCharmmfswCompute<NEWTON_BOND,EVFLAG>, const int &n) const {
+  EVM_FLOAT evm;
+  this->template operator()<NEWTON_BOND,EVFLAG>(TagDihedralCharmmfswCompute<NEWTON_BOND,EVFLAG>(), n, evm);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void DihedralCharmmfswKokkos<DeviceType>::allocate()
+{
+  DihedralCharmmfsw::allocate();
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one or more types
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void DihedralCharmmfswKokkos<DeviceType>::coeff(int narg, char **arg)
+{
+  DihedralCharmmfsw::coeff(narg, arg);
+
+  int nd = atom->ndihedraltypes;
+  typename AT::tdual_ffloat_1d k_k("DihedralCharmmfsw::k",nd+1);
+  typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmmfsw::multiplicity",nd+1);
+  typename AT::tdual_ffloat_1d k_shift("DihedralCharmmfsw::shift",nd+1);
+  typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmmfsw::cos_shift",nd+1);
+  typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmmfsw::sin_shift",nd+1);
+  typename AT::tdual_ffloat_1d k_weight("DihedralCharmmfsw::weight",nd+1);
+
+  d_k = k_k.template view<DeviceType>();
+  d_multiplicity = k_multiplicity.template view<DeviceType>();
+  d_shift = k_shift.template view<DeviceType>();
+  d_cos_shift = k_cos_shift.template view<DeviceType>();
+  d_sin_shift = k_sin_shift.template view<DeviceType>();
+  d_weight = k_weight.template view<DeviceType>();
+
+  int n = atom->ndihedraltypes;
+  for (int i = 1; i <= n; i++) {
+    k_k.h_view[i] = k[i];
+    k_multiplicity.h_view[i] = multiplicity[i];
+    k_shift.h_view[i] = shift[i];
+    k_cos_shift.h_view[i] = cos_shift[i];
+    k_sin_shift.h_view[i] = sin_shift[i];
+    k_weight.h_view[i] = weight[i];
+  }
+
+  k_k.template modify<LMPHostType>();
+  k_multiplicity.template modify<LMPHostType>();
+  k_shift.template modify<LMPHostType>();
+  k_cos_shift.template modify<LMPHostType>();
+  k_sin_shift.template modify<LMPHostType>();
+  k_weight.template modify<LMPHostType>();
+
+  k_k.template sync<DeviceType>();
+  k_multiplicity.template sync<DeviceType>();
+  k_shift.template sync<DeviceType>();
+  k_cos_shift.template sync<DeviceType>();
+  k_sin_shift.template sync<DeviceType>();
+  k_weight.template sync<DeviceType>();
+}
+
+/* ----------------------------------------------------------------------
+   error check and initialize all values needed for force computation
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void DihedralCharmmfswKokkos<DeviceType>::init_style()
+{
+  DihedralCharmmfsw::init_style();
+
+  int n = atom->ntypes;
+  DAT::tdual_ffloat_2d k_lj14_1("DihedralCharmmfsw:lj14_1",n+1,n+1);
+  DAT::tdual_ffloat_2d k_lj14_2("DihedralCharmmfsw:lj14_2",n+1,n+1);
+  DAT::tdual_ffloat_2d k_lj14_3("DihedralCharmmfsw:lj14_3",n+1,n+1);
+  DAT::tdual_ffloat_2d k_lj14_4("DihedralCharmmfsw:lj14_4",n+1,n+1);
+
+  d_lj14_1 = k_lj14_1.template view<DeviceType>();
+  d_lj14_2 = k_lj14_2.template view<DeviceType>();
+  d_lj14_3 = k_lj14_3.template view<DeviceType>();
+  d_lj14_4 = k_lj14_4.template view<DeviceType>();
+
+
+  if (weightflag) {
+    int n = atom->ntypes;
+    for (int i = 1; i <= n; i++) {
+      for (int j = 1; j <= n; j++) {
+        k_lj14_1.h_view(i,j) = lj14_1[i][j];
+        k_lj14_2.h_view(i,j) = lj14_2[i][j];
+        k_lj14_3.h_view(i,j) = lj14_3[i][j];
+        k_lj14_4.h_view(i,j) = lj14_4[i][j];
+      }
+    }
+  }
+
+  k_lj14_1.template modify<LMPHostType>();
+  k_lj14_2.template modify<LMPHostType>();
+  k_lj14_3.template modify<LMPHostType>();
+  k_lj14_4.template modify<LMPHostType>();
+
+  k_lj14_1.template sync<DeviceType>();
+  k_lj14_2.template sync<DeviceType>();
+  k_lj14_3.template sync<DeviceType>();
+  k_lj14_4.template sync<DeviceType>();
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads coeffs from restart file, bcasts them
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void DihedralCharmmfswKokkos<DeviceType>::read_restart(FILE *fp)
+{
+  DihedralCharmmfsw::read_restart(fp);
+
+  int nd = atom->ndihedraltypes;
+  typename AT::tdual_ffloat_1d k_k("DihedralCharmmfsw::k",nd+1);
+  typename AT::tdual_ffloat_1d k_multiplicity("DihedralCharmmfsw::multiplicity",nd+1);
+  typename AT::tdual_ffloat_1d k_shift("DihedralCharmmfsw::shift",nd+1);
+  typename AT::tdual_ffloat_1d k_cos_shift("DihedralCharmmfsw::cos_shift",nd+1);
+  typename AT::tdual_ffloat_1d k_sin_shift("DihedralCharmmfsw::sin_shift",nd+1);
+  typename AT::tdual_ffloat_1d k_weight("DihedralCharmmfsw::weight",nd+1);
+
+  d_k = k_k.template view<DeviceType>();
+  d_multiplicity = k_multiplicity.template view<DeviceType>();
+  d_shift = k_shift.template view<DeviceType>();
+  d_cos_shift = k_cos_shift.template view<DeviceType>();
+  d_sin_shift = k_sin_shift.template view<DeviceType>();
+  d_weight = k_weight.template view<DeviceType>();
+
+  int n = atom->ndihedraltypes;
+  for (int i = 1; i <= n; i++) {
+    k_k.h_view[i] = k[i];
+    k_multiplicity.h_view[i] = multiplicity[i];
+    k_shift.h_view[i] = shift[i];
+    k_cos_shift.h_view[i] = cos_shift[i];
+    k_sin_shift.h_view[i] = sin_shift[i];
+    k_weight.h_view[i] = weight[i];
+  }
+
+  k_k.template modify<LMPHostType>();
+  k_multiplicity.template modify<LMPHostType>();
+  k_shift.template modify<LMPHostType>();
+  k_cos_shift.template modify<LMPHostType>();
+  k_sin_shift.template modify<LMPHostType>();
+  k_weight.template modify<LMPHostType>();
+
+  k_k.template sync<DeviceType>();
+  k_multiplicity.template sync<DeviceType>();
+  k_shift.template sync<DeviceType>();
+  k_cos_shift.template sync<DeviceType>();
+  k_sin_shift.template sync<DeviceType>();
+  k_weight.template sync<DeviceType>();
+}
+
+/* ----------------------------------------------------------------------
+   tally energy and virial into global and per-atom accumulators
+   virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
+          = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
+          = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+//template<int NEWTON_BOND>
+KOKKOS_INLINE_FUNCTION
+void DihedralCharmmfswKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i1, const int i2, const int i3, const int i4,
+                        F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4,
+                        const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z,
+                        const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z,
+                        const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const
+{
+  E_FLOAT edihedralquarter;
+  F_FLOAT v[6];
+
+  if (eflag_either) {
+    if (eflag_global) {
+      if (newton_bond) evm.emol += edihedral;
+      else {
+        edihedralquarter = 0.25*edihedral;
+        if (i1 < nlocal) evm.emol += edihedralquarter;
+        if (i2 < nlocal) evm.emol += edihedralquarter;
+        if (i3 < nlocal) evm.emol += edihedralquarter;
+        if (i4 < nlocal) evm.emol += edihedralquarter;
+      }
+    }
+    if (eflag_atom) {
+      edihedralquarter = 0.25*edihedral;
+      if (newton_bond || i1 < nlocal) d_eatom[i1] += edihedralquarter;
+      if (newton_bond || i2 < nlocal) d_eatom[i2] += edihedralquarter;
+      if (newton_bond || i3 < nlocal) d_eatom[i3] += edihedralquarter;
+      if (newton_bond || i4 < nlocal) d_eatom[i4] += edihedralquarter;
+    }
+  }
+
+  if (vflag_either) {
+    v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
+    v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
+    v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
+    v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1];
+    v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2];
+    v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2];
+
+    if (vflag_global) {
+      if (newton_bond) {
+        evm.v[0] += v[0];
+        evm.v[1] += v[1];
+        evm.v[2] += v[2];
+        evm.v[3] += v[3];
+        evm.v[4] += v[4];
+        evm.v[5] += v[5];
+      } else {
+        if (i1 < nlocal) {
+          evm.v[0] += 0.25*v[0];
+          evm.v[1] += 0.25*v[1];
+          evm.v[2] += 0.25*v[2];
+          evm.v[3] += 0.25*v[3];
+          evm.v[4] += 0.25*v[4];
+          evm.v[5] += 0.25*v[5];
+        }
+        if (i2 < nlocal) {
+          evm.v[0] += 0.25*v[0];
+          evm.v[1] += 0.25*v[1];
+          evm.v[2] += 0.25*v[2];
+          evm.v[3] += 0.25*v[3];
+          evm.v[4] += 0.25*v[4];
+          evm.v[5] += 0.25*v[5];
+        }
+        if (i3 < nlocal) {
+          evm.v[0] += 0.25*v[0];
+          evm.v[1] += 0.25*v[1];
+          evm.v[2] += 0.25*v[2];
+          evm.v[3] += 0.25*v[3];
+          evm.v[4] += 0.25*v[4];
+          evm.v[5] += 0.25*v[5];
+        }
+        if (i4 < nlocal) {
+          evm.v[0] += 0.25*v[0];
+          evm.v[1] += 0.25*v[1];
+          evm.v[2] += 0.25*v[2];
+          evm.v[3] += 0.25*v[3];
+          evm.v[4] += 0.25*v[4];
+          evm.v[5] += 0.25*v[5];
+        }
+      }
+    }
+
+    if (vflag_atom) {
+      if (newton_bond || i1 < nlocal) {
+        d_vatom(i1,0) += 0.25*v[0];
+        d_vatom(i1,1) += 0.25*v[1];
+        d_vatom(i1,2) += 0.25*v[2];
+        d_vatom(i1,3) += 0.25*v[3];
+        d_vatom(i1,4) += 0.25*v[4];
+        d_vatom(i1,5) += 0.25*v[5];
+      }
+      if (newton_bond || i2 < nlocal) {
+        d_vatom(i2,0) += 0.25*v[0];
+        d_vatom(i2,1) += 0.25*v[1];
+        d_vatom(i2,2) += 0.25*v[2];
+        d_vatom(i2,3) += 0.25*v[3];
+        d_vatom(i2,4) += 0.25*v[4];
+        d_vatom(i2,5) += 0.25*v[5];
+      }
+      if (newton_bond || i3 < nlocal) {
+        d_vatom(i3,0) += 0.25*v[0];
+        d_vatom(i3,1) += 0.25*v[1];
+        d_vatom(i3,2) += 0.25*v[2];
+        d_vatom(i3,3) += 0.25*v[3];
+        d_vatom(i3,4) += 0.25*v[4];
+        d_vatom(i3,5) += 0.25*v[5];
+      }
+      if (newton_bond || i4 < nlocal) {
+        d_vatom(i4,0) += 0.25*v[0];
+        d_vatom(i4,1) += 0.25*v[1];
+        d_vatom(i4,2) += 0.25*v[2];
+        d_vatom(i4,3) += 0.25*v[3];
+        d_vatom(i4,4) += 0.25*v[4];
+        d_vatom(i4,5) += 0.25*v[5];
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   tally eng_vdwl and virial into global and per-atom accumulators
+   need i < nlocal test since called by bond_quartic and dihedral_charmm
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+KOKKOS_INLINE_FUNCTION
+void DihedralCharmmfswKokkos<DeviceType>::ev_tally(EVM_FLOAT &evm, const int i, const int j,
+      const F_FLOAT &evdwl, const F_FLOAT &ecoul, const F_FLOAT &fpair, const F_FLOAT &delx,
+                const F_FLOAT &dely, const F_FLOAT &delz) const
+{
+  E_FLOAT evdwlhalf,ecoulhalf,epairhalf;
+  F_FLOAT v[6];
+
+
+  if (eflag_either) {
+    if (eflag_global) {
+      if (newton_bond) {
+        evm.evdwl += evdwl;
+        evm.ecoul += ecoul;
+      } else {
+        evdwlhalf = 0.5*evdwl;
+        ecoulhalf = 0.5*ecoul;
+        if (i < nlocal) {
+          evm.evdwl += evdwlhalf;
+          evm.ecoul += ecoulhalf;
+        }
+        if (j < nlocal) {
+          evm.evdwl += evdwlhalf;
+          evm.ecoul += ecoulhalf;
+        }
+      }
+    }
+    if (eflag_atom) {
+      epairhalf = 0.5 * (evdwl + ecoul);
+      if (newton_bond || i < nlocal) d_eatom_pair[i] += epairhalf;
+      if (newton_bond || j < nlocal) d_eatom_pair[j] += epairhalf;
+    }
+  }
+
+  if (vflag_either) {
+    v[0] = delx*delx*fpair;
+    v[1] = dely*dely*fpair;
+    v[2] = delz*delz*fpair;
+    v[3] = delx*dely*fpair;
+    v[4] = delx*delz*fpair;
+    v[5] = dely*delz*fpair;
+
+    if (vflag_global) {
+      if (newton_bond) {
+        evm.vp[0] += v[0];
+        evm.vp[1] += v[1];
+        evm.vp[2] += v[2];
+        evm.vp[3] += v[3];
+        evm.vp[4] += v[4];
+        evm.vp[5] += v[5];
+      } else {
+        if (i < nlocal) {
+          evm.vp[0] += 0.5*v[0];
+          evm.vp[1] += 0.5*v[1];
+          evm.vp[2] += 0.5*v[2];
+          evm.vp[3] += 0.5*v[3];
+          evm.vp[4] += 0.5*v[4];
+          evm.vp[5] += 0.5*v[5];
+        }
+        if (j < nlocal) {
+          evm.vp[0] += 0.5*v[0];
+          evm.vp[1] += 0.5*v[1];
+          evm.vp[2] += 0.5*v[2];
+          evm.vp[3] += 0.5*v[3];
+          evm.vp[4] += 0.5*v[4];
+          evm.vp[5] += 0.5*v[5];
+        }
+      }
+    }
+
+    if (vflag_atom) {
+      if (newton_bond || i < nlocal) {
+        d_vatom_pair(i,0) += 0.5*v[0];
+        d_vatom_pair(i,1) += 0.5*v[1];
+        d_vatom_pair(i,2) += 0.5*v[2];
+        d_vatom_pair(i,3) += 0.5*v[3];
+        d_vatom_pair(i,4) += 0.5*v[4];
+        d_vatom_pair(i,5) += 0.5*v[5];
+      }
+      if (newton_bond || j < nlocal) {
+        d_vatom_pair(j,0) += 0.5*v[0];
+        d_vatom_pair(j,1) += 0.5*v[1];
+        d_vatom_pair(j,2) += 0.5*v[2];
+        d_vatom_pair(j,3) += 0.5*v[3];
+        d_vatom_pair(j,4) += 0.5*v[4];
+        d_vatom_pair(j,5) += 0.5*v[5];
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+namespace LAMMPS_NS {
+template class DihedralCharmmfswKokkos<LMPDeviceType>;
+#ifdef LMP_KOKKOS_GPU
+template class DihedralCharmmfswKokkos<LMPHostType>;
+#endif
+}
+
diff --git a/src/KOKKOS/dihedral_charmmfsw_kokkos.h b/src/KOKKOS/dihedral_charmmfsw_kokkos.h
new file mode 100644
index 0000000000..b1c65ae477
--- /dev/null
+++ b/src/KOKKOS/dihedral_charmmfsw_kokkos.h
@@ -0,0 +1,118 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+// clang-format off
+DihedralStyle(charmmfsw/kk,DihedralCharmmfswKokkos<LMPDeviceType>);
+DihedralStyle(charmmfsw/kk/device,DihedralCharmmfswKokkos<LMPDeviceType>);
+DihedralStyle(charmmfsw/kk/host,DihedralCharmmfswKokkos<LMPHostType>);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_DIHEDRAL_CHARMMFSW_KOKKOS_H
+#define LMP_DIHEDRAL_CHARMMFSW_KOKKOS_H
+
+#include "dihedral_charmmfsw.h"
+#include "kokkos_type.h"
+#include "dihedral_charmm_kokkos.h" // needed for s_EVM_FLOAT
+
+namespace LAMMPS_NS {
+
+template<int NEWTON_BOND, int EVFLAG>
+struct TagDihedralCharmmfswCompute{};
+
+template<class DeviceType>
+class DihedralCharmmfswKokkos : public DihedralCharmmfsw {
+ public:
+  typedef DeviceType device_type;
+  typedef EVM_FLOAT value_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  DihedralCharmmfswKokkos(class LAMMPS *);
+  ~DihedralCharmmfswKokkos() override;
+  void compute(int, int) override;
+  void coeff(int, char **) override;
+  void init_style() override;
+  void read_restart(FILE *) override;
+
+  template<int NEWTON_BOND, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagDihedralCharmmfswCompute<NEWTON_BOND,EVFLAG>, const int&, EVM_FLOAT&) const;
+
+  template<int NEWTON_BOND, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagDihedralCharmmfswCompute<NEWTON_BOND,EVFLAG>, const int&) const;
+
+  //template<int NEWTON_BOND>
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally(EVM_FLOAT &evm, const int i1, const int i2, const int i3, const int i4,
+                          F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4,
+                          const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z,
+                          const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z,
+                          const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const;
+
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally(EVM_FLOAT &evm, const int i, const int j,
+        const F_FLOAT &evdwl, const F_FLOAT &ecoul, const F_FLOAT &fpair, const F_FLOAT &delx,
+                const F_FLOAT &dely, const F_FLOAT &delz) const;
+
+ protected:
+
+  class NeighborKokkos *neighborKK;
+
+  typename AT::t_x_array_randomread x;
+  typename AT::t_int_1d_randomread atomtype;
+  typename AT::t_ffloat_1d_randomread q;
+  typename AT::t_f_array f;
+  typename AT::t_int_2d dihedrallist;
+
+  typedef typename KKDevice<DeviceType>::value KKDeviceType;
+  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType> k_eatom;
+  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType> k_vatom;
+  Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom;
+  Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom;
+
+  Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType> k_eatom_pair;
+  Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType> k_vatom_pair;
+  Kokkos::View<E_FLOAT*,Kokkos::LayoutRight,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_eatom_pair;
+  Kokkos::View<F_FLOAT*[6],Kokkos::LayoutRight,KKDeviceType,Kokkos::MemoryTraits<Kokkos::Atomic> > d_vatom_pair;
+
+  int nlocal,newton_bond;
+  int eflag,vflag;
+  double qqrd2e;
+
+  Kokkos::DualView<int,DeviceType> k_warning_flag;
+  typename Kokkos::DualView<int,DeviceType>::t_dev d_warning_flag;
+  typename Kokkos::DualView<int,DeviceType>::t_host h_warning_flag;
+
+  typename AT::t_ffloat_2d d_lj14_1;
+  typename AT::t_ffloat_2d d_lj14_2;
+  typename AT::t_ffloat_2d d_lj14_3;
+  typename AT::t_ffloat_2d d_lj14_4;
+
+  typename AT::t_ffloat_1d d_k;
+  typename AT::t_ffloat_1d d_multiplicity;
+  typename AT::t_ffloat_1d d_shift;
+  typename AT::t_ffloat_1d d_sin_shift;
+  typename AT::t_ffloat_1d d_cos_shift;
+  typename AT::t_ffloat_1d d_weight;
+
+  void allocate() override;
+};
+
+}
+
+#endif
+#endif
+
diff --git a/src/KOKKOS/dihedral_class2_kokkos.cpp b/src/KOKKOS/dihedral_class2_kokkos.cpp
index 2d6032600b..59f5c18ee7 100644
--- a/src/KOKKOS/dihedral_class2_kokkos.cpp
+++ b/src/KOKKOS/dihedral_class2_kokkos.cpp
@@ -30,9 +30,9 @@
 
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
-#define SMALLER   0.00001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
+static constexpr double SMALLER =   0.00001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/dihedral_harmonic_kokkos.cpp b/src/KOKKOS/dihedral_harmonic_kokkos.cpp
index 8ca0b368df..87e83a17fc 100644
--- a/src/KOKKOS/dihedral_harmonic_kokkos.cpp
+++ b/src/KOKKOS/dihedral_harmonic_kokkos.cpp
@@ -30,9 +30,9 @@
 
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
-#define SMALLER   0.00001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
+static constexpr double SMALLER =   0.00001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/dihedral_opls_kokkos.cpp b/src/KOKKOS/dihedral_opls_kokkos.cpp
index 670faa9e84..ce7502b25a 100644
--- a/src/KOKKOS/dihedral_opls_kokkos.cpp
+++ b/src/KOKKOS/dihedral_opls_kokkos.cpp
@@ -30,9 +30,9 @@
 
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
-#define SMALLER   0.00001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
+static constexpr double SMALLER =   0.00001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/domain_kokkos.cpp b/src/KOKKOS/domain_kokkos.cpp
index 6311d45a3f..d0af281a14 100644
--- a/src/KOKKOS/domain_kokkos.cpp
+++ b/src/KOKKOS/domain_kokkos.cpp
@@ -22,8 +22,8 @@
 
 using namespace LAMMPS_NS;
 
-#define BIG   1.0e20
-#define SMALL 1.0e-4
+static constexpr double BIG =   1.0e20;
+static constexpr double SMALL = 1.0e-4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/dynamical_matrix_kokkos.cpp b/src/KOKKOS/dynamical_matrix_kokkos.cpp
index 32986025e6..ec2cc17ef2 100644
--- a/src/KOKKOS/dynamical_matrix_kokkos.cpp
+++ b/src/KOKKOS/dynamical_matrix_kokkos.cpp
@@ -174,72 +174,45 @@ void DynamicalMatrixKokkos::update_force()
   }
 
   bool execute_on_host = false;
-  unsigned int datamask_read_device = 0;
-  unsigned int datamask_modify_device = 0;
   unsigned int datamask_read_host = 0;
 
   if (pair_compute_flag) {
     if (force->pair->execution_space==Host) {
       execute_on_host  = true;
       datamask_read_host   |= force->pair->datamask_read;
-      datamask_modify_device |= force->pair->datamask_modify;
-    } else {
-      datamask_read_device   |= force->pair->datamask_read;
-      datamask_modify_device |= force->pair->datamask_modify;
     }
   }
   if (atomKK->molecular && force->bond)  {
     if (force->bond->execution_space==Host) {
       execute_on_host  = true;
       datamask_read_host   |= force->bond->datamask_read;
-      datamask_modify_device |= force->bond->datamask_modify;
-    } else {
-      datamask_read_device   |= force->bond->datamask_read;
-      datamask_modify_device |= force->bond->datamask_modify;
     }
   }
   if (atomKK->molecular && force->angle) {
     if (force->angle->execution_space==Host) {
       execute_on_host  = true;
       datamask_read_host   |= force->angle->datamask_read;
-      datamask_modify_device |= force->angle->datamask_modify;
-    } else {
-      datamask_read_device   |= force->angle->datamask_read;
-      datamask_modify_device |= force->angle->datamask_modify;
     }
   }
   if (atomKK->molecular && force->dihedral) {
     if (force->dihedral->execution_space==Host) {
       execute_on_host  = true;
       datamask_read_host   |= force->dihedral->datamask_read;
-      datamask_modify_device |= force->dihedral->datamask_modify;
-    } else {
-      datamask_read_device   |= force->dihedral->datamask_read;
-      datamask_modify_device |= force->dihedral->datamask_modify;
     }
   }
   if (atomKK->molecular && force->improper) {
     if (force->improper->execution_space==Host) {
       execute_on_host  = true;
       datamask_read_host   |= force->improper->datamask_read;
-      datamask_modify_device |= force->improper->datamask_modify;
-    } else {
-      datamask_read_device   |= force->improper->datamask_read;
-      datamask_modify_device |= force->improper->datamask_modify;
     }
   }
   if (kspace_compute_flag) {
     if (force->kspace->execution_space==Host) {
       execute_on_host  = true;
       datamask_read_host   |= force->kspace->datamask_read;
-      datamask_modify_device |= force->kspace->datamask_modify;
-    } else {
-      datamask_read_device   |= force->kspace->datamask_read;
-      datamask_modify_device |= force->kspace->datamask_modify;
     }
   }
 
-
   if (pair_compute_flag) {
     atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
     atomKK->sync(force->pair->execution_space,~(~force->pair->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp
index 59ed918729..5256b7f4a4 100644
--- a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp
+++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp
@@ -38,7 +38,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define SMALL 0.0001
+static constexpr double SMALL = 0.0001;
 #define EV_TO_KCAL_PER_MOL 14.4
 
 /* ---------------------------------------------------------------------- */
@@ -192,7 +192,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::setup_pre_force(int vflag)
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
-void FixACKS2ReaxFFKokkos<DeviceType>::pre_force(int vflag)
+void FixACKS2ReaxFFKokkos<DeviceType>::pre_force(int /*vflag*/)
 {
   if (update->ntimestep % nevery) return;
 
@@ -298,8 +298,8 @@ void FixACKS2ReaxFFKokkos<DeviceType>::pre_force(int vflag)
   } else { // GPU, use teams
     Kokkos::deep_copy(d_mfill_offset,0);
 
-    int vector_length = 32;
     int atoms_per_team = 4;
+    int vector_length = 32;
     int num_teams = nn / atoms_per_team + (nn % atoms_per_team ? 1 : 0);
 
     Kokkos::TeamPolicy<DeviceType> policy(num_teams, atoms_per_team,
diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.h b/src/KOKKOS/fix_acks2_reaxff_kokkos.h
index 127c8d0402..c27719c364 100644
--- a/src/KOKKOS/fix_acks2_reaxff_kokkos.h
+++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.h
@@ -289,8 +289,7 @@ struct FixACKS2ReaxFFKokkosComputeHFunctor {
 
   FixACKS2ReaxFFKokkosComputeHFunctor(FixACKS2ReaxFFKokkos<DeviceType> *c_ptr,
                                   int _atoms_per_team, int _vector_length)
-      : c(*c_ptr), atoms_per_team(_atoms_per_team),
-        vector_length(_vector_length) {
+      : atoms_per_team(_atoms_per_team), vector_length(_vector_length), c(*c_ptr) {
     c.cleanup_copy();
   };
 
@@ -337,8 +336,7 @@ struct FixACKS2ReaxFFKokkosComputeXFunctor {
 
   FixACKS2ReaxFFKokkosComputeXFunctor(FixACKS2ReaxFFKokkos<DeviceType> *c_ptr,
                                   int _atoms_per_team, int _vector_length)
-      : c(*c_ptr), atoms_per_team(_atoms_per_team),
-        vector_length(_vector_length) {
+    : atoms_per_team(_atoms_per_team), vector_length(_vector_length), c(*c_ptr) {
     c.cleanup_copy();
   };
 
diff --git a/src/KOKKOS/fix_dt_reset_kokkos.cpp b/src/KOKKOS/fix_dt_reset_kokkos.cpp
index 6e7709ace1..83cdc5f26f 100644
--- a/src/KOKKOS/fix_dt_reset_kokkos.cpp
+++ b/src/KOKKOS/fix_dt_reset_kokkos.cpp
@@ -30,7 +30,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/fix_enforce2d_kokkos.cpp b/src/KOKKOS/fix_enforce2d_kokkos.cpp
index 24cf307827..567c6ad160 100644
--- a/src/KOKKOS/fix_enforce2d_kokkos.cpp
+++ b/src/KOKKOS/fix_enforce2d_kokkos.cpp
@@ -122,7 +122,7 @@ void FixEnforce2DKokkos<DeviceType>::post_force(int /*vflag*/)
 
 
 template <class DeviceType>
-template <int omega_flag, int angmom_flag, int torque_flag>
+template <int OMEGA_FLAG, int ANGMOM_FLAG, int TORQUE_FLAG>
 KOKKOS_INLINE_FUNCTION
 void FixEnforce2DKokkos<DeviceType>::post_force_item( int i ) const
 {
@@ -130,17 +130,17 @@ void FixEnforce2DKokkos<DeviceType>::post_force_item( int i ) const
     v(i,2) = 0.0;
     f(i,2) = 0.0;
 
-    if (omega_flag) {
+    if (OMEGA_FLAG) {
       omega(i,0) = 0.0;
       omega(i,1) = 0.0;
     }
 
-    if (angmom_flag) {
+    if (ANGMOM_FLAG) {
       angmom(i,0) = 0.0;
       angmom(i,1) = 0.0;
     }
 
-    if (torque_flag) {
+    if (TORQUE_FLAG) {
       torque(i,0) = 0.0;
       torque(i,1) = 0.0;
     }
diff --git a/src/KOKKOS/fix_enforce2d_kokkos.h b/src/KOKKOS/fix_enforce2d_kokkos.h
index cd6903f6c9..1c7a33f3b8 100644
--- a/src/KOKKOS/fix_enforce2d_kokkos.h
+++ b/src/KOKKOS/fix_enforce2d_kokkos.h
@@ -36,7 +36,7 @@ class FixEnforce2DKokkos : public FixEnforce2D {
   void setup(int) override;
   void post_force(int) override;
 
-  template <int omega_flag, int angmom_flag, int torque_flag>
+  template <int OMEGA_FLAG, int ANGMOM_FLAG, int TORQUE_FLAG>
   KOKKOS_INLINE_FUNCTION
   void post_force_item(const int i) const;
 
diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp
index 309eaeeebf..9b8ac91569 100644
--- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp
+++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp
@@ -25,7 +25,7 @@
 #include <cmath>
 #include "atom_masks.h"
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 #ifdef DBL_EPSILON
   #define MY_EPSILON (10.0*DBL_EPSILON)
diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp
index 437dd9daef..89cf91130f 100644
--- a/src/KOKKOS/fix_langevin_kokkos.cpp
+++ b/src/KOKKOS/fix_langevin_kokkos.cpp
@@ -34,8 +34,8 @@ using namespace FixConst;
 
 enum{NOBIAS,BIAS};
 enum{CONSTANT,EQUAL,ATOM};
-#define SINERTIA 0.4          // moment of inertia prefactor for sphere
-#define EINERTIA 0.2          // moment of inertia prefactor for ellipsoid
+static constexpr double SINERTIA = 0.4;          // moment of inertia prefactor for sphere
+static constexpr double EINERTIA = 0.2;          // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/fix_neigh_history_kokkos.cpp b/src/KOKKOS/fix_neigh_history_kokkos.cpp
index b4a852ba70..49fe3f1177 100644
--- a/src/KOKKOS/fix_neigh_history_kokkos.cpp
+++ b/src/KOKKOS/fix_neigh_history_kokkos.cpp
@@ -453,8 +453,12 @@ KOKKOS_INLINE_FUNCTION
 void FixNeighHistoryKokkos<DeviceType>::operator()(TagFixNeighHistoryUnpackExchange, const int &i) const
 {
   int index = d_indices(i);
+
   if (index > -1) {
     int m = (int) d_ubuf(d_buf(i)).i;
+    if (i >= nrecv1)
+      m = nextrarecv1 + (int) d_ubuf(d_buf(nextrarecv1 + i - nrecv1)).i;
+
     int n = (int) d_ubuf(d_buf(m++)).i;
     d_npartner(index) = n;
     for (int p = 0; p < n; p++) {
@@ -471,6 +475,7 @@ void FixNeighHistoryKokkos<DeviceType>::operator()(TagFixNeighHistoryUnpackExcha
 template<class DeviceType>
 void FixNeighHistoryKokkos<DeviceType>::unpack_exchange_kokkos(
   DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
+  int nrecv1, int nextrarecv1,
   ExecutionSpace /*space*/)
 {
   d_buf = typename AT::t_xfloat_1d_um(
@@ -478,6 +483,9 @@ void FixNeighHistoryKokkos<DeviceType>::unpack_exchange_kokkos(
     k_buf.extent(0)*k_buf.extent(1));
   d_indices = k_indices.view<DeviceType>();
 
+  this->nrecv1 = nrecv1;
+  this->nextrarecv1 = nextrarecv1;
+
   d_npartner = k_npartner.template view<DeviceType>();
   d_partner = k_partner.template view<DeviceType>();
   d_valuepartner = k_valuepartner.template view<DeviceType>();
diff --git a/src/KOKKOS/fix_neigh_history_kokkos.h b/src/KOKKOS/fix_neigh_history_kokkos.h
index 9c07a953c4..dd1ad769b8 100644
--- a/src/KOKKOS/fix_neigh_history_kokkos.h
+++ b/src/KOKKOS/fix_neigh_history_kokkos.h
@@ -72,12 +72,14 @@ class FixNeighHistoryKokkos : public FixNeighHistory, public KokkosBase {
 
   void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
                               DAT::tdual_int_1d &indices,int nrecv,
+                              int nrecv1,int nrecv1extra,
                               ExecutionSpace space) override;
 
   typename DAT::tdual_int_2d k_firstflag;
   typename DAT::tdual_float_2d k_firstvalue;
 
  private:
+  int nrecv1,nextrarecv1;
   int nlocal,nsend,beyond_contact;
 
   typename AT::t_tagint_1d tag;
diff --git a/src/KOKKOS/fix_nh_kokkos.cpp b/src/KOKKOS/fix_nh_kokkos.cpp
index 3d2d3ebb71..d038093317 100644
--- a/src/KOKKOS/fix_nh_kokkos.cpp
+++ b/src/KOKKOS/fix_nh_kokkos.cpp
@@ -39,8 +39,8 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define DELTAFLIP 0.1
-#define TILTMAX 1.5
+static constexpr double DELTAFLIP = 0.1;
+static constexpr double TILTMAX = 1.5;
 
 enum{NOBIAS,BIAS};
 enum{NONE,XYZ,XY,YZ,XZ};
diff --git a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp
index bd65a6965e..948e3b88f6 100644
--- a/src/KOKKOS/fix_nvt_sllod_kokkos.cpp
+++ b/src/KOKKOS/fix_nvt_sllod_kokkos.cpp
@@ -128,7 +128,7 @@ void FixNVTSllodKokkos<DeviceType>::nh_v_temp()
 
   d_h_two = Few<double, 6>(h_two);
 
-  if (vdelu.extent(0) < atomKK->nmax)
+  if ((int)vdelu.extent(0) < atomKK->nmax)
     vdelu = typename AT::t_v_array(Kokkos::NoInit("nvt/sllod/kk:vdelu"), atomKK->nmax);
 
   if (!this->psllod_flag) {
diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp
index a2a50d84bb..06485eb1cc 100644
--- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp
+++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp
@@ -46,7 +46,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define SMALL 0.0001
+static constexpr double SMALL = 0.0001;
 #define EV_TO_KCAL_PER_MOL 14.4
 
 /* ---------------------------------------------------------------------- */
@@ -1416,6 +1416,7 @@ KOKKOS_INLINE_FUNCTION
 void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqUnpackExchange, const int &i) const
 {
   int index = d_indices(i);
+
   if (index > -1) {
     for (int m = 0; m < nprev; m++) d_s_hist(index,m) = d_buf(i*nprev*2 + m);
     for (int m = 0; m < nprev; m++) d_t_hist(index,m) = d_buf(i*nprev*2 + nprev+m);
@@ -1427,6 +1428,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqUnpackExchange, const int
 template <class DeviceType>
 void FixQEqReaxFFKokkos<DeviceType>::unpack_exchange_kokkos(
   DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
+  int /*nrecv1*/, int /*nextrarecv1*/,
   ExecutionSpace /*space*/)
 {
   k_buf.sync<DeviceType>();
diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h
index 9bc38b0492..6aa345fba6 100644
--- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h
+++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h
@@ -143,6 +143,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
 
   void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
                               DAT::tdual_int_1d &indices,int nrecv,
+                              int nrecv1,int nextrarecv1,
                               ExecutionSpace space) override;
 
   struct params_qeq{
diff --git a/src/KOKKOS/fix_shake_kokkos.cpp b/src/KOKKOS/fix_shake_kokkos.cpp
index dd6de8f9ec..81489142db 100644
--- a/src/KOKKOS/fix_shake_kokkos.cpp
+++ b/src/KOKKOS/fix_shake_kokkos.cpp
@@ -41,10 +41,10 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
-#define RVOUS 1   // 0 for irregular, 1 for all2all
+static constexpr int RVOUS = 1;   // 0 for irregular, 1 for all2all
 
-#define BIG 1.0e20
-#define MASSDELTA 0.1
+static constexpr double BIG = 1.0e20;
+static constexpr double MASSDELTA = 0.1;
 
 /* ---------------------------------------------------------------------- */
 
@@ -525,7 +525,7 @@ void FixShakeKokkos<DeviceType>::operator()(TagFixShakePostForce<NEIGHFLAG,EVFLA
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
-int FixShakeKokkos<DeviceType>::dof(int igroup)
+bigint FixShakeKokkos<DeviceType>::dof(int igroup)
 {
   d_mask = atomKK->k_mask.view<DeviceType>();
   d_tag = atomKK->k_tag.view<DeviceType>();
@@ -538,7 +538,7 @@ int FixShakeKokkos<DeviceType>::dof(int igroup)
   // count dof in a cluster if and only if
   // the central atom is in group and atom i is the central atom
 
-  int n = 0;
+  bigint n = 0;
   {
     // local variables for lambda capture
 
@@ -549,7 +549,7 @@ int FixShakeKokkos<DeviceType>::dof(int igroup)
     auto groupbit = group->bitmask[igroup];
 
     Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal),
-     LAMMPS_LAMBDA(const int& i, int& n) {
+     LAMMPS_LAMBDA(const int& i, bigint& n) {
       if (!(mask[i] & groupbit)) return;
       if (d_shake_flag[i] == 0) return;
       if (d_shake_atom(i,0) != tag[i]) return;
@@ -560,8 +560,8 @@ int FixShakeKokkos<DeviceType>::dof(int igroup)
     },n);
   }
 
-  int nall;
-  MPI_Allreduce(&n,&nall,1,MPI_INT,MPI_SUM,world);
+  bigint nall;
+  MPI_Allreduce(&n,&nall,1,MPI_LMP_BIGINT,MPI_SUM,world);
   return nall;
 }
 
@@ -1581,8 +1581,8 @@ void FixShakeKokkos<DeviceType>::pack_exchange_item(const int &mysend, int &offs
     else offset++;
   } else {
 
-    d_buf[mysend] = nsend + offset;
     int m = nsend + offset;
+    d_buf[mysend] = m;
     d_buf[m++] = flag;
     if (flag == 1) {
       d_buf[m++] = d_shake_atom(i,0);
@@ -1703,6 +1703,8 @@ void FixShakeKokkos<DeviceType>::operator()(TagFixShakeUnpackExchange, const int
 
   if (index > -1) {
     int m = d_buf[i];
+    if (i >= nrecv1)
+      m = nextrarecv1 + d_buf[nextrarecv1 + i - nrecv1];
 
     int flag = d_shake_flag[index] = static_cast<int> (d_buf[m++]);
     if (flag == 1) {
@@ -1739,6 +1741,7 @@ void FixShakeKokkos<DeviceType>::operator()(TagFixShakeUnpackExchange, const int
 template <class DeviceType>
 void FixShakeKokkos<DeviceType>::unpack_exchange_kokkos(
   DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
+  int nrecv1, int nextrarecv1,
   ExecutionSpace /*space*/)
 {
   k_buf.sync<DeviceType>();
@@ -1749,6 +1752,9 @@ void FixShakeKokkos<DeviceType>::unpack_exchange_kokkos(
     k_buf.extent(0)*k_buf.extent(1));
   d_indices = k_indices.view<DeviceType>();
 
+  this->nrecv1 = nrecv1;
+  this->nextrarecv1 = nextrarecv1;
+
   k_shake_flag.template sync<DeviceType>();
   k_shake_atom.template sync<DeviceType>();
   k_shake_type.template sync<DeviceType>();
diff --git a/src/KOKKOS/fix_shake_kokkos.h b/src/KOKKOS/fix_shake_kokkos.h
index 185e69ce86..19f3a2343d 100644
--- a/src/KOKKOS/fix_shake_kokkos.h
+++ b/src/KOKKOS/fix_shake_kokkos.h
@@ -44,8 +44,6 @@ struct TagFixShakeUnpackExchange{};
 template<class DeviceType>
 class FixShakeKokkos : public FixShake, public KokkosBase {
 
- //friend class FixEHEX;
-
  public:
   typedef DeviceType device_type;
   typedef EV_FLOAT value_type;
@@ -77,7 +75,7 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
   void shake_end_of_step(int vflag) override;
   void correct_coordinates(int vflag) override;
 
-  int dof(int) override;
+  bigint dof(int) override;
 
   void unconstrained_update() override;
 
@@ -112,9 +110,12 @@ class FixShakeKokkos : public FixShake, public KokkosBase {
 
   void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
                               DAT::tdual_int_1d &indices,int nrecv,
+                              int nrecv1,int nrecv1extra,
                               ExecutionSpace space) override;
 
  protected:
+  int nrecv1,nextrarecv1;
+
   typename AT::t_x_array d_x;
   typename AT::t_v_array d_v;
   typename AT::t_f_array d_f;
@@ -259,4 +260,3 @@ struct FixShakeKokkosPackExchangeFunctor {
 
 #endif
 #endif
-
diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp
index 37ffd15cdf..cb35a63fcb 100644
--- a/src/KOKKOS/fix_shardlow_kokkos.cpp
+++ b/src/KOKKOS/fix_shardlow_kokkos.cpp
@@ -57,7 +57,7 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace random_external_state;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 #define EPSILON_SQUARED ((EPSILON) * (EPSILON))
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp
index efd8a652ff..6571db37ed 100644
--- a/src/KOKKOS/fix_spring_self_kokkos.cpp
+++ b/src/KOKKOS/fix_spring_self_kokkos.cpp
@@ -184,12 +184,12 @@ void FixSpringSelfKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
-void FixSpringSelfKokkos<DeviceType>::pack_exchange_item(const int &mysend, int &offset, const bool &final) const
+void FixSpringSelfKokkos<DeviceType>::pack_exchange_item(const int &mysend, int &offset, const bool &/*final*/) const
 {
   const int i = d_exchange_sendlist(mysend);
 
-  d_buf[mysend] = nsend + offset;
   int m = nsend + offset;
+  d_buf[mysend] = m;
   d_buf[m++] = d_xoriginal(i,0);
   d_buf[m++] = d_xoriginal(i,1);
   d_buf[m++] = d_xoriginal(i,2);
@@ -258,6 +258,8 @@ void FixSpringSelfKokkos<DeviceType>::operator()(TagFixSpringSelfUnpackExchange,
 
   if (index > -1) {
     int m = d_buf[i];
+    if (i >= nrecv1)
+      m = nextrarecv1 + d_buf[nextrarecv1 + i - nrecv1];
 
     d_xoriginal(index,0) = static_cast<tagint> (d_buf[m++]);
     d_xoriginal(index,1) = static_cast<tagint> (d_buf[m++]);
@@ -270,6 +272,7 @@ void FixSpringSelfKokkos<DeviceType>::operator()(TagFixSpringSelfUnpackExchange,
 template <class DeviceType>
 void FixSpringSelfKokkos<DeviceType>::unpack_exchange_kokkos(
   DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
+  int nrecv1, int nextrarecv1,
   ExecutionSpace /*space*/)
 {
   k_buf.sync<DeviceType>();
@@ -280,6 +283,9 @@ void FixSpringSelfKokkos<DeviceType>::unpack_exchange_kokkos(
     k_buf.extent(0)*k_buf.extent(1));
   d_indices = k_indices.view<DeviceType>();
 
+  this->nrecv1 = nrecv1;
+  this->nextrarecv1 = nextrarecv1;
+
   k_xoriginal.template sync<DeviceType>();
 
   copymode = 1;
diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h
index b23e92249b..add5a80bc7 100644
--- a/src/KOKKOS/fix_spring_self_kokkos.h
+++ b/src/KOKKOS/fix_spring_self_kokkos.h
@@ -58,6 +58,7 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase {
 
   void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
                               DAT::tdual_int_1d &indices,int nrecv,
+                              int nrecv1,int nrecv1extra,
                               ExecutionSpace space) override;
 
 
@@ -65,6 +66,8 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase {
   int unpack_exchange(int, double *) override;
 
  protected:
+  int nrecv1,nextrarecv1;
+
   DAT::tdual_x_array k_xoriginal;
   typename AT::t_x_array d_xoriginal;
 
diff --git a/src/KOKKOS/fix_temp_berendsen_kokkos.cpp b/src/KOKKOS/fix_temp_berendsen_kokkos.cpp
new file mode 100644
index 0000000000..b986b3189a
--- /dev/null
+++ b/src/KOKKOS/fix_temp_berendsen_kokkos.cpp
@@ -0,0 +1,135 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "fix_temp_berendsen_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "comm.h"
+#include "compute.h"
+#include "error.h"
+#include "force.h"
+#include "group.h"
+#include "input.h"
+#include "modify.h"
+#include "update.h"
+#include "variable.h"
+#include "atom_masks.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+enum{NOBIAS,BIAS};
+enum{CONSTANT,EQUAL};
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixTempBerendsenKokkos<DeviceType>::FixTempBerendsenKokkos(LAMMPS *lmp, int narg, char **arg) :
+  FixTempBerendsen(lmp, narg, arg)
+{
+  kokkosable = 1;
+  atomKK = (AtomKokkos *)atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+
+  datamask_read = EMPTY_MASK;
+  datamask_modify = EMPTY_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixTempBerendsenKokkos<DeviceType>::end_of_step()
+{
+  atomKK->sync(temperature->execution_space,temperature->datamask_read);
+  double t_current = temperature->compute_scalar();
+  atomKK->modified(temperature->execution_space,temperature->datamask_modify);
+  atomKK->sync(execution_space,temperature->datamask_modify);
+
+  double tdof = temperature->dof;
+
+  // there is nothing to do, if there are no degrees of freedom
+
+  if (tdof < 1) return;
+
+  if (t_current == 0.0)
+    error->all(FLERR, "Computed current temperature for fix temp/berendsen must not be 0.0");
+
+  double delta = update->ntimestep - update->beginstep;
+  if (delta != 0.0) delta /= update->endstep - update->beginstep;
+
+  // set current t_target
+  // if variable temp, evaluate variable, wrap with clear/add
+
+  if (tstyle == CONSTANT)
+    t_target = t_start + delta * (t_stop-t_start);
+  else {
+    modify->clearstep_compute();
+    t_target = input->variable->compute_equal(tvar);
+    if (t_target < 0.0)
+      error->one(FLERR, "Fix temp/berendsen variable {} returned negative temperature",
+                 input->variable->names[tvar]);
+    modify->addstep_compute(update->ntimestep + nevery);
+  }
+
+  // rescale velocities by lamda
+  // for BIAS:
+  //   temperature is current, so do not need to re-compute
+  //   OK to not test returned v = 0, since lamda is multiplied by v
+
+  double lamda = sqrt(1.0 + update->dt/t_period*(t_target/t_current - 1.0));
+  double efactor = 0.5 * force->boltz * tdof;
+  energy += t_current * (1.0-lamda*lamda) * efactor;
+
+  auto v = atomKK->k_v.view<DeviceType>();
+  auto mask = atomKK->k_mask.view<DeviceType>();
+  int nlocal = atom->nlocal;
+  auto groupbit = this->groupbit;
+
+  if (which == NOBIAS) {
+    atomKK->sync(temperature->execution_space,temperature->datamask_read);
+    temperature->remove_bias_all();
+    atomKK->modified(temperature->execution_space,temperature->datamask_modify);
+    atomKK->sync(execution_space,temperature->datamask_modify);
+  }
+
+  atomKK->sync(execution_space,V_MASK|MASK_MASK);
+
+  Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), LAMMPS_LAMBDA(int i) {
+    if (mask[i] & groupbit) {
+      v(i,0) *= lamda;
+      v(i,1) *= lamda;
+      v(i,2) *= lamda;
+    }
+  });
+
+  atomKK->modified(execution_space,V_MASK);
+
+  if (which == NOBIAS) {
+    atomKK->sync(temperature->execution_space,temperature->datamask_read);
+    temperature->restore_bias_all();
+    atomKK->modified(temperature->execution_space,temperature->datamask_modify);
+    atomKK->sync(execution_space,temperature->datamask_modify);
+  }
+}
+/* ---------------------------------------------------------------------- */
+
+namespace LAMMPS_NS {
+template class FixTempBerendsenKokkos<LMPDeviceType>;
+#ifdef LMP_KOKKOS_GPU
+template class FixTempBerendsenKokkos<LMPHostType>;
+#endif
+}
diff --git a/src/npair_half_bin_atomonly_newton.h b/src/KOKKOS/fix_temp_berendsen_kokkos.h
similarity index 55%
rename from src/npair_half_bin_atomonly_newton.h
rename to src/KOKKOS/fix_temp_berendsen_kokkos.h
index b17f7608fc..6a0aa5ce98 100644
--- a/src/npair_half_bin_atomonly_newton.h
+++ b/src/KOKKOS/fix_temp_berendsen_kokkos.h
@@ -11,25 +11,31 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#ifdef NPAIR_CLASS
+#ifdef FIX_CLASS
 // clang-format off
-NPairStyle(half/bin/atomonly/newton,
-           NPairHalfBinAtomonlyNewton,
-           NP_HALF | NP_BIN | NP_ATOMONLY | NP_NEWTON | NP_ORTHO);
+FixStyle(temp/berendsen/kk,FixTempBerendsenKokkos<LMPDeviceType>);
+FixStyle(temp/berendsen/kk/device,FixTempBerendsenKokkos<LMPDeviceType>);
+FixStyle(temp/berendsen/kk/host,FixTempBerendsenKokkos<LMPHostType>);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_BIN_ATOMONLY_NEWTON_H
-#define LMP_NPAIR_HALF_BIN_ATOMONLY_NEWTON_H
+// clang-format off
+#ifndef LMP_FIX_TEMP_BERENDSEN_KOKKOS_H
+#define LMP_FIX_TEMP_BERENDSEN_KOKKOS_H
 
-#include "npair.h"
+#include "fix_temp_berendsen.h"
+#include "kokkos_type.h"
 
 namespace LAMMPS_NS {
 
-class NPairHalfBinAtomonlyNewton : public NPair {
+template<class DeviceType>
+class FixTempBerendsenKokkos : public FixTempBerendsen {
  public:
-  NPairHalfBinAtomonlyNewton(class LAMMPS *);
-  void build(class NeighList *) override;
+  typedef DeviceType device_type;
+
+  FixTempBerendsenKokkos(class LAMMPS *, int, char **);
+  ~FixTempBerendsenKokkos() override {}
+  void end_of_step() override;
 };
 
 }    // namespace LAMMPS_NS
diff --git a/src/KOKKOS/fix_temp_rescale_kokkos.cpp b/src/KOKKOS/fix_temp_rescale_kokkos.cpp
new file mode 100644
index 0000000000..3a1c6ddd26
--- /dev/null
+++ b/src/KOKKOS/fix_temp_rescale_kokkos.cpp
@@ -0,0 +1,140 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "fix_temp_rescale_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "comm.h"
+#include "compute.h"
+#include "error.h"
+#include "force.h"
+#include "group.h"
+#include "input.h"
+#include "modify.h"
+#include "update.h"
+#include "variable.h"
+#include "atom_masks.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+enum{NOBIAS,BIAS};
+enum{CONSTANT,EQUAL};
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+FixTempRescaleKokkos<DeviceType>::FixTempRescaleKokkos(LAMMPS *lmp, int narg, char **arg) :
+  FixTempRescale(lmp, narg, arg)
+{
+  kokkosable = 1;
+  atomKK = (AtomKokkos *)atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+
+  datamask_read = EMPTY_MASK;
+  datamask_modify = EMPTY_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void FixTempRescaleKokkos<DeviceType>::end_of_step()
+{
+  atomKK->sync(temperature->execution_space,temperature->datamask_read);
+  double t_current = temperature->compute_scalar();
+  atomKK->modified(temperature->execution_space,temperature->datamask_modify);
+  atomKK->sync(execution_space,temperature->datamask_modify);
+
+  // there is nothing to do, if there are no degrees of freedom
+
+  if (temperature->dof < 1) return;
+
+  // protect against division by zero
+
+  if (t_current == 0.0)
+    error->all(FLERR,"Computed temperature for fix temp/rescale cannot be 0.0");
+
+  double delta = update->ntimestep - update->beginstep;
+  if (delta != 0.0) delta /= update->endstep - update->beginstep;
+
+  // set current t_target
+  // if variable temp, evaluate variable, wrap with clear/add
+
+  if (tstyle == CONSTANT)
+    t_target = t_start + delta * (t_stop-t_start);
+  else {
+    modify->clearstep_compute();
+    t_target = input->variable->compute_equal(tvar);
+    if (t_target < 0.0)
+      error->one(FLERR, "Fix temp/rescale variable returned negative temperature");
+    modify->addstep_compute(update->ntimestep + nevery);
+  }
+
+  // rescale velocity of appropriate atoms if outside window
+  // for BIAS:
+  //   temperature is current, so do not need to re-compute
+  //   OK to not test returned v = 0, since factor is multiplied by v
+
+  if (fabs(t_current-t_target) > t_window) {
+    t_target = t_current - fraction*(t_current-t_target);
+    double factor = sqrt(t_target/t_current);
+    double efactor = 0.5 * force->boltz * temperature->dof;
+
+    energy += (t_current-t_target) * efactor;
+
+    auto v = atomKK->k_v.view<DeviceType>();
+    auto mask = atomKK->k_mask.view<DeviceType>();
+    int nlocal = atom->nlocal;
+    auto groupbit = this->groupbit;
+
+    if (which == NOBIAS) {
+      atomKK->sync(temperature->execution_space,temperature->datamask_read);
+      temperature->remove_bias_all();
+      atomKK->modified(temperature->execution_space,temperature->datamask_modify);
+      atomKK->sync(execution_space,temperature->datamask_modify);
+    }
+
+    atomKK->sync(execution_space,V_MASK|MASK_MASK);
+
+    Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal), LAMMPS_LAMBDA(int i) {
+      if (mask[i] & groupbit) {
+        v(i,0) *= factor;
+        v(i,1) *= factor;
+        v(i,2) *= factor;
+      }
+    });
+
+    atomKK->modified(execution_space,V_MASK);
+
+    if (which == NOBIAS) {
+      atomKK->sync(temperature->execution_space,temperature->datamask_read);
+      temperature->restore_bias_all();
+      atomKK->modified(temperature->execution_space,temperature->datamask_modify);
+      atomKK->sync(execution_space,temperature->datamask_modify);
+
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+namespace LAMMPS_NS {
+template class FixTempRescaleKokkos<LMPDeviceType>;
+#ifdef LMP_KOKKOS_GPU
+template class FixTempRescaleKokkos<LMPHostType>;
+#endif
+}
diff --git a/src/INTEL/npair_half_bin_newton_intel.h b/src/KOKKOS/fix_temp_rescale_kokkos.h
similarity index 57%
rename from src/INTEL/npair_half_bin_newton_intel.h
rename to src/KOKKOS/fix_temp_rescale_kokkos.h
index 092d4f2101..7dd3111325 100644
--- a/src/INTEL/npair_half_bin_newton_intel.h
+++ b/src/KOKKOS/fix_temp_rescale_kokkos.h
@@ -1,4 +1,3 @@
-// clang-format off
 /* -*- c++ -*- ----------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    https://www.lammps.org/, Sandia National Laboratories
@@ -12,29 +11,31 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#ifdef NPAIR_CLASS
+#ifdef FIX_CLASS
 // clang-format off
-NPairStyle(half/bin/newton/intel,
-           NPairHalfBinNewtonIntel,
-           NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO | NP_INTEL);
+FixStyle(temp/rescale/kk,FixTempRescaleKokkos<LMPDeviceType>);
+FixStyle(temp/rescale/kk/device,FixTempRescaleKokkos<LMPDeviceType>);
+FixStyle(temp/rescale/kk/host,FixTempRescaleKokkos<LMPHostType>);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_BIN_NEWTON_INTEL_H
-#define LMP_NPAIR_HALF_BIN_NEWTON_INTEL_H
+// clang-format off
+#ifndef LMP_FIX_TEMP_RESCALE_KOKKOS_H
+#define LMP_FIX_TEMP_RESCALE_KOKKOS_H
 
-#include "fix_intel.h"
-#include "npair_intel.h"
+#include "fix_temp_rescale.h"
+#include "kokkos_type.h"
 
 namespace LAMMPS_NS {
 
-class NPairHalfBinNewtonIntel : public NPairIntel {
+template<class DeviceType>
+class FixTempRescaleKokkos : public FixTempRescale {
  public:
-  NPairHalfBinNewtonIntel(class LAMMPS *);
-  void build(class NeighList *) override;
+  typedef DeviceType device_type;
 
- private:
-  template <class flt_t, class acc_t> void hbni(NeighList *, IntelBuffers<flt_t, acc_t> *);
+  FixTempRescaleKokkos(class LAMMPS *, int, char **);
+  ~FixTempRescaleKokkos() override {}
+  void end_of_step() override;
 };
 
 }    // namespace LAMMPS_NS
diff --git a/src/KOKKOS/fix_wall_gran_kokkos.cpp b/src/KOKKOS/fix_wall_gran_kokkos.cpp
index f870b0f240..25e405c798 100644
--- a/src/KOKKOS/fix_wall_gran_kokkos.cpp
+++ b/src/KOKKOS/fix_wall_gran_kokkos.cpp
@@ -419,6 +419,7 @@ void FixWallGranKokkos<DeviceType>::operator()(TagFixWallGranUnpackExchange, con
 template<class DeviceType>
 void FixWallGranKokkos<DeviceType>::unpack_exchange_kokkos(
   DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
+  int /*nrecv1*/, int /*nextrarecv1*/,
   ExecutionSpace /*space*/)
 {
   d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
@@ -430,7 +431,6 @@ void FixWallGranKokkos<DeviceType>::unpack_exchange_kokkos(
 
   copymode = 1;
 
-
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixWallGranUnpackExchange>(0,nrecv),*this);
 
   copymode = 0;
diff --git a/src/KOKKOS/fix_wall_gran_kokkos.h b/src/KOKKOS/fix_wall_gran_kokkos.h
index c7d566ec72..ae54fdb085 100644
--- a/src/KOKKOS/fix_wall_gran_kokkos.h
+++ b/src/KOKKOS/fix_wall_gran_kokkos.h
@@ -62,12 +62,13 @@ class FixWallGranKokkos : public FixWallGranOld, public KokkosBase {
   void operator()(TagFixWallGranUnpackExchange, const int&) const;
 
   int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
-			   DAT::tdual_int_1d k_sendlist,
-			   DAT::tdual_int_1d k_copylist,
-			   ExecutionSpace space) override;
+                           DAT::tdual_int_1d k_sendlist,
+                           DAT::tdual_int_1d k_copylist,
+                           ExecutionSpace space) override;
 
   void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
                               DAT::tdual_int_1d &indices,int nrecv,
+                              int nrecv1,int nrecv1extra,
                               ExecutionSpace space) override;
 
  private:
@@ -91,6 +92,7 @@ class FixWallGranKokkos : public FixWallGranOld, public KokkosBase {
   typename AT::t_int_1d d_copylist;
   typename AT::t_int_1d d_indices;
 };
+
 }
 
 #endif
diff --git a/src/KOKKOS/fix_wall_gran_old.cpp b/src/KOKKOS/fix_wall_gran_old.cpp
index 8c02e6146d..63b779f6f2 100644
--- a/src/KOKKOS/fix_wall_gran_old.cpp
+++ b/src/KOKKOS/fix_wall_gran_old.cpp
@@ -45,8 +45,8 @@ using namespace MathConst;
 #define THREEQUARTERS 0.75                 // 3/4
 #define TWOPI 6.28318530717959             // 2*PI
 
-#define BIG 1.0e20
-#define EPSILON 1e-10
+static constexpr double BIG = 1.0e20;
+static constexpr double EPSILON = 1e-10;
 
 // XYZ PLANE need to be 0,1,2
 
@@ -68,8 +68,8 @@ FixWallGranOld::FixWallGranOld(LAMMPS *lmp, int narg, char **arg) :
 {
   if (narg < 4) error->all(FLERR,"Illegal fix wall/gran command");
 
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Fix wall/gran requires atom style sphere");
+  if (!atom->omega_flag) error->all(FLERR,"Fix {} requires atom attribute omega", style);
+  if (!atom->radius_flag) error->all(FLERR,"Fix {} requires atom attribute radius", style);
 
   create_attribute = 1;
   limit_damping = 0;
@@ -81,7 +81,7 @@ FixWallGranOld::FixWallGranOld(LAMMPS *lmp, int narg, char **arg) :
   else if (strcmp(arg[3],"hooke/history") == 0) pairstyle = HOOKE_HISTORY;
   else if (strcmp(arg[3],"hertz/history") == 0) pairstyle = HERTZ_HISTORY;
   else if (strcmp(arg[3],"granular") == 0) pairstyle = GRANULAR;
-  else error->all(FLERR,"Invalid fix wall/gran interaction style");
+  else error->all(FLERR,"Invalid fix {} interaction style: {}", style, arg[3]);
 
   use_history = restart_peratom = 1;
   if (pairstyle == HOOKE) use_history = restart_peratom = 0;
diff --git a/src/KOKKOS/grid3d_kokkos.cpp b/src/KOKKOS/grid3d_kokkos.cpp
index 9a82e0157d..26882f20ca 100644
--- a/src/KOKKOS/grid3d_kokkos.cpp
+++ b/src/KOKKOS/grid3d_kokkos.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 16
+static constexpr int DELTA = 16;
 
 /* ----------------------------------------------------------------------
    NOTES:
@@ -635,17 +635,17 @@ void Grid3dKokkos<DeviceType>::setup_comm_tiled(int &nbuf1, int &nbuf2)
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
-void Grid3dKokkos<DeviceType>::forward_comm(int caller, void *ptr, int which, int nper, int nbyte,
+void Grid3dKokkos<DeviceType>::forward_comm(int caller, void *ptr, int which, int nper, int /*nbyte*/,
                             FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2,
                             MPI_Datatype datatype)
 {
   if (caller == KSPACE) {
-    if (layout != Comm::LAYOUT_TILED)
+    if (comm->layout != Comm::LAYOUT_TILED)
     forward_comm_kspace_brick((KSpace *) ptr,which,nper,k_buf1,k_buf2,datatype);
   else
     forward_comm_kspace_tiled((KSpace *) ptr,which,nper,k_buf1,k_buf2,datatype);
   } else
-    error->all(FLERR,"Kokkos grid comm only supports Kspace");
+    error->all(FLERR,"Kokkos grid comm currently only supports Kspace");
 }
 
 /* ----------------------------------------------------------------------
@@ -775,12 +775,12 @@ forward_comm_kspace_tiled(KSpace *kspace, int which, int nper,
 ------------------------------------------------------------------------- */
 
 template<class DeviceType>
-void Grid3dKokkos<DeviceType>::reverse_comm(int caller, void *ptr, int which, int nper, int nbyte,
+void Grid3dKokkos<DeviceType>::reverse_comm(int caller, void *ptr, int which, int nper, int /*nbyte*/,
                             FFT_DAT::tdual_FFT_SCALAR_1d& k_buf1, FFT_DAT::tdual_FFT_SCALAR_1d& k_buf2,
                             MPI_Datatype datatype)
 {
   if (caller == KSPACE) {
-    if (layout != Comm::LAYOUT_TILED)
+    if (comm->layout != Comm::LAYOUT_TILED)
       reverse_comm_kspace_brick((KSpace *) ptr,which,nper,k_buf1,k_buf2,datatype);
     else
       reverse_comm_kspace_tiled((KSpace *) ptr,which,nper,k_buf1,k_buf2,datatype);
@@ -945,7 +945,7 @@ int Grid3dKokkos<DeviceType>::indices(DAT::tdual_int_2d &k_list, int index,
                        int xlo, int xhi, int ylo, int yhi, int zlo, int zhi)
 {
   int nmax = (xhi-xlo+1) * (yhi-ylo+1) * (zhi-zlo+1);
-  if (k_list.extent(1) < nmax)
+  if ((int)k_list.extent(1) < nmax)
     k_list.resize(k_list.extent(0),nmax);
 
   if (nmax == 0) return 0;
diff --git a/src/KOKKOS/improper_class2_kokkos.cpp b/src/KOKKOS/improper_class2_kokkos.cpp
index f1ed6fdbc0..4b9a009df2 100644
--- a/src/KOKKOS/improper_class2_kokkos.cpp
+++ b/src/KOKKOS/improper_class2_kokkos.cpp
@@ -27,8 +27,8 @@
 
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/improper_harmonic_kokkos.cpp b/src/KOKKOS/improper_harmonic_kokkos.cpp
index 1d217461d0..a075238f22 100644
--- a/src/KOKKOS/improper_harmonic_kokkos.cpp
+++ b/src/KOKKOS/improper_harmonic_kokkos.cpp
@@ -28,8 +28,8 @@
 
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h
index 265677a21c..e24768f774 100644
--- a/src/KOKKOS/kissfft_kokkos.h
+++ b/src/KOKKOS/kissfft_kokkos.h
@@ -489,7 +489,7 @@ class KissFFTKokkos {
    * It can be freed with free(), rather than a kiss_fft-specific function.
    */
 
-  static kiss_fft_state_kokkos<DeviceType> kiss_fft_alloc_kokkos(int nfft, int inverse_fft, void *mem, size_t *lenmem)
+  static kiss_fft_state_kokkos<DeviceType> kiss_fft_alloc_kokkos(int nfft, int inverse_fft, void * /*mem*/, size_t * /*lenmem*/)
   {
       kiss_fft_state_kokkos<DeviceType> st;
       int i;
diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp
index c963cd52d0..b8bcd80a00 100644
--- a/src/KOKKOS/kokkos.cpp
+++ b/src/KOKKOS/kokkos.cpp
@@ -608,8 +608,8 @@ void KokkosLMP::accelerator(int narg, char **arg)
 
   force->newton = force->newton_pair = force->newton_bond = newtonflag;
 
-  if (neigh_thread && neighflag != FULL)
-    error->all(FLERR,"Must use KOKKOS package option 'neigh full' with 'neigh/thread on'");
+  if (neigh_thread && newtonflag)
+    error->all(FLERR,"Must use KOKKOS package option 'newton off' with 'neigh/thread on'");
 
   neighbor->binsize_user = binsize;
   if (binsize <= 0.0) neighbor->binsizeflag = 0;
@@ -622,7 +622,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
 
 int KokkosLMP::neigh_count(int m)
 {
-  int inum;
+  int inum = 0;
   int nneigh = 0;
 
   ArrayTypes<LMPHostType>::t_int_1d h_ilist;
diff --git a/src/KOKKOS/kokkos_base.h b/src/KOKKOS/kokkos_base.h
index 1e22a38657..24fcc47579 100644
--- a/src/KOKKOS/kokkos_base.h
+++ b/src/KOKKOS/kokkos_base.h
@@ -47,6 +47,7 @@ class KokkosBase {
                                    ExecutionSpace /*space*/) { return 0; }
   virtual void unpack_exchange_kokkos(DAT::tdual_xfloat_2d & /*k_buf*/,
                                       DAT::tdual_int_1d & /*indices*/, int /*nrecv*/,
+                                      int /*nrecv1*/, int /*nextrarecv1*/,
                                       ExecutionSpace /*space*/) {}
 
   // Region
diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h
index c8ab2198d6..1009e43196 100644
--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@@ -453,13 +453,6 @@ struct alignas(2*sizeof(F_FLOAT)) s_FLOAT2 {
     v[0] = v[1] = 0.0;
   }
 
-  KOKKOS_INLINE_FUNCTION
-  s_FLOAT2(const s_FLOAT2 & rhs) {
-    for (int i = 0; i < 2; i++){
-      v[i] = rhs.v[i];
-    }
-  }
-
   KOKKOS_INLINE_FUNCTION
   void operator+=(const s_FLOAT2 &rhs) {
     v[0] += rhs.v[0];
diff --git a/src/KOKKOS/min_kokkos.cpp b/src/KOKKOS/min_kokkos.cpp
index c01a53c7b3..3460fe9009 100644
--- a/src/KOKKOS/min_kokkos.cpp
+++ b/src/KOKKOS/min_kokkos.cpp
@@ -21,6 +21,7 @@
 #include "angle.h"
 #include "atom_kokkos.h"
 #include "atom_masks.h"
+#include "atom_vec.h"
 #include "bond.h"
 #include "comm.h"
 #include "compute.h"
diff --git a/src/KOKKOS/min_linesearch_kokkos.cpp b/src/KOKKOS/min_linesearch_kokkos.cpp
index e8a22f9ddb..601d5e7d57 100644
--- a/src/KOKKOS/min_linesearch_kokkos.cpp
+++ b/src/KOKKOS/min_linesearch_kokkos.cpp
@@ -43,7 +43,7 @@ using namespace LAMMPS_NS;
 #define BACKTRACK_SLOPE 0.4
 #define QUADRATIC_TOL 0.1
 //#define EMACH 1.0e-8
-#define EMACH 1.0e-8
+static constexpr double EMACH = 1.0e-8;
 #define EPS_QUAD 1.0e-28
 
 /* ---------------------------------------------------------------------- */
@@ -59,8 +59,8 @@ MinLineSearchKokkos::MinLineSearchKokkos(LAMMPS *lmp) : MinKokkos(lmp)
 
 MinLineSearchKokkos::~MinLineSearchKokkos()
 {
-  delete [] gextra;
-  delete [] hextra;
+  delete[] gextra;
+  delete[] hextra;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -171,8 +171,8 @@ int MinLineSearchKokkos::linemin_quadratic(double eoriginal, double &alpha)
 {
   double fdothall,fdothme,hme,hmaxall;
   double de_ideal,de;
-  double delfh,engprev,relerr,alphaprev,fhprev,ff,fh,alpha0;
-  double dot[2],dotall[2];
+  double delfh,engprev,relerr,alphaprev,fhprev,fh,alpha0;
+  double dot,dotall;
   double alphamax;
 
   fix_minimize_kk->k_vectors.sync<LMPDeviceType>();
@@ -280,22 +280,16 @@ int MinLineSearchKokkos::linemin_quadratic(double eoriginal, double &alpha)
         sdot.d1 += l_fvec[i]*l_h[i];
       },sdot);
     }
-    dot[0] = sdot.d0;
-    dot[1] = sdot.d1;
+    dot = sdot.d1;
 
-    MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world);
+    MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
     if (nextra_global) {
       for (int i = 0; i < nextra_global; i++) {
-        dotall[0] += fextra[i]*fextra[i];
-        dotall[1] += fextra[i]*hextra[i];
+        dotall += fextra[i]*hextra[i];
       }
     }
-    ff = dotall[0];
-    fh = dotall[1];
-    if (output->thermo->normflag) {
-      ff /= atom->natoms;
-      fh /= atom->natoms;
-    }
+    fh = dotall;
+    if (output->thermo->normflag) fh /= atom->natoms;
 
     delfh = fh - fhprev;
 
diff --git a/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp b/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp
index ff9e5d2aa6..b079b734e0 100644
--- a/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp
+++ b/src/KOKKOS/mliap_descriptor_so3_kokkos.cpp
@@ -31,8 +31,8 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
-#define MAXWORD 3
+static constexpr int MAXLINE = 1024;
+static constexpr int MAXWORD = 3;
 
 /* ---------------------------------------------------------------------- */
 template <class DeviceType>
diff --git a/src/KOKKOS/mliap_so3_kokkos.cpp b/src/KOKKOS/mliap_so3_kokkos.cpp
index 1fb5ffb52e..3f6370a6fc 100644
--- a/src/KOKKOS/mliap_so3_kokkos.cpp
+++ b/src/KOKKOS/mliap_so3_kokkos.cpp
@@ -32,7 +32,7 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecialKokkos;
 
-#define SMALL 1.0e-8
+static constexpr double SMALL = 1.0e-8;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/modify_kokkos.cpp b/src/KOKKOS/modify_kokkos.cpp
index 8d8ffca671..26ee88ff51 100644
--- a/src/KOKKOS/modify_kokkos.cpp
+++ b/src/KOKKOS/modify_kokkos.cpp
@@ -21,7 +21,7 @@
 
 using namespace LAMMPS_NS;
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/nbin_kokkos.cpp b/src/KOKKOS/nbin_kokkos.cpp
index e65cf4ecb7..fd8cf5771b 100644
--- a/src/KOKKOS/nbin_kokkos.cpp
+++ b/src/KOKKOS/nbin_kokkos.cpp
@@ -22,7 +22,7 @@
 
 using namespace LAMMPS_NS;
 
-#define SMALL 1.0e-6
+static constexpr double SMALL = 1.0e-6;
 #define CUT2BIN_RATIO 100
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/KOKKOS/neigh_bond_kokkos.cpp b/src/KOKKOS/neigh_bond_kokkos.cpp
index b749590779..85ca6c916e 100644
--- a/src/KOKKOS/neigh_bond_kokkos.cpp
+++ b/src/KOKKOS/neigh_bond_kokkos.cpp
@@ -35,8 +35,8 @@
 #include <cstring>
 using namespace LAMMPS_NS;
 
-#define BONDDELTA 10000
-#define LB_FACTOR 1.5
+static constexpr int BONDDELTA = 10000;
+static constexpr double LB_FACTOR = 1.5;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h
index fe5484a771..8dd7a1c5ef 100644
--- a/src/KOKKOS/npair_kokkos.h
+++ b/src/KOKKOS/npair_kokkos.h
@@ -303,7 +303,7 @@ class NeighborKokkosExecute
                         const typename ArrayTypes<LMPHostType>::t_int_scalar _h_resize,
                         const typename AT::t_int_scalar _new_maxneighs,
                         const typename ArrayTypes<LMPHostType>::t_int_scalar _h_new_maxneighs):
-    neigh_list(_neigh_list), cutneighsq(_cutneighsq),delta(_delta),exclude(_exclude),
+    neigh_list(_neigh_list),delta(_delta),cutneighsq(_cutneighsq),exclude(_exclude),
     nex_type(_nex_type),ex1_type(_ex1_type),ex2_type(_ex2_type),
     ex_type(_ex_type),nex_group(_nex_group),
     ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),
@@ -319,10 +319,11 @@ class NeighborKokkosExecute
     mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
     bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
     nlocal(_nlocal),nall(_nall),neigh_transpose(_neigh_transpose),
+    resize(_resize),new_maxneighs(_new_maxneighs),
+    h_resize(_h_resize),h_new_maxneighs(_h_new_maxneighs),
     xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
     xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half),
-    skin(_skin),resize(_resize),h_resize(_h_resize),
-    new_maxneighs(_new_maxneighs),h_new_maxneighs(_h_new_maxneighs) {
+    skin(_skin) {
 
     if (molecular == 2) moltemplate = 1;
     else moltemplate = 0;
diff --git a/src/KOKKOS/pair_dpd_ext_kokkos.cpp b/src/KOKKOS/pair_dpd_ext_kokkos.cpp
index 7264877d70..3624208c6b 100644
--- a/src/KOKKOS/pair_dpd_ext_kokkos.cpp
+++ b/src/KOKKOS/pair_dpd_ext_kokkos.cpp
@@ -37,7 +37,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 
 template<class DeviceType>
diff --git a/src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp b/src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp
index dcf88d1763..9808c53856 100644
--- a/src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp
+++ b/src/KOKKOS/pair_dpd_ext_tstat_kokkos.cpp
@@ -37,7 +37,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 
 template<class DeviceType>
diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp
index a0ee204aeb..dd1591bf4b 100644
--- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp
+++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp
@@ -34,7 +34,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/pair_dpd_kokkos.cpp b/src/KOKKOS/pair_dpd_kokkos.cpp
index 3db8a06f6d..5dca219cdf 100644
--- a/src/KOKKOS/pair_dpd_kokkos.cpp
+++ b/src/KOKKOS/pair_dpd_kokkos.cpp
@@ -37,7 +37,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 
 template<class DeviceType>
diff --git a/src/KOKKOS/pair_dpd_tstat_kokkos.cpp b/src/KOKKOS/pair_dpd_tstat_kokkos.cpp
index 9058c23628..78cc862ac0 100644
--- a/src/KOKKOS/pair_dpd_tstat_kokkos.cpp
+++ b/src/KOKKOS/pair_dpd_tstat_kokkos.cpp
@@ -37,7 +37,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 
 template<class DeviceType>
diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp
index b4576db89a..dad7413669 100644
--- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp
+++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp
@@ -42,8 +42,8 @@
 using namespace LAMMPS_NS;
 using namespace MathSpecialKokkos;
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 #ifdef DBL_EPSILON
   #define MY_EPSILON (10.0*DBL_EPSILON)
@@ -1702,7 +1702,8 @@ void PairExp6rxKokkos<DeviceType>::read_file(char *file)
   // one set of params can span multiple lines
 
   int n,nwords,ispecies;
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
 
   while (true) {
diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h
index d3c766f5ae..87324b49b9 100644
--- a/src/KOKKOS/pair_kokkos.h
+++ b/src/KOKKOS/pair_kokkos.h
@@ -19,10 +19,12 @@
 #ifndef LMP_PAIR_KOKKOS_H
 #define LMP_PAIR_KOKKOS_H
 
-#include "Kokkos_Macros.hpp"
 #include "pair.h"               // IWYU pragma: export
 #include "neighbor_kokkos.h"
 #include "neigh_list_kokkos.h"
+#include "math_special.h"
+#include "update.h"
+#include "Kokkos_Macros.hpp"
 #include "Kokkos_ScatterView.hpp"
 
 namespace LAMMPS_NS {
@@ -63,6 +65,7 @@ struct PairComputeFunctor  {
   typename AT::t_f_array f;
   typename AT::t_efloat_1d d_eatom;
   typename AT::t_virial_array d_vatom;
+  int inum;
 
   using KKDeviceType = typename KKDevice<device_type>::value;
   using DUP = NeedDup_v<NEIGHFLAG,device_type>;
@@ -81,8 +84,6 @@ struct PairComputeFunctor  {
   //             typename KKDevice<device_type>::value,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > vatom;
   KKScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,KKDeviceType,KKScatterSum,DUP> dup_vatom;
 
-
-
   NeighListKokkos<device_type> list;
 
   PairComputeFunctor(PairStyle* c_ptr,
@@ -95,6 +96,7 @@ struct PairComputeFunctor  {
     dup_f     = Kokkos::Experimental::create_scatter_view<KKScatterSum, DUP>(c.f);
     dup_eatom = Kokkos::Experimental::create_scatter_view<KKScatterSum, DUP>(c.d_eatom);
     dup_vatom = Kokkos::Experimental::create_scatter_view<KKScatterSum, DUP>(c.d_vatom);
+    inum = list.inum;
   };
 
   // Set copymode = 1 so parent allocations aren't destructed by copies of the style
@@ -105,17 +107,22 @@ struct PairComputeFunctor  {
   }
 
   void contribute() {
-    Kokkos::Experimental::contribute(c.f, dup_f);
+    int need_dup = std::is_same_v<DUP,Kokkos::Experimental::ScatterDuplicated>;
 
-    if (c.eflag_atom)
-      Kokkos::Experimental::contribute(c.d_eatom, dup_eatom);
+    if (need_dup) {
+      Kokkos::Experimental::contribute(c.f, dup_f);
 
-    if (c.vflag_atom)
-      Kokkos::Experimental::contribute(c.d_vatom, dup_vatom);
+      if (c.eflag_atom)
+        Kokkos::Experimental::contribute(c.d_eatom, dup_eatom);
+
+      if (c.vflag_atom)
+        Kokkos::Experimental::contribute(c.d_vatom, dup_vatom);
+    }
   }
 
   // Loop over neighbors of one atom without coulomb interaction
   // This function is called in parallel
+
   template<int EVFLAG, int NEWTON_PAIR>
   KOKKOS_FUNCTION
   EV_FLOAT compute_item(const int& ii,
@@ -161,7 +168,7 @@ struct PairComputeFunctor  {
         fytmp += dely*fpair;
         fztmp += delz*fpair;
 
-        if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
+        if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
           a_f(j,0) -= delx*fpair;
           a_f(j,1) -= dely*fpair;
           a_f(j,2) -= delz*fpair;
@@ -169,9 +176,9 @@ struct PairComputeFunctor  {
 
         if (EVFLAG) {
           F_FLOAT evdwl = 0.0;
-          if (c.eflag) {
+          if (c.eflag_either) {
             evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
-            ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*evdwl;
+            ev.evdwl += (((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*evdwl;
           }
 
           if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz);
@@ -189,6 +196,7 @@ struct PairComputeFunctor  {
 
   // Loop over neighbors of one atom with coulomb interaction
   // This function is called in parallel
+
   template<int EVFLAG, int NEWTON_PAIR>
   KOKKOS_FUNCTION
   EV_FLOAT compute_item(const int& ii,
@@ -241,7 +249,7 @@ struct PairComputeFunctor  {
         fytmp += dely*fpair;
         fztmp += delz*fpair;
 
-        if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
+        if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && (NEWTON_PAIR || j < c.nlocal)) {
           a_f(j,0) -= delx*fpair;
           a_f(j,1) -= dely*fpair;
           a_f(j,2) -= delz*fpair;
@@ -250,14 +258,14 @@ struct PairComputeFunctor  {
         if (EVFLAG) {
           F_FLOAT evdwl = 0.0;
           F_FLOAT ecoul = 0.0;
-          if (c.eflag) {
+          if (c.eflag_either) {
             if (rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) {
               evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
-              ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*evdwl;
+              ev.evdwl += (((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && (NEWTON_PAIR || (j < c.nlocal)))?1.0:0.5)*evdwl;
             }
             if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) {
               ecoul = c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
-              ev.ecoul += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(j<c.nlocal)))?1.0:0.5)*ecoul;
+              ev.ecoul += (((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && (NEWTON_PAIR || (j < c.nlocal)))?1.0:0.5)*ecoul;
             }
           }
 
@@ -273,14 +281,16 @@ struct PairComputeFunctor  {
     return ev;
   }
 
-  // Use TeamPolicy, assume Newton off, Full Neighborlist, and no energy/virial
+  // TeamPolicy, newton off, and no energy/virial
   // Loop over neighbors of one atom without coulomb interaction
   // This function is called in parallel
+
   KOKKOS_FUNCTION
   void compute_item_team(typename Kokkos::TeamPolicy<device_type>::member_type team,
                          const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
 
-    const int inum = team.league_size();
+    auto a_f = dup_f.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
+
     const int atoms_per_team = team.team_size();
     const int firstatom = team.league_rank()*atoms_per_team;
     const int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
@@ -292,7 +302,7 @@ struct PairComputeFunctor  {
       const X_FLOAT ztmp = c.x(i,2);
       const int itype = c.type(i);
 
-      if (ZEROFLAG) {
+      if (NEIGHFLAG == FULL && ZEROFLAG) {
         Kokkos::single(Kokkos::PerThread(team), [&] (){
           f(i,0) = 0.0;
           f(i,1) = 0.0;
@@ -321,30 +331,42 @@ struct PairComputeFunctor  {
 
           const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
 
-          ftmp.x += delx*fpair;
-          ftmp.y += dely*fpair;
-          ftmp.z += delz*fpair;
+          const F_FLOAT fx = delx*fpair;
+          const F_FLOAT fy = dely*fpair;
+          const F_FLOAT fz = delz*fpair;
+
+          ftmp.x += fx;
+          ftmp.y += fy;
+          ftmp.z += fz;
+
+          if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal) {
+            a_f(j,0) -= fx;
+            a_f(j,1) -= fy;
+            a_f(j,2) -= fz;
+          }
         }
 
       },fsum);
 
       Kokkos::single(Kokkos::PerThread(team), [&] () {
-        f(i,0) += fsum.x;
-        f(i,1) += fsum.y;
-        f(i,2) += fsum.z;
+        a_f(i,0) += fsum.x;
+        a_f(i,1) += fsum.y;
+        a_f(i,2) += fsum.z;
       });
 
     });
   }
 
-  // Use TeamPolicy, assume Newton off, Full Neighborlist, and no energy/virial
+  // TeamPolicy, newton off, and no energy/virial
   // Loop over neighbors of one atom with coulomb interaction
   // This function is called in parallel
+
   KOKKOS_FUNCTION
   void compute_item_team(typename Kokkos::TeamPolicy<device_type>::member_type team,
                          const NeighListKokkos<device_type> &list, const CoulTag& ) const {
 
-    const int inum = team.league_size();
+    auto a_f = dup_f.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
+
     const int atoms_per_team = team.team_size();
     int firstatom = team.league_rank()*atoms_per_team;
     int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
@@ -357,8 +379,9 @@ struct PairComputeFunctor  {
       const int itype = c.type(i);
       const F_FLOAT qtmp = c.q(i);
 
-      if (ZEROFLAG) {
-        Kokkos::single(Kokkos::PerThread(team), [&] (){
+      if (NEIGHFLAG == FULL && ZEROFLAG) {
+        Kokkos::single(Kokkos::PerThread(team), [&] ()
+        {
           f(i,0) = 0.0;
           f(i,1) = 0.0;
           f(i,2) = 0.0;
@@ -391,31 +414,45 @@ struct PairComputeFunctor  {
           if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
             fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
 
-          ftmp.x += delx*fpair;
-          ftmp.y += dely*fpair;
-          ftmp.z += delz*fpair;
+          const F_FLOAT fx = delx*fpair;
+          const F_FLOAT fy = dely*fpair;
+          const F_FLOAT fz = delz*fpair;
+
+          ftmp.x += fx;
+          ftmp.y += fy;
+          ftmp.z += fz;
+
+          if ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal) {
+            a_f(j,0) -= fx;
+            a_f(j,1) -= fy;
+            a_f(j,2) -= fz;
+          }
         }
+
       },fsum);
 
       Kokkos::single(Kokkos::PerThread(team), [&] () {
-      f(i,0) += fsum.x;
-      f(i,1) += fsum.y;
-      f(i,2) += fsum.z;
+        a_f(i,0) += fsum.x;
+        a_f(i,1) += fsum.y;
+        a_f(i,2) += fsum.z;
       });
     });
   }
 
-
-  // Use TeamPolicy, assume Newton off, Full Neighborlist, and energy/virial
+  // TeamPolicy, newton off, and energy/virial
   // Loop over neighbors of one atom without coulomb interaction
   // This function is called in parallel
+
   KOKKOS_FUNCTION
   EV_FLOAT compute_item_team_ev(typename Kokkos::TeamPolicy<device_type>::member_type team,
                                 const NeighListKokkos<device_type> &list, const NoCoulTag&) const {
 
+    auto a_f = dup_f.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
+    auto a_eatom = dup_eatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
+    auto a_vatom = dup_vatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
+
     EV_FLOAT ev;
 
-    const int inum = team.league_size();
     const int atoms_per_team = team.team_size();
     const int firstatom = team.league_rank()*atoms_per_team;
     const int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
@@ -427,8 +464,9 @@ struct PairComputeFunctor  {
       const X_FLOAT ztmp = c.x(i,2);
       const int itype = c.type(i);
 
-      if (ZEROFLAG) {
-        Kokkos::single(Kokkos::PerThread(team), [&] (){
+      if (NEIGHFLAG == FULL && ZEROFLAG) {
+        Kokkos::single(Kokkos::PerThread(team), [&] ()
+        {
           f(i,0) = 0.0;
           f(i,1) = 0.0;
           f(i,2) = 0.0;
@@ -456,37 +494,85 @@ struct PairComputeFunctor  {
 
           const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
 
-          fev_tmp.f[0] += delx*fpair;
-          fev_tmp.f[1] += dely*fpair;
-          fev_tmp.f[2] += delz*fpair;
+          const F_FLOAT fx = delx*fpair;
+          const F_FLOAT fy = dely*fpair;
+          const F_FLOAT fz = delz*fpair;
+
+          fev_tmp.f[0] += fx;
+          fev_tmp.f[1] += fy;
+          fev_tmp.f[2] += fz;
+
+          const int I_CONTRIB = (NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD);
+          const int J_CONTRIB = ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal);
+          const E_FLOAT factor = J_CONTRIB?1.0:0.5;
+
+          if (J_CONTRIB) {
+            a_f(j,0) -= fx;
+            a_f(j,1) -= fy;
+            a_f(j,2) -= fz;
+          }
 
           F_FLOAT evdwl = 0.0;
-          if (c.eflag) {
+          if (c.eflag_either) {
             evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
-            fev_tmp.evdwl += 0.5*evdwl;
+            fev_tmp.evdwl += factor * evdwl;
+
+            if (c.eflag_atom) {
+              const E_FLOAT epairhalf = 0.5 * evdwl;
+
+              if (I_CONTRIB)
+                a_eatom[i] += epairhalf;
+
+              if (J_CONTRIB)
+                a_eatom[j] += epairhalf;
+            }
           }
+
           if (c.vflag_either) {
-            fev_tmp.v[0] += 0.5*delx*delx*fpair;
-            fev_tmp.v[1] += 0.5*dely*dely*fpair;
-            fev_tmp.v[2] += 0.5*delz*delz*fpair;
-            fev_tmp.v[3] += 0.5*delx*dely*fpair;
-            fev_tmp.v[4] += 0.5*delx*delz*fpair;
-            fev_tmp.v[5] += 0.5*dely*delz*fpair;
+            const E_FLOAT v0 = delx*delx*fpair;
+            const E_FLOAT v1 = dely*dely*fpair;
+            const E_FLOAT v2 = delz*delz*fpair;
+            const E_FLOAT v3 = delx*dely*fpair;
+            const E_FLOAT v4 = delx*delz*fpair;
+            const E_FLOAT v5 = dely*delz*fpair;
+
+            fev_tmp.v[0] += factor*v0;
+            fev_tmp.v[1] += factor*v1;
+            fev_tmp.v[2] += factor*v2;
+            fev_tmp.v[3] += factor*v3;
+            fev_tmp.v[4] += factor*v4;
+            fev_tmp.v[5] += factor*v5;
+
+            if (c.vflag_atom) {
+              if (I_CONTRIB) {
+                a_vatom(i,0) += 0.5*v0;
+                a_vatom(i,1) += 0.5*v1;
+                a_vatom(i,2) += 0.5*v2;
+                a_vatom(i,3) += 0.5*v3;
+                a_vatom(i,4) += 0.5*v4;
+                a_vatom(i,5) += 0.5*v5;
+              }
+              if (J_CONTRIB) {
+                a_vatom(j,0) += 0.5*v0;
+                a_vatom(j,1) += 0.5*v1;
+                a_vatom(j,2) += 0.5*v2;
+                a_vatom(j,3) += 0.5*v3;
+                a_vatom(j,4) += 0.5*v4;
+                a_vatom(j,5) += 0.5*v5;
+              }
+            }
           }
         }
       },fev);
 
       Kokkos::single(Kokkos::PerThread(team), [&] () {
-        f(i,0) += fev.f[0];
-        f(i,1) += fev.f[1];
-        f(i,2) += fev.f[2];
+        a_f(i,0) += fev.f[0];
+        a_f(i,1) += fev.f[1];
+        a_f(i,2) += fev.f[2];
 
         if (c.eflag_global)
           ev.evdwl += fev.evdwl;
 
-        if (c.eflag_atom)
-          d_eatom(i) += fev.evdwl;
-
         if (c.vflag_global) {
           ev.v[0] += fev.v[0];
           ev.v[1] += fev.v[1];
@@ -496,29 +582,39 @@ struct PairComputeFunctor  {
           ev.v[5] += fev.v[5];
         }
 
-        if (c.vflag_atom) {
-          d_vatom(i,0) += fev.v[0];
-          d_vatom(i,1) += fev.v[1];
-          d_vatom(i,2) += fev.v[2];
-          d_vatom(i,3) += fev.v[3];
-          d_vatom(i,4) += fev.v[4];
-          d_vatom(i,5) += fev.v[5];
+        if (NEIGHFLAG == FULL) {
+
+          if (c.eflag_atom)
+            a_eatom(i) += fev.evdwl;
+
+          if (c.vflag_atom) {
+            a_vatom(i,0) += fev.v[0];
+            a_vatom(i,1) += fev.v[1];
+            a_vatom(i,2) += fev.v[2];
+            a_vatom(i,3) += fev.v[3];
+            a_vatom(i,4) += fev.v[4];
+            a_vatom(i,5) += fev.v[5];
+          }
         }
       });
     });
     return ev;
   }
 
-  // Use TeamPolicy, assume Newton off, Full Neighborlist, and energy/virial
+  // TeamPolicy, newton off, and energy/virial
   // Loop over neighbors of one atom with coulomb interaction
   // This function is called in parallel
+
   KOKKOS_FUNCTION
   EV_FLOAT compute_item_team_ev(typename Kokkos::TeamPolicy<device_type>::member_type team,
                                 const NeighListKokkos<device_type> &list, const CoulTag& ) const {
 
+    auto a_f = dup_f.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
+    auto a_eatom = dup_eatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
+    auto a_vatom = dup_vatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
+
     EV_FLOAT ev;
 
-    const int inum = team.league_size();
     const int atoms_per_team = team.team_size();
     const int firstatom = team.league_rank()*atoms_per_team;
     const int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum;
@@ -531,7 +627,7 @@ struct PairComputeFunctor  {
       const int itype = c.type(i);
       const F_FLOAT qtmp = c.q(i);
 
-      if (ZEROFLAG) {
+      if (NEIGHFLAG == FULL && ZEROFLAG) {
         Kokkos::single(Kokkos::PerThread(team), [&] (){
           f(i,0) = 0.0;
           f(i,1) = 0.0;
@@ -566,46 +662,95 @@ struct PairComputeFunctor  {
           if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype)))
             fpair+=c.template compute_fcoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
 
-          fev_tmp.f[0] += delx*fpair;
-          fev_tmp.f[1] += dely*fpair;
-          fev_tmp.f[2] += delz*fpair;
+          const F_FLOAT fx = delx*fpair;
+          const F_FLOAT fy = dely*fpair;
+          const F_FLOAT fz = delz*fpair;
+
+          fev_tmp.f[0] += fx;
+          fev_tmp.f[1] += fy;
+          fev_tmp.f[2] += fz;
+
+          const int I_CONTRIB = (NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD);
+          const int J_CONTRIB = ((NEIGHFLAG == HALF || NEIGHFLAG == HALFTHREAD) && j < c.nlocal);
+          const E_FLOAT factor = J_CONTRIB?1.0:0.5;
+
+          if (J_CONTRIB) {
+            a_f(j,0) -= fx;
+            a_f(j,1) -= fy;
+            a_f(j,2) -= fz;
+          }
 
           F_FLOAT evdwl = 0.0;
           F_FLOAT ecoul = 0.0;
-          if (c.eflag) {
+          if (c.eflag_either) {
             if (rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) {
               evdwl = factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
-              fev_tmp.evdwl += 0.5*evdwl;
+              fev_tmp.evdwl += factor * evdwl;
             }
             if (rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) {
               ecoul = c.template compute_ecoul<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype,factor_coul,qtmp);
-              fev_tmp.ecoul += 0.5*ecoul;
+              fev_tmp.ecoul += factor * ecoul;
+            }
+
+
+            if (c.eflag_atom) {
+              const E_FLOAT epairhalf = 0.5 * (evdwl + ecoul);
+
+              if (I_CONTRIB)
+                a_eatom[i] += epairhalf;
+
+              if (J_CONTRIB)
+                a_eatom[j] += epairhalf;
             }
           }
+
           if (c.vflag_either) {
-            fev_tmp.v[0] += 0.5*delx*delx*fpair;
-            fev_tmp.v[1] += 0.5*dely*dely*fpair;
-            fev_tmp.v[2] += 0.5*delz*delz*fpair;
-            fev_tmp.v[3] += 0.5*delx*dely*fpair;
-            fev_tmp.v[4] += 0.5*delx*delz*fpair;
-            fev_tmp.v[5] += 0.5*dely*delz*fpair;
+            const E_FLOAT v0 = delx*delx*fpair;
+            const E_FLOAT v1 = dely*dely*fpair;
+            const E_FLOAT v2 = delz*delz*fpair;
+            const E_FLOAT v3 = delx*dely*fpair;
+            const E_FLOAT v4 = delx*delz*fpair;
+            const E_FLOAT v5 = dely*delz*fpair;
+
+            fev_tmp.v[0] += factor*v0;
+            fev_tmp.v[1] += factor*v1;
+            fev_tmp.v[2] += factor*v2;
+            fev_tmp.v[3] += factor*v3;
+            fev_tmp.v[4] += factor*v4;
+            fev_tmp.v[5] += factor*v5;
+
+            if (c.vflag_atom) {
+              if (I_CONTRIB) {
+                a_vatom(i,0) += 0.5*v0;
+                a_vatom(i,1) += 0.5*v1;
+                a_vatom(i,2) += 0.5*v2;
+                a_vatom(i,3) += 0.5*v3;
+                a_vatom(i,4) += 0.5*v4;
+                a_vatom(i,5) += 0.5*v5;
+              }
+              if (J_CONTRIB) {
+                a_vatom(j,0) += 0.5*v0;
+                a_vatom(j,1) += 0.5*v1;
+                a_vatom(j,2) += 0.5*v2;
+                a_vatom(j,3) += 0.5*v3;
+                a_vatom(j,4) += 0.5*v4;
+                a_vatom(j,5) += 0.5*v5;
+              }
+            }
           }
         }
       },fev);
 
       Kokkos::single(Kokkos::PerThread(team), [&] () {
-        f(i,0) += fev.f[0];
-        f(i,1) += fev.f[1];
-        f(i,2) += fev.f[2];
+        a_f(i,0) += fev.f[0];
+        a_f(i,1) += fev.f[1];
+        a_f(i,2) += fev.f[2];
 
         if (c.eflag_global) {
           ev.evdwl += fev.evdwl;
           ev.ecoul += fev.ecoul;
         }
 
-        if (c.eflag_atom)
-          d_eatom(i) += fev.evdwl + fev.ecoul;
-
         if (c.vflag_global) {
           ev.v[0] += fev.v[0];
           ev.v[1] += fev.v[1];
@@ -615,13 +760,19 @@ struct PairComputeFunctor  {
           ev.v[5] += fev.v[5];
         }
 
-        if (c.vflag_atom) {
-          d_vatom(i,0) += fev.v[0];
-          d_vatom(i,1) += fev.v[1];
-          d_vatom(i,2) += fev.v[2];
-          d_vatom(i,3) += fev.v[3];
-          d_vatom(i,4) += fev.v[4];
-          d_vatom(i,5) += fev.v[5];
+        if (NEIGHFLAG == FULL) {
+
+          if (c.eflag_atom)
+            a_eatom(i) += fev.evdwl + fev.ecoul;
+
+          if (c.vflag_atom) {
+            a_vatom(i,0) += fev.v[0];
+            a_vatom(i,1) += fev.v[1];
+            a_vatom(i,2) += fev.v[2];
+            a_vatom(i,3) += fev.v[3];
+            a_vatom(i,4) += fev.v[4];
+            a_vatom(i,5) += fev.v[5];
+          }
         }
       });
     });
@@ -636,7 +787,7 @@ struct PairComputeFunctor  {
     auto a_eatom = dup_eatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
     auto a_vatom = dup_vatom.template access<typename AtomicDup<NEIGHFLAG,device_type>::value>();
 
-    const int EFLAG = c.eflag;
+    const int EFLAG = c.eflag_either;
     const int NEWTON_PAIR = c.newton_pair;
     const int VFLAG = c.vflag_either;
 
@@ -657,7 +808,7 @@ struct PairComputeFunctor  {
       const E_FLOAT v5 = dely*delz*fpair;
 
       if (c.vflag_global) {
-        if (NEIGHFLAG!=FULL) {
+        if (NEIGHFLAG != FULL) {
           if (NEWTON_PAIR) {
             ev.v[0] += v0;
             ev.v[1] += v1;
@@ -747,7 +898,8 @@ struct PairComputeFunctor  {
 // This uses the fact that failure to match template parameters is not an error.
 // By having the enable_if with a ! and without it, exactly one of the functions
 // pair_compute_neighlist will match - either the dummy version
-// or the real one further below.
+// or the real one further below
+
 template<class PairStyle, unsigned NEIGHFLAG, int ZEROFLAG = 0, class Specialisation = void>
 EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<!((NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*> list) {
   EV_FLOAT ev;
@@ -757,24 +909,29 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<!((NEIGHFLAG
   return ev;
 }
 
+template<class NeighStyle>
+int GetMaxNeighs(NeighStyle* list)
+{
+  auto d_ilist = list->d_ilist;
+  auto d_numneigh = list->d_numneigh;
+  int inum = list->inum;
+
+  int maxneigh = 0;
+  Kokkos::parallel_reduce(inum, LAMMPS_LAMBDA(const int ii, int &maxneigh) {
+    const int i = d_ilist[ii];
+    const int num_neighs = d_numneigh[i];
+    maxneigh = MAX(maxneigh,num_neighs);
+  }, Kokkos::Max<int>(maxneigh));
+
+  return maxneigh;
+}
+
 template<class DeviceType, class FunctorStyle>
-int GetTeamSize(FunctorStyle& KOKKOS_GPU_ARG(functor), int KOKKOS_GPU_ARG(inum),
-                int KOKKOS_GPU_ARG(reduce_flag), int team_size, int KOKKOS_GPU_ARG(vector_length)) {
-
-#ifdef LMP_KOKKOS_GPU
-    int team_size_max;
-
-    if (reduce_flag)
-      team_size_max = Kokkos::TeamPolicy<DeviceType>(inum,Kokkos::AUTO).team_size_max(functor,Kokkos::ParallelReduceTag());
-    else
-      team_size_max = Kokkos::TeamPolicy<DeviceType>(inum,Kokkos::AUTO).team_size_max(functor,Kokkos::ParallelForTag());
-
-    if (team_size*vector_length > team_size_max)
-      team_size = team_size_max/vector_length;
-#else
-    team_size = 1;
-#endif
-    return team_size;
+void GetMaxTeamSize(FunctorStyle& functor, int inum,
+                int &teamsize_max_for, int &teamsize_max_reduce)
+{
+  teamsize_max_for = Kokkos::TeamPolicy<DeviceType>(inum,Kokkos::AUTO).team_size_max(functor,Kokkos::ParallelForTag());
+  teamsize_max_reduce = Kokkos::TeamPolicy<DeviceType>(inum,Kokkos::AUTO).team_size_max(functor,Kokkos::ParallelReduceTag());
 }
 
 // Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL
@@ -782,38 +939,77 @@ template<class PairStyle, unsigned NEIGHFLAG, int ZEROFLAG = 0, class Specialisa
 EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*> list) {
   EV_FLOAT ev;
 
+  const int inum = list->inum;
+
   if (!fpair->lmp->kokkos->neigh_thread_set)
-    if (list->inum <= 16384 && NEIGHFLAG == FULL)
-      fpair->lmp->kokkos->neigh_thread = 1;
+    if (fpair->lmp->kokkos->ngpus && inum <= 16000)
+      if (NEIGHFLAG == FULL || !fpair->newton_pair)
+        fpair->lmp->kokkos->neigh_thread = 1;
 
   if (fpair->lmp->kokkos->neigh_thread) {
 
-    int vector_length = 8;
-    int atoms_per_team = 32;
+    static int vectorsize = 0;
+    static int atoms_per_team = 0;
+
+#if defined(LMP_KOKKOS_GPU)
+    static int lastcall = -1;
+    if (!vectorsize || lastcall < fpair->lmp->neighbor->lastcall) {
+      lastcall = fpair->lmp->update->ntimestep;
+      vectorsize = GetMaxNeighs(list);
+      vectorsize = MathSpecial::powint(2,(int(log2(vectorsize) + 0.5))); // round to nearest power of 2
+
+  #if defined(KOKKOS_ENABLE_HIP)
+      int max_vectorsize = 64;
+  #else
+      int max_vectorsize = 32;
+  #endif
+
+      vectorsize = MIN(vectorsize,max_vectorsize);
+
+      int teamsize_max_for,teamsize_max_reduce;
+      if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
+        PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
+        GetMaxTeamSize<typename PairStyle::device_type>(ff, inum, teamsize_max_for, teamsize_max_reduce);
+      } else {
+        PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
+        GetMaxTeamSize<typename PairStyle::device_type>(ff, inum, teamsize_max_for, teamsize_max_reduce);
+      }
+
+      int teamsize_max = teamsize_max_for;
+      if (fpair->eflag || fpair->vflag)
+        teamsize_max = teamsize_max_reduce;
+      atoms_per_team = teamsize_max/vectorsize;
+    }
+#else
+    vectorsize = 1;
+    atoms_per_team = 1;
+#endif
+
+    const int num_teams = inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0);
 
     if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
       PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
-      atoms_per_team = GetTeamSize<typename PairStyle::device_type>(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length);
-      Kokkos::TeamPolicy<typename PairStyle::device_type,Kokkos::IndexType<int> > policy(list->inum,atoms_per_team,vector_length);
+      Kokkos::TeamPolicy<typename PairStyle::device_type,Kokkos::IndexType<int> > policy(num_teams,atoms_per_team,vectorsize);
       if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev);
       else                              Kokkos::parallel_for(policy,ff);
+      ff.contribute();
     } else {
       PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
-      atoms_per_team = GetTeamSize<typename PairStyle::device_type>(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length);
-      Kokkos::TeamPolicy<typename PairStyle::device_type,Kokkos::IndexType<int> > policy(list->inum,atoms_per_team,vector_length);
+      Kokkos::TeamPolicy<typename PairStyle::device_type,Kokkos::IndexType<int> > policy(num_teams,atoms_per_team,vectorsize);
       if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev);
       else                              Kokkos::parallel_for(policy,ff);
+      ff.contribute();
     }
   } else {
     if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
       PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
-      else                              Kokkos::parallel_for(list->inum,ff);
+      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(inum,ff,ev);
+      else                              Kokkos::parallel_for(inum,ff);
       ff.contribute();
     } else {
       PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
-      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
-      else                              Kokkos::parallel_for(list->inum,ff);
+      if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(inum,ff,ev);
+      else                              Kokkos::parallel_for(inum,ff);
       ff.contribute();
     }
   }
diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp
index 4caab0ef55..c7e10d39ef 100644
--- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp
+++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp
@@ -214,9 +214,7 @@ compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/,
       (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
     englj *= switch1;
   }
-
   return englj;
-
 }
 
 /* ----------------------------------------------------------------------
@@ -488,4 +486,3 @@ template class PairLJCharmmCoulLongKokkos<LMPDeviceType>;
 template class PairLJCharmmCoulLongKokkos<LMPHostType>;
 #endif
 }
-
diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp
new file mode 100644
index 0000000000..f412721411
--- /dev/null
+++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.cpp
@@ -0,0 +1,497 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Mitch Murphy (alphataubio)
+
+   Based on serial kspace lj-fsw sections (force-switched) provided by
+   Robert Meissner and Lucio Colombi Ciacchi of Bremen University, Germany,
+   with additional assistance from Robert A. Latour, Clemson University
+
+ ------------------------------------------------------------------------- */
+
+#include "pair_lj_charmmfsw_coul_long_kokkos.h"
+
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "error.h"
+#include "force.h"
+#include "kokkos.h"
+#include "memory_kokkos.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "neighbor.h"
+#include "respa.h"
+#include "update.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJCharmmfswCoulLongKokkos<DeviceType>::PairLJCharmmfswCoulLongKokkos(LAMMPS *lmp):PairLJCharmmfswCoulLong(lmp)
+{
+  respa_enable = 0;
+
+  kokkosable = 1;
+  atomKK = (AtomKokkos *) atom;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_read = X_MASK | F_MASK | TYPE_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+PairLJCharmmfswCoulLongKokkos<DeviceType>::~PairLJCharmmfswCoulLongKokkos()
+{
+  if (copymode) return;
+
+  if (allocated) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->destroy_kokkos(k_cutsq,cutsq);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCharmmfswCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  if (neighflag == FULL) no_virial_fdotr_compute = 1;
+
+  ev_init(eflag,vflag,0);
+
+  // reallocate per-atom arrays if necessary
+
+  if (eflag_atom) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
+    d_eatom = k_eatom.view<DeviceType>();
+  }
+  if (vflag_atom) {
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
+    d_vatom = k_vatom.view<DeviceType>();
+  }
+
+  atomKK->sync(execution_space,datamask_read);
+  k_cutsq.template sync<DeviceType>();
+  k_params.template sync<DeviceType>();
+  if (eflag || vflag) atomKK->modified(execution_space,datamask_modify);
+  else atomKK->modified(execution_space,F_MASK);
+
+  x = atomKK->k_x.view<DeviceType>();
+  c_x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  q = atomKK->k_q.view<DeviceType>();
+  type = atomKK->k_type.view<DeviceType>();
+  nlocal = atom->nlocal;
+  nall = atom->nlocal + atom->nghost;
+  special_lj[0] = force->special_lj[0];
+  special_lj[1] = force->special_lj[1];
+  special_lj[2] = force->special_lj[2];
+  special_lj[3] = force->special_lj[3];
+  special_coul[0] = force->special_coul[0];
+  special_coul[1] = force->special_coul[1];
+  special_coul[2] = force->special_coul[2];
+  special_coul[3] = force->special_coul[3];
+  qqrd2e = force->qqrd2e;
+  newton_pair = force->newton_pair;
+
+  // loop over neighbors of my atoms
+
+  copymode = 1;
+
+  EV_FLOAT ev;
+  if (ncoultablebits)
+    ev = pair_compute<PairLJCharmmfswCoulLongKokkos<DeviceType>,CoulLongTable<1> >
+      (this,(NeighListKokkos<DeviceType>*)list);
+  else
+    ev = pair_compute<PairLJCharmmfswCoulLongKokkos<DeviceType>,CoulLongTable<0> >
+      (this,(NeighListKokkos<DeviceType>*)list);
+
+
+  if (eflag) {
+    eng_vdwl += ev.evdwl;
+    eng_coul += ev.ecoul;
+  }
+  if (vflag_global) {
+    virial[0] += ev.v[0];
+    virial[1] += ev.v[1];
+    virial[2] += ev.v[2];
+    virial[3] += ev.v[3];
+    virial[4] += ev.v[4];
+    virial[5] += ev.v[5];
+  }
+
+  if (eflag_atom) {
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_fdotr) pair_virial_fdotr_compute(this);
+
+  copymode = 0;
+}
+
+/* ----------------------------------------------------------------------
+   compute LJ CHARMM pair force between atoms i and j
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCharmmfswCoulLongKokkos<DeviceType>::
+compute_fpair(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/,
+              const int& itype, const int& jtype) const {
+  const F_FLOAT r2inv = 1.0/rsq;
+  const F_FLOAT r6inv = r2inv*r2inv*r2inv;
+  F_FLOAT forcelj, switch1;
+
+  forcelj = r6inv *
+    ((STACKPARAMS?m_params[itype][jtype].lj1:params(itype,jtype).lj1)*r6inv -
+     (STACKPARAMS?m_params[itype][jtype].lj2:params(itype,jtype).lj2));
+
+  if (rsq > cut_lj_innersq) {
+    switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
+              (cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
+    forcelj = forcelj*switch1;
+  }
+
+  return forcelj*r2inv;
+}
+
+/* ----------------------------------------------------------------------
+   compute LJ CHARMM pair potential energy between atoms i and j
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCharmmfswCoulLongKokkos<DeviceType>::
+compute_evdwl(const F_FLOAT& rsq, const int& /*i*/, const int& /*j*/,
+              const int& itype, const int& jtype) const {
+  const F_FLOAT r2inv = 1.0/rsq;
+  const F_FLOAT r6inv = r2inv*r2inv*r2inv;
+  const F_FLOAT r = sqrt(rsq);
+  const F_FLOAT rinv = 1.0/r;
+  const F_FLOAT r3inv = rinv*rinv*rinv;
+  F_FLOAT englj, englj12, englj6;
+
+  if (rsq > cut_lj_innersq) {
+    englj12 = (STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*cut_lj6*
+      denom_lj12 * (r6inv - cut_lj6inv)*(r6inv - cut_lj6inv);
+    englj6 = -(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)*
+      cut_lj3*denom_lj6 * (r3inv - cut_lj3inv)*(r3inv - cut_lj3inv);
+    englj = englj12 + englj6;
+  } else {
+    englj12 = r6inv*(STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*r6inv -
+    (STACKPARAMS?m_params[itype][jtype].lj3:params(itype,jtype).lj3)*cut_lj_inner6inv*cut_lj6inv;
+    englj6 = -(STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)*r6inv +
+      (STACKPARAMS?m_params[itype][jtype].lj4:params(itype,jtype).lj4)*
+      cut_lj_inner3inv*cut_lj3inv;
+    englj = englj12 + englj6;
+  }
+  return englj;
+}
+
+/* ----------------------------------------------------------------------
+   compute coulomb pair force between atoms i and j
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS,  class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCharmmfswCoulLongKokkos<DeviceType>::
+compute_fcoul(const F_FLOAT& rsq, const int& /*i*/, const int&j,
+              const int& /*itype*/, const int& /*jtype*/,
+              const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
+  if (Specialisation::DoTable && rsq > tabinnersq) {
+    union_int_float_t rsq_lookup;
+    rsq_lookup.f = rsq;
+    const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
+    const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
+    const F_FLOAT table = d_ftable[itable] + fraction*d_dftable[itable];
+    F_FLOAT forcecoul = qtmp*q[j] * table;
+    if (factor_coul < 1.0) {
+      const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable];
+      const F_FLOAT prefactor = qtmp*q[j] * table;
+      forcecoul -= (1.0-factor_coul)*prefactor;
+    }
+    return forcecoul/rsq;
+  } else {
+    const F_FLOAT r = sqrt(rsq);
+    const F_FLOAT grij = g_ewald * r;
+    const F_FLOAT expm2 = exp(-grij*grij);
+    const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
+    const F_FLOAT rinv = 1.0/r;
+    const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+    const F_FLOAT prefactor = qqrd2e * qtmp*q[j]*rinv;
+    F_FLOAT forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+    if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
+
+    return forcecoul*rinv*rinv;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute coulomb pair potential energy between atoms i and j
+   ---------------------------------------------------------------------- */
+template<class DeviceType>
+template<bool STACKPARAMS, class Specialisation>
+KOKKOS_INLINE_FUNCTION
+F_FLOAT PairLJCharmmfswCoulLongKokkos<DeviceType>::
+compute_ecoul(const F_FLOAT& rsq, const int& /*i*/, const int&j,
+              const int& /*itype*/, const int& /*jtype*/, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const {
+  if (Specialisation::DoTable && rsq > tabinnersq) {
+    union_int_float_t rsq_lookup;
+    rsq_lookup.f = rsq;
+    const int itable = (rsq_lookup.i & ncoulmask) >> ncoulshiftbits;
+    const F_FLOAT fraction = (rsq_lookup.f - d_rtable[itable]) * d_drtable[itable];
+    const F_FLOAT table = d_etable[itable] + fraction*d_detable[itable];
+    F_FLOAT ecoul = qtmp*q[j] * table;
+    if (factor_coul < 1.0) {
+      const F_FLOAT table = d_ctable[itable] + fraction*d_dctable[itable];
+      const F_FLOAT prefactor = qtmp*q[j] * table;
+      ecoul -= (1.0-factor_coul)*prefactor;
+    }
+    return ecoul;
+  } else {
+    const F_FLOAT r = sqrt(rsq);
+    const F_FLOAT grij = g_ewald * r;
+    const F_FLOAT expm2 = exp(-grij*grij);
+    const F_FLOAT t = 1.0 / (1.0 + EWALD_P*grij);
+    const F_FLOAT erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+    const F_FLOAT prefactor = qqrd2e * qtmp*q[j]/r;
+    F_FLOAT ecoul = prefactor * erfc;
+    if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
+    return ecoul;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   allocate all arrays
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCharmmfswCoulLongKokkos<DeviceType>::allocate()
+{
+  PairLJCharmmfswCoulLong::allocate();
+
+  int n = atom->ntypes;
+
+  memory->destroy(cutsq);
+  memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq");
+  d_cutsq = k_cutsq.template view<DeviceType>();
+
+  d_cut_ljsq = typename AT::t_ffloat_2d("pair:cut_ljsq",n+1,n+1);
+
+  d_cut_coulsq = typename AT::t_ffloat_2d("pair:cut_coulsq",n+1,n+1);
+
+  k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCharmmfswCoulLong::params",n+1,n+1);
+  params = k_params.template view<DeviceType>();
+}
+
+template<class DeviceType>
+void PairLJCharmmfswCoulLongKokkos<DeviceType>::init_tables(double cut_coul, double *cut_respa)
+{
+  Pair::init_tables(cut_coul,cut_respa);
+
+  typedef typename ArrayTypes<DeviceType>::t_ffloat_1d table_type;
+  typedef typename ArrayTypes<LMPHostType>::t_ffloat_1d host_table_type;
+
+  int ntable = 1;
+  for (int i = 0; i < ncoultablebits; i++) ntable *= 2;
+
+
+  // Copy rtable and drtable
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = rtable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_rtable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = drtable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_drtable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  // Copy ftable and dftable
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = ftable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_ftable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = dftable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_dftable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  // Copy ctable and dctable
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = ctable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_ctable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = dctable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_dctable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  // Copy etable and detable
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = etable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_etable = d_table;
+  }
+
+  {
+  host_table_type h_table("HostTable",ntable);
+  table_type d_table("DeviceTable",ntable);
+
+  for (int i = 0; i < ntable; i++) {
+    h_table(i) = detable[i];
+  }
+  Kokkos::deep_copy(d_table,h_table);
+  d_detable = d_table;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairLJCharmmfswCoulLongKokkos<DeviceType>::init_style()
+{
+  PairLJCharmmfswCoulLong::init_style();
+
+  Kokkos::deep_copy(d_cut_ljsq,cut_ljsq);
+  Kokkos::deep_copy(d_cut_coulsq,cut_coulsq);
+
+  // error if rRESPA with inner levels
+
+  if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) {
+    int respa = 0;
+    if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
+    if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
+    if (respa)
+      error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle");
+  }
+
+  // adjust neighbor list request for KOKKOS
+
+  neighflag = lmp->kokkos->neighflag;
+  auto request = neighbor->find_request(this);
+  request->set_kokkos_host(std::is_same_v<DeviceType,LMPHostType> &&
+                           !std::is_same_v<DeviceType,LMPDeviceType>);
+  request->set_kokkos_device(std::is_same_v<DeviceType,LMPDeviceType>);
+  if (neighflag == FULL) request->enable_full();
+}
+
+/* ----------------------------------------------------------------------
+   init for one type pair i,j and corresponding j,i
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+double PairLJCharmmfswCoulLongKokkos<DeviceType>::init_one(int i, int j)
+{
+  double cutone = PairLJCharmmfswCoulLong::init_one(i,j);
+
+  k_params.h_view(i,j).lj1 = lj1[i][j];
+  k_params.h_view(i,j).lj2 = lj2[i][j];
+  k_params.h_view(i,j).lj3 = lj3[i][j];
+  k_params.h_view(i,j).lj4 = lj4[i][j];
+  //k_params.h_view(i,j).offset = offset[i][j];
+  k_params.h_view(i,j).cut_ljsq = cut_ljsq;
+  k_params.h_view(i,j).cut_coulsq = cut_coulsq;
+
+  k_params.h_view(j,i) = k_params.h_view(i,j);
+  if (i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
+    m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
+    m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone;
+    m_cut_ljsq[j][i] = m_cut_ljsq[i][j] = cut_ljsq;
+    m_cut_coulsq[j][i] = m_cut_coulsq[i][j] = cut_coulsq;
+  }
+
+  k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone;
+  k_cutsq.template modify<LMPHostType>();
+  k_params.template modify<LMPHostType>();
+
+  return cutone;
+}
+
+namespace LAMMPS_NS {
+template class PairLJCharmmfswCoulLongKokkos<LMPDeviceType>;
+#ifdef LMP_KOKKOS_GPU
+template class PairLJCharmmfswCoulLongKokkos<LMPHostType>;
+#endif
+}
diff --git a/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h
new file mode 100644
index 0000000000..7533f40dbc
--- /dev/null
+++ b/src/KOKKOS/pair_lj_charmmfsw_coul_long_kokkos.h
@@ -0,0 +1,145 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(lj/charmmfsw/coul/long/kk,PairLJCharmmfswCoulLongKokkos<LMPDeviceType>);
+PairStyle(lj/charmmfsw/coul/long/kk/device,PairLJCharmmfswCoulLongKokkos<LMPDeviceType>);
+PairStyle(lj/charmmfsw/coul/long/kk/host,PairLJCharmmfswCoulLongKokkos<LMPHostType>);
+// clang-format on
+#else
+
+// clang-format off
+#ifndef LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_KOKKOS_H
+#define LMP_PAIR_LJ_CHARMMFSW_COUL_LONG_KOKKOS_H
+
+#include "pair_kokkos.h"
+#include "pair_lj_charmmfsw_coul_long.h"
+#include "neigh_list_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<class DeviceType>
+class PairLJCharmmfswCoulLongKokkos : public PairLJCharmmfswCoulLong {
+ public:
+  enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF};
+  enum {COUL_FLAG=1};
+  typedef DeviceType device_type;
+  typedef ArrayTypes<DeviceType> AT;
+  PairLJCharmmfswCoulLongKokkos(class LAMMPS *);
+  ~PairLJCharmmfswCoulLongKokkos() override;
+
+  void compute(int, int) override;
+
+  void init_tables(double cut_coul, double *cut_respa) override;
+  void init_style() override;
+  double init_one(int, int) override;
+
+ protected:
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_fcoul(const F_FLOAT& rsq, const int& i, const int&j, const int& itype,
+                        const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype) const;
+
+  template<bool STACKPARAMS, class Specialisation>
+  KOKKOS_INLINE_FUNCTION
+  F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j,
+                        const int& itype, const int& jtype, const F_FLOAT& factor_coul, const F_FLOAT& qtmp) const;
+
+  Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType> k_params;
+  typename Kokkos::DualView<params_lj_coul**,
+    Kokkos::LayoutRight,DeviceType>::t_dev_const_um params;
+  // hardwired to space for 12 atom types
+  params_lj_coul m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+
+  F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_ljsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  F_FLOAT m_cut_coulsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1];
+  typename AT::t_x_array_randomread x;
+  typename AT::t_x_array c_x;
+  typename AT::t_f_array f;
+  typename AT::t_int_1d_randomread type;
+  typename AT::t_float_1d_randomread q;
+
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  typename AT::t_efloat_1d d_eatom;
+  typename AT::t_virial_array d_vatom;
+
+  int newton_pair;
+
+  typename AT::tdual_ffloat_2d k_cutsq;
+  typename AT::t_ffloat_2d d_cutsq;
+  typename AT::t_ffloat_2d d_cut_ljsq;
+  typename AT::t_ffloat_2d d_cut_coulsq;
+
+  typename AT::t_ffloat_1d_randomread
+    d_rtable, d_drtable, d_ftable, d_dftable,
+    d_ctable, d_dctable, d_etable, d_detable;
+
+  int neighflag;
+  int nlocal,nall,eflag,vflag;
+
+  double special_coul[4];
+  double special_lj[4];
+  double qqrd2e;
+
+  void allocate() override;
+
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmfswCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmfswCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmfswCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmfswCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCharmmfswCoulLongKokkos,CoulLongTable<1>>(PairLJCharmmfswCoulLongKokkos*,
+                                                            NeighListKokkos<DeviceType>*);
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
+  friend struct PairComputeFunctor<PairLJCharmmfswCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmfswCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmfswCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmfswCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute_neighlist<PairLJCharmmfswCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJCharmmfswCoulLongKokkos*,NeighListKokkos<DeviceType>*);
+  friend EV_FLOAT pair_compute<PairLJCharmmfswCoulLongKokkos,CoulLongTable<0>>(PairLJCharmmfswCoulLongKokkos*,
+                                                            NeighListKokkos<DeviceType>*);
+  friend void pair_virial_fdotr_compute<PairLJCharmmfswCoulLongKokkos>(PairLJCharmmfswCoulLongKokkos*);
+
+};
+
+}
+
+#endif
+#endif
+
diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp
index f487b0c84e..9f0b1dd747 100644
--- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp
+++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp
@@ -43,7 +43,7 @@ using MathConst::MY_PI;
 
 enum{NONE,RLINEAR,RSQ};
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 #ifdef DBL_EPSILON
   #define MY_EPSILON (10.0*DBL_EPSILON)
diff --git a/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp b/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp
index 61722bf62d..ef747ef95c 100644
--- a/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp
+++ b/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp
@@ -106,7 +106,8 @@ void PairPACEExtrapolationKokkos<DeviceType>::grow(int natom, int maxneigh)
 
   if ((int)A.extent(0) < natom) {
 
-    MemKK::realloc_kokkos(A, "pace:A", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1));
+    MemKK::realloc_kokkos(A_sph, "pace:A_sph", natom, nelements, idx_sph_max, nradmax + 1);
+    MemKK::realloc_kokkos(A, "pace:A", natom, nelements, (lmax + 1) * (lmax + 1), nradmax + 1);
     MemKK::realloc_kokkos(A_rank1, "pace:A_rank1", natom, nelements, nradbase);
 
     MemKK::realloc_kokkos(A_list, "pace:A_list", natom, idx_ms_combs_max, basis_set->rankmax);
@@ -117,25 +118,29 @@ void PairPACEExtrapolationKokkos<DeviceType>::grow(int natom, int maxneigh)
     MemKK::realloc_kokkos(rhos, "pace:rhos", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion
     MemKK::realloc_kokkos(dF_drho, "pace:dF_drho", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion
 
-    MemKK::realloc_kokkos(weights, "pace:weights", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1));
+    MemKK::realloc_kokkos(weights, "pace:weights", natom, nelements, idx_sph_max, nradmax + 1);
     MemKK::realloc_kokkos(weights_rank1, "pace:weights_rank1", natom, nelements, nradbase);
 
     // hard-core repulsion
     MemKK::realloc_kokkos(rho_core, "pace:rho_core", natom);
     MemKK::realloc_kokkos(dF_drho_core, "pace:dF_drho_core", natom);
+    MemKK::realloc_kokkos(dF_dfcut, "pace:dF_dfcut", natom);
+    MemKK::realloc_kokkos(d_d_min, "pace:r_min_pair", natom);
+    MemKK::realloc_kokkos(d_jj_min, "pace:j_min_pair", natom);
+    MemKK::realloc_kokkos(d_corerep, "pace:corerep", natom); // per-atom corerep
 
     MemKK::realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_ms_combs_max, basis_set->rankmax);
 
-    //B-projections
+    // B-projections
     MemKK::realloc_kokkos(projections, "pace:projections", natom, total_num_functions_max); // per-atom B-projections
     MemKK::realloc_kokkos(d_gamma, "pace:gamma", natom); // per-atom gamma
   }
 
-  if (((int)ylm.extent(0) < natom) || ((int)ylm.extent(1) < maxneigh)) {
+  if (((int)fr.extent(0) < natom) || ((int)fr.extent(1) < maxneigh)) {
 
     // radial functions
-    MemKK::realloc_kokkos(fr, "pace:fr", natom, maxneigh, nradmax, lmax + 1);
-    MemKK::realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, nradmax, lmax + 1);
+    MemKK::realloc_kokkos(fr, "pace:fr", natom, maxneigh, lmax + 1, nradmax);
+    MemKK::realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, lmax + 1, nradmax);
     MemKK::realloc_kokkos(gr, "pace:gr", natom, maxneigh, nradbase);
     MemKK::realloc_kokkos(dgr, "pace:dgr", natom, maxneigh, nradbase);
     const int max_num_functions = MAX(nradbase, nradmax*(lmax + 1));
@@ -146,12 +151,6 @@ void PairPACEExtrapolationKokkos<DeviceType>::grow(int natom, int maxneigh)
     MemKK::realloc_kokkos(cr, "pace:cr", natom, maxneigh);
     MemKK::realloc_kokkos(dcr, "pace:dcr", natom, maxneigh);
 
-    // spherical harmonics
-    MemKK::realloc_kokkos(plm, "pace:plm", natom, maxneigh, (lmax + 1) * (lmax + 1));
-    MemKK::realloc_kokkos(dplm, "pace:dplm", natom, maxneigh, (lmax + 1) * (lmax + 1));
-    MemKK::realloc_kokkos(ylm, "pace:ylm", natom, maxneigh, (lmax + 1) * (lmax + 1));
-    MemKK::realloc_kokkos(dylm, "pace:dylm", natom, maxneigh, (lmax + 1) * (lmax + 1));
-
     // short neigh list
     MemKK::realloc_kokkos(d_ncount, "pace:ncount", natom);
     MemKK::realloc_kokkos(d_mu, "pace:mu", natom, maxneigh);
@@ -219,6 +218,23 @@ void PairPACEExtrapolationKokkos<DeviceType>::copy_pertype()
 
   Kokkos::deep_copy(d_wpre, h_wpre);
   Kokkos::deep_copy(d_mexp, h_mexp);
+
+  // ZBL core-rep
+  MemKK::realloc_kokkos(d_cut_in, "pace:d_cut_in", nelements, nelements);
+  MemKK::realloc_kokkos(d_dcut_in, "pace:d_dcut_in", nelements, nelements);
+  auto h_cut_in = Kokkos::create_mirror_view(d_cut_in);
+  auto h_dcut_in = Kokkos::create_mirror_view(d_dcut_in);
+
+  for (int mu_i = 0; mu_i < nelements; ++mu_i) {
+    for (int mu_j = 0; mu_j < nelements; ++mu_j) {
+      h_cut_in(mu_i,mu_j) = basis_set->map_bond_specifications.at({mu_i,mu_j}).rcut_in;
+      h_dcut_in(mu_i,mu_j) = basis_set->map_bond_specifications.at({mu_i,mu_j}).dcut_in;
+    }
+  }
+  Kokkos::deep_copy(d_cut_in, h_cut_in);
+  Kokkos::deep_copy(d_dcut_in, h_dcut_in);
+
+  is_zbl = basis_set->radial_functions->inner_cutoff_type == "zbl";
 }
 
 /* ---------------------------------------------------------------------- */
@@ -244,6 +260,9 @@ void PairPACEExtrapolationKokkos<DeviceType>::copy_splines()
 
   ACERadialFunctions* radial_functions = dynamic_cast<ACERadialFunctions*>(basis_set->radial_functions);
 
+  if (radial_functions == nullptr)
+    error->all(FLERR,"Chosen radial basis style not supported by pair style pace/kk");
+
   for (int i = 0; i < nelements; i++) {
     for (int j = 0; j < nelements; j++) {
       k_splines_gk.h_view(i, j) = radial_functions->splines_gk(i, j);
@@ -275,8 +294,9 @@ void PairPACEExtrapolationKokkos<DeviceType>::copy_tilde()
   total_num_functions_max = 0;
 
   MemKK::realloc_kokkos(d_idx_ms_combs_count, "pace:idx_ms_combs_count", nelements);
-  MemKK::realloc_kokkos(d_total_basis_size, "pace:total_basis_size", nelements);
   auto h_idx_ms_combs_count = Kokkos::create_mirror_view(d_idx_ms_combs_count);
+
+  MemKK::realloc_kokkos(d_total_basis_size, "pace:total_basis_size", nelements);
   auto h_total_basis_size = Kokkos::create_mirror_view(d_total_basis_size);
 
   for (int mu = 0; mu < nelements; mu++) {
@@ -291,8 +311,8 @@ void PairPACEExtrapolationKokkos<DeviceType>::copy_tilde()
       idx_ms_combs++;
 
     // rank > 1
-    for (int func_ind = 0; func_ind < total_basis_size; ++func_ind) {
-      ACEBBasisFunction *func = &basis[func_ind];
+    for (int idx_func = 0; idx_func < total_basis_size; ++idx_func) {
+      ACEBBasisFunction *func = &basis[idx_func];
 
       // loop over {ms} combinations in sum
       for (int ms_ind = 0; ms_ind < func->num_ms_combs; ++ms_ind)
@@ -309,7 +329,7 @@ void PairPACEExtrapolationKokkos<DeviceType>::copy_tilde()
 
   MemKK::realloc_kokkos(d_rank, "pace:rank", nelements, total_num_functions_max);
   MemKK::realloc_kokkos(d_num_ms_combs, "pace:num_ms_combs", nelements, total_num_functions_max);
-  MemKK::realloc_kokkos(d_func_inds, "pace:func_inds", nelements, idx_ms_combs_max);
+  MemKK::realloc_kokkos(d_idx_funcs, "pace:idx_funcs", nelements, idx_ms_combs_max);
   MemKK::realloc_kokkos(d_mus, "pace:mus", nelements, total_num_functions_max, basis_set->rankmax);
   MemKK::realloc_kokkos(d_ns, "pace:ns", nelements, total_num_functions_max, basis_set->rankmax);
   MemKK::realloc_kokkos(d_ls, "pace:ls", nelements, total_num_functions_max, basis_set->rankmax);
@@ -322,7 +342,7 @@ void PairPACEExtrapolationKokkos<DeviceType>::copy_tilde()
 
   auto h_rank = Kokkos::create_mirror_view(d_rank);
   auto h_num_ms_combs = Kokkos::create_mirror_view(d_num_ms_combs);
-  auto h_func_inds = Kokkos::create_mirror_view(d_func_inds);
+  auto h_idx_funcs = Kokkos::create_mirror_view(d_idx_funcs);
   auto h_mus = Kokkos::create_mirror_view(d_mus);
   auto h_ns = Kokkos::create_mirror_view(d_ns);
   auto h_ls = Kokkos::create_mirror_view(d_ls);
@@ -343,55 +363,52 @@ void PairPACEExtrapolationKokkos<DeviceType>::copy_tilde()
 
     const int ndensity = basis_set->map_embedding_specifications.at(mu).ndensity;
 
-    int idx_ms_comb = 0;
+    int idx_ms_combs = 0;
 
     // rank=1
-    for (int func_ind = 0; func_ind < total_basis_size_rank1; ++func_ind) {
-      ACEBBasisFunction *func = &basis_rank1[func_ind];
-      h_rank(mu, func_ind) = 1;
-      h_mus(mu, func_ind, 0) = func->mus[0];
-      h_ns(mu, func_ind, 0) = func->ns[0];
+    for (int idx_func = 0; idx_func < total_basis_size_rank1; ++idx_func) {
+      ACEBBasisFunction *func = &basis_rank1[idx_func];
+      h_rank(mu, idx_func) = 1;
+      h_mus(mu, idx_func, 0) = func->mus[0];
+      h_ns(mu, idx_func, 0) = func->ns[0];
 
       for (int p = 0; p < ndensity; ++p)
-            h_coeffs(mu, func_ind, p) = func->coeff[p];
+        h_coeffs(mu, idx_func, p) = func->coeff[p];
 
-      h_gen_cgs(mu, idx_ms_comb) = func->gen_cgs[0];
+      h_gen_cgs(mu, idx_ms_combs) = func->gen_cgs[0];
 
-      h_func_inds(mu, idx_ms_comb) = func_ind;
-      idx_ms_comb++;
+      h_idx_funcs(mu, idx_ms_combs) = idx_func;
+      idx_ms_combs++;
     }
 
     // rank > 1
-    for (int func_ind = 0; func_ind < total_basis_size; ++func_ind) {
-      ACEBBasisFunction *func = &basis[func_ind];
+    for (int idx_func = 0; idx_func < total_basis_size; ++idx_func) {
+      ACEBBasisFunction *func = &basis[idx_func];
       // TODO: check if func->ctildes are zero, then skip
 
-      const int func_ind_through = total_basis_size_rank1 + func_ind;
+      const int idx_func_through = total_basis_size_rank1 + idx_func;
 
-      const int rank = h_rank(mu, func_ind_through) = func->rank;
-      h_num_ms_combs(mu, func_ind_through) = func->num_ms_combs;
+      const int rank = h_rank(mu, idx_func_through) = func->rank;
+      h_num_ms_combs(mu, idx_func_through) = func->num_ms_combs;
       for (int t = 0; t < rank; t++) {
-        h_mus(mu, func_ind_through, t) = func->mus[t];
-        h_ns(mu, func_ind_through, t) = func->ns[t];
-        h_ls(mu, func_ind_through, t) = func->ls[t];
+        h_mus(mu, idx_func_through, t) = func->mus[t];
+        h_ns(mu, idx_func_through, t) = func->ns[t];
+        h_ls(mu, idx_func_through, t) = func->ls[t];
       }
 
       for (int p = 0; p < ndensity; ++p)
-        h_coeffs(mu, func_ind_through, p) = func->coeff[p];
-
+        h_coeffs(mu, idx_func_through, p) = func->coeff[p];
 
       // loop over {ms} combinations in sum
       for (int ms_ind = 0; ms_ind < func->num_ms_combs; ++ms_ind) {
         auto ms = &func->ms_combs[ms_ind * rank]; // current ms-combination (of length = rank)
         for (int t = 0; t < rank; t++)
-          h_ms_combs(mu, idx_ms_comb, t) = ms[t];
+          h_ms_combs(mu, idx_ms_combs, t) = ms[t];
 
+        h_gen_cgs(mu, idx_ms_combs) = func->gen_cgs[ms_ind];
 
-        h_gen_cgs(mu, idx_ms_comb) = func->gen_cgs[ms_ind];
-
-
-        h_func_inds(mu, idx_ms_comb) = func_ind_through;
-        idx_ms_comb++;
+        h_idx_funcs(mu, idx_ms_combs) = idx_func_through;
+        idx_ms_combs++;
       }
     }
 
@@ -405,7 +422,7 @@ void PairPACEExtrapolationKokkos<DeviceType>::copy_tilde()
 
   Kokkos::deep_copy(d_rank, h_rank);
   Kokkos::deep_copy(d_num_ms_combs, h_num_ms_combs);
-  Kokkos::deep_copy(d_func_inds, h_func_inds);
+  Kokkos::deep_copy(d_idx_funcs, h_idx_funcs);
   Kokkos::deep_copy(d_mus, h_mus);
   Kokkos::deep_copy(d_ns, h_ns);
   Kokkos::deep_copy(d_ls, h_ls);
@@ -455,6 +472,7 @@ void PairPACEExtrapolationKokkos<DeviceType>::init_style()
 
   // spherical harmonics
 
+  MemKK::realloc_kokkos(d_idx_sph, "pace:idx_sph", (lmax + 1) * (lmax + 1));
   MemKK::realloc_kokkos(alm, "pace:alm", (lmax + 1) * (lmax + 1));
   MemKK::realloc_kokkos(blm, "pace:blm", (lmax + 1) * (lmax + 1));
   MemKK::realloc_kokkos(cl, "pace:cl", lmax + 1);
@@ -553,6 +571,7 @@ void PairPACEExtrapolationKokkos<DeviceType>::compute(int eflag_in, int vflag_in
     atomKK->modified(Host,F_MASK);
     return;
   }
+
   eflag = eflag_in;
   vflag = vflag_in;
 
@@ -573,7 +592,7 @@ void PairPACEExtrapolationKokkos<DeviceType>::compute(int eflag_in, int vflag_in
     d_vatom = k_vatom.view<DeviceType>();
   }
 
-  if (gamma_flag && atom->nlocal > nmax) {
+  if (flag_compute_extrapolation_grade && atom->nlocal > nmax) {
         memory->destroy(extrapolation_grade_gamma);
         nmax = atom->nlocal;
         memory->create(extrapolation_grade_gamma, nmax, "pace/atom:gamma");
@@ -581,6 +600,14 @@ void PairPACEExtrapolationKokkos<DeviceType>::compute(int eflag_in, int vflag_in
         memset(extrapolation_grade_gamma, 0, nmax * sizeof(*extrapolation_grade_gamma));
   }
 
+  if (flag_corerep_factor && atom->nlocal > nmax_corerep) {
+    memory->destroy(corerep_factor);
+    nmax_corerep = atom->nlocal;
+    memory->create(corerep_factor, nmax_corerep, "pace/atom:corerep");
+    //zeroify array
+    memset(corerep_factor, 0, nmax_corerep * sizeof(*corerep_factor));
+  }
+
   copymode = 1;
   if (!force->newton_pair)
     error->all(FLERR,"PairPACEExtrapolationKokkos requires 'newton on'");
@@ -618,7 +645,6 @@ void PairPACEExtrapolationKokkos<DeviceType>::compute(int eflag_in, int vflag_in
   chunk_size = MIN(chunksize,inum); // "chunksize" variable is set by user
   chunk_offset = 0;
 
-
   grow(chunk_size, maxneigh);
 
   EV_FLOAT ev;
@@ -627,12 +653,15 @@ void PairPACEExtrapolationKokkos<DeviceType>::compute(int eflag_in, int vflag_in
 
     Kokkos::deep_copy(weights, 0.0);
     Kokkos::deep_copy(weights_rank1, 0.0);
-    Kokkos::deep_copy(A, 0.0);
+    Kokkos::deep_copy(A_sph, 0.0);
     Kokkos::deep_copy(A_rank1, 0.0);
     Kokkos::deep_copy(rhos, 0.0);
-
+    Kokkos::deep_copy(rho_core, 0.0);
+    Kokkos::deep_copy(d_d_min, PairPACEExtrapolation::aceimpl->basis_set->cutoffmax);
+    Kokkos::deep_copy(d_jj_min, -1);
     Kokkos::deep_copy(projections, 0.0);
     Kokkos::deep_copy(d_gamma, 0.0);
+    Kokkos::deep_copy(d_corerep, 0.0);
 
     EV_FLOAT ev_tmp;
 
@@ -659,15 +688,6 @@ void PairPACEExtrapolationKokkos<DeviceType>::compute(int eflag_in, int vflag_in
       Kokkos::parallel_for("ComputeRadial",policy_radial,*this);
     }
 
-    //ComputeYlm
-    {
-      int vector_length = vector_length_default;
-      int team_size = 16;
-      check_team_size_for<TagPairPACEComputeYlm>(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
-      typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeYlm> policy_ylm(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
-      Kokkos::parallel_for("ComputeYlm",policy_ylm,*this);
-    }
-
     //ComputeAi
     {
       int vector_length = vector_length_default;
@@ -696,14 +716,14 @@ void PairPACEExtrapolationKokkos<DeviceType>::compute(int eflag_in, int vflag_in
     }
 
     //ComputeGamma
-    if (gamma_flag) {
+    if (flag_compute_extrapolation_grade) {
       typename Kokkos::RangePolicy<DeviceType,TagPairPACEComputeGamma> policy_gamma(0,chunk_size);
       Kokkos::parallel_for("ComputeGamma",policy_gamma,*this);
     }
 
     //ComputeWeights
     {
-      typename Kokkos::RangePolicy<DeviceType,TagPairPACEComputeWeights> policy_weights(0, chunk_size * idx_ms_combs_max);
+      typename Kokkos::RangePolicy<DeviceType,TagPairPACEComputeWeights> policy_weights(0,chunk_size * idx_ms_combs_max);
       Kokkos::parallel_for("ComputeWeights",policy_weights,*this);
     }
 
@@ -712,7 +732,7 @@ void PairPACEExtrapolationKokkos<DeviceType>::compute(int eflag_in, int vflag_in
       int vector_length = vector_length_default;
       int team_size = team_size_default;
       check_team_size_for<TagPairPACEComputeDerivative>(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
-      typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeDerivative> policy_derivative(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
+      typename Kokkos::TeamPolicy<DeviceType,TagPairPACEComputeDerivative> policy_derivative(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
       Kokkos::parallel_for("ComputeDerivative",policy_derivative,*this);
     }
 
@@ -738,13 +758,20 @@ void PairPACEExtrapolationKokkos<DeviceType>::compute(int eflag_in, int vflag_in
     }
     ev += ev_tmp;
 
-    //if gamma_flag - copy current d_gamma to extrapolation_grade_gamma
-    if (gamma_flag){
+    // if flag_compute_extrapolation_grade - copy current d_gamma to extrapolation_grade_gamma
+
+    if (flag_compute_extrapolation_grade){
         h_gamma = Kokkos::create_mirror_view(d_gamma);
         Kokkos::deep_copy(h_gamma, d_gamma);
         memcpy(extrapolation_grade_gamma+chunk_offset, (void *) h_gamma.data(), sizeof(double)*chunk_size);
     }
 
+    if (flag_corerep_factor) {
+      h_corerep = Kokkos::create_mirror_view(d_corerep);
+      Kokkos::deep_copy(h_corerep,d_corerep);
+      memcpy(corerep_factor+chunk_offset, (void *) h_corerep.data(), sizeof(double)*chunk_size);
+    }
+
     chunk_offset += chunk_size;
   } // end while
 
@@ -799,6 +826,7 @@ void PairPACEExtrapolationKokkos<DeviceType>::operator() (TagPairPACEComputeNeig
   const X_FLOAT ytmp = x(i,1);
   const X_FLOAT ztmp = x(i,2);
   const int jnum = d_numneigh[i];
+  const int mu_i = d_map(type(i));
 
   // get a pointer to scratch memory
   // This is used to cache whether or not an atom is within the cutoff
@@ -858,6 +886,35 @@ void PairPACEExtrapolationKokkos<DeviceType>::operator() (TagPairPACEComputeNeig
     }
     offset++;
   });
+
+  if (is_zbl) {
+    //adapted from https://www.osti.gov/servlets/purl/1429450
+    if (ncount > 0) {
+      using minloc_value_type=Kokkos::MinLoc<F_FLOAT,int>::value_type;
+      minloc_value_type djjmin;
+      djjmin.val=1e20;
+      djjmin.loc=-1;
+      Kokkos::MinLoc<F_FLOAT,int> reducer_scalar(djjmin);
+      // loop over ncount (actual neighbours withing cutoff) rather than jnum (total number of neigh in cutoff+skin)
+      Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, ncount),
+               [&](const int offset, minloc_value_type &min_d_dist) {
+                 int j = d_nearest(ii,offset);
+                 j &= NEIGHMASK;
+                 auto r = d_rnorms(ii,offset);
+                 const int mu_j = d_map(type(j));
+                 const F_FLOAT d = r - (d_cut_in(mu_i, mu_j) - d_dcut_in(mu_i, mu_j));
+                 if (d < min_d_dist.val) {
+                   min_d_dist.val = d;
+                   min_d_dist.loc = offset;
+                 }
+       }, reducer_scalar);
+      d_d_min(ii) = djjmin.val;
+      d_jj_min(ii) = djjmin.loc;// d_jj_min should be NOT in 0..jnum range, but in 0..d_ncount(<=jnum)
+    } else {
+      d_d_min(ii) = 1e20;
+      d_jj_min(ii) = -1;
+    }
+  }
 }
 
 /* ---------------------------------------------------------------------- */
@@ -886,28 +943,6 @@ void PairPACEExtrapolationKokkos<DeviceType>::operator() (TagPairPACEComputeRadi
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType>
-KOKKOS_INLINE_FUNCTION
-void PairPACEExtrapolationKokkos<DeviceType>::operator() (TagPairPACEComputeYlm, const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeYlm>::member_type& team) const
-{
-  // Extract the atom number
-  int ii = team.team_rank() + team.team_size() * (team.league_rank() %
-           ((chunk_size+team.team_size()-1)/team.team_size()));
-  if (ii >= chunk_size) return;
-
-  // Extract the neighbor number
-  const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size());
-  const int ncount = d_ncount(ii);
-  if (jj >= ncount) return;
-
-  const double xn = d_rhats(ii, jj, 0);
-  const double yn = d_rhats(ii, jj, 1);
-  const double zn = d_rhats(ii, jj, 2);
-  compute_ylm(ii,jj,xn,yn,zn,lmax);
-}
-
-/* ---------------------------------------------------------------------- */
-
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairPACEExtrapolationKokkos<DeviceType>::operator() (TagPairPACEComputeAi, const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeAi>::member_type& team) const
@@ -929,13 +964,127 @@ void PairPACEExtrapolationKokkos<DeviceType>::operator() (TagPairPACEComputeAi,
     Kokkos::atomic_add(&A_rank1(ii, mu_j, n), gr(ii, jj, n) * Y00);
 
   // rank > 1
-  for (int n = 0; n < nradmax; n++) {
-    for (int l = 0; l <= lmax; l++) {
-      for (int m = 0; m <= l; m++) {
-        const int idx = l * (l + 1) + m; // (l, m)
-        Kokkos::atomic_add(&A(ii, mu_j, n, idx).re, fr(ii, jj, n, l) * ylm(ii, jj, idx).re);
-        Kokkos::atomic_add(&A(ii, mu_j, n, idx).im, fr(ii, jj, n, l) * ylm(ii, jj, idx).im);
+
+  // Compute plm and ylm
+
+  // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!!
+  // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!!
+  // prefactors include 1/sqrt(2) factor compared to reference
+
+  complex ylm, phase;
+  complex phasem, mphasem1;
+  complex dyx, dyy, dyz;
+  complex rdy;
+
+  const double rx = d_rhats(ii, jj, 0);
+  const double ry = d_rhats(ii, jj, 1);
+  const double rz = d_rhats(ii, jj, 2);
+
+  phase.re = rx;
+  phase.im = ry;
+
+  double plm_idx,plm_idx1,plm_idx2;
+
+  plm_idx = plm_idx1 = plm_idx2 = 0.0;
+
+  int idx_sph = 0;
+
+  // m = 0
+  for (int l = 0; l <= lmax; l++) {
+    // const int idx = l * (l + 1);
+
+    if (l == 0) {
+      // l=0, m=0
+      // plm[0] = Y00/sq1o4pi; //= sq1o4pi;
+      plm_idx = Y00; //= 1;
+    } else if (l == 1) {
+      // l=1, m=0
+      plm_idx = Y00 * sq3 * rz;
+    } else {
+      // l>=2, m=0
+      plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2);
+    }
+
+    ylm.re = plm_idx;
+    ylm.im = 0.0;
+
+    for (int n = 0; n < nradmax; n++) {
+      Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).re, fr(ii, jj, l, n) * ylm.re);
+      Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).im, fr(ii, jj, l, n) * ylm.im);
+    }
+
+    plm_idx2 = plm_idx1;
+    plm_idx1 = plm_idx;
+
+    idx_sph++;
+  }
+
+  plm_idx = plm_idx1 = plm_idx2 = 0.0;
+
+  // m = 1
+  for (int l = 1; l <= lmax; l++) {
+    // const int idx = l * (l + 1) + 1; // (l, 1)
+
+    if (l == 1) {
+      // l=1, m=1
+      plm_idx = -sq3o2 * Y00;
+    } else if (l == 2) {
+      const double t = dl(l) * plm_idx1;
+      plm_idx = t * rz;
+    } else {
+      plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2);
+    }
+
+    ylm = phase * plm_idx;
+
+    for (int n = 0; n < nradmax; n++) {
+      Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).re, fr(ii, jj, l, n) * ylm.re);
+      Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).im, fr(ii, jj, l, n) * ylm.im);
+    }
+
+    plm_idx2 = plm_idx1;
+    plm_idx1 = plm_idx;
+
+    idx_sph++;
+  }
+
+  plm_idx = plm_idx1 = plm_idx2 = 0.0;
+
+  double plm_mm1_mm1 = -sq3o2 * Y00; // (1, 1)
+
+  // m > 1
+  phasem = phase;
+  for (int m = 2; m <= lmax; m++) {
+
+    mphasem1.re = phasem.re * double(m);
+    mphasem1.im = phasem.im * double(m);
+    phasem = phasem * phase;
+
+    for (int l = m; l <= lmax; l++) {
+      // const int idx = l * (l + 1) + m;
+
+      if (l == m) {
+        plm_idx = cl(l) * plm_mm1_mm1; // (m+1, m)
+        plm_mm1_mm1 = plm_idx;
+      } else if (l == (m + 1)) {
+        const double t = dl(l) * plm_mm1_mm1; // (m - 1, m - 1)
+        plm_idx = t * rz; // (m, m)
+      } else {
+        plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2);
       }
+
+      ylm.re = phasem.re * plm_idx;
+      ylm.im = phasem.im * plm_idx;
+
+      for (int n = 0; n < nradmax; n++) {
+        Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).re, fr(ii, jj, l, n) * ylm.re);
+        Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).im, fr(ii, jj, l, n) * ylm.im);
+      }
+
+      plm_idx2 = plm_idx1;
+      plm_idx1 = plm_idx;
+
+      idx_sph++;
     }
   }
 
@@ -949,17 +1098,35 @@ template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairPACEExtrapolationKokkos<DeviceType>::operator() (TagPairPACEConjugateAi, const int& ii) const
 {
-  //complex conjugate A's (for NEGATIVE (-m) terms)
-  // for rank > 1
   for (int mu_j = 0; mu_j < nelements; mu_j++) {
-    for (int n = 0; n < nradmax; n++) {
-      for (int l = 0; l <= lmax; l++) {
+
+    // transpose
+
+    int idx_sph = 0;
+
+    for (int m = 0; m <= lmax; m++) {
+      for (int l = m; l <= lmax; l++) {
+        const int idx = l * (l + 1) + m;
+        for (int n = 0; n < nradmax; n++) {
+          A(ii, mu_j, idx, n) = A_sph(ii, mu_j, idx_sph, n);
+        }
+
+        idx_sph++;
+      }
+    }
+
+    // complex conjugate A's (for NEGATIVE (-m) terms)
+    //  for rank > 1
+
+    for (int l = 0; l <= lmax; l++) {
         //fill in -m part in the outer loop using the same m <-> -m symmetry as for Ylm
-        for (int m = 1; m <= l; m++) {
-          const int idx = l * (l + 1) + m; // (l, m)
-          const int idxm = l * (l + 1) - m; // (l, -m)
-          const int factor = m % 2 == 0 ? 1 : -1;
-          A(ii, mu_j, n, idxm) = A(ii, mu_j, n, idx).conj() * (double)factor;
+      for (int m = 1; m <= l; m++) {
+        const int idx = l * (l + 1) + m; // (l, m)
+        const int idxm = l * (l + 1) - m; // (l, -m)
+        const int idx_sph = d_idx_sph(idx);
+        const int factor = m % 2 == 0 ? 1 : -1;
+        for (int n = 0; n < nradmax; n++) {
+          A(ii, mu_j, idxm, n) = A_sph(ii, mu_j, idx_sph, n).conj() * (double)factor;
         }
       }
     }
@@ -972,73 +1139,72 @@ template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairPACEExtrapolationKokkos<DeviceType>::operator() (TagPairPACEComputeRho, const int& iter) const
 {
-  const int idx_ms_comb = iter / chunk_size;
+  const int idx_ms_combs = iter / chunk_size;
   const int ii = iter % chunk_size;
 
   const int i = d_ilist[ii + chunk_offset];
   const int mu_i = d_map(type(i));
 
-  if (idx_ms_comb >= d_idx_ms_combs_count(mu_i)) return;
+  if (idx_ms_combs >= d_idx_ms_combs_count(mu_i)) return;
 
   const int ndensity = d_ndensity(mu_i);
 
-  const int func_ind = d_func_inds(mu_i, idx_ms_comb);
-  const int rank = d_rank(mu_i, func_ind);
+  const int idx_func = d_idx_funcs(mu_i, idx_ms_combs);
+  const int rank = d_rank(mu_i, idx_func);
   const int r = rank - 1;
 
   // Basis functions B with iterative product and density rho(p) calculation
   if (rank == 1) {
-    const int mu = d_mus(mu_i, func_ind, 0);
-    const int n = d_ns(mu_i, func_ind, 0);
+    const int mu = d_mus(mu_i, idx_func, 0);
+    const int n = d_ns(mu_i, idx_func, 0);
     double A_cur = A_rank1(ii, mu, n - 1);
     for (int p = 0; p < ndensity; ++p) {
       //for rank=1 (r=0) only 1 ms-combination exists (ms_ind=0), so index of func.ctildes is 0..ndensity-1
-      Kokkos::atomic_add(&rhos(ii, p), d_coeffs(mu_i, func_ind, p) * d_gen_cgs(mu_i, idx_ms_comb) * A_cur);
+      Kokkos::atomic_add(&rhos(ii, p), d_coeffs(mu_i, idx_func, p) * d_gen_cgs(mu_i, idx_ms_combs) * A_cur);
     }
 
-
-    //gamma_i
-    if (gamma_flag)
-        Kokkos::atomic_add(&projections(ii, func_ind),  d_gen_cgs(mu_i, idx_ms_comb) * A_cur);
+    // gamma_i
+    if (flag_compute_extrapolation_grade)
+      Kokkos::atomic_add(&projections(ii, idx_func),  d_gen_cgs(mu_i, idx_ms_combs) * A_cur);
 
   } else { // rank > 1
     // loop over {ms} combinations in sum
 
     // loop over m, collect B  = product of A with given ms
-    A_forward_prod(ii, idx_ms_comb, 0) = complex::one();
+    A_forward_prod(ii, idx_ms_combs, 0) = complex::one();
 
     // fill forward A-product triangle
     for (int t = 0; t < rank; t++) {
       //TODO: optimize ns[t]-1 -> ns[t] during functions construction
-      const int mu = d_mus(mu_i, func_ind, t);
-      const int n = d_ns(mu_i, func_ind, t);
-      const int l = d_ls(mu_i, func_ind, t);
-      const int m = d_ms_combs(mu_i, idx_ms_comb, t); // current ms-combination (of length = rank)
+      const int mu = d_mus(mu_i, idx_func, t);
+      const int n = d_ns(mu_i, idx_func, t);
+      const int l = d_ls(mu_i, idx_func, t);
+      const int m = d_ms_combs(mu_i, idx_ms_combs, t); // current ms-combination (of length = rank)
       const int idx = l * (l + 1) + m; // (l, m)
-      A_list(ii, idx_ms_comb, t) = A(ii, mu, n - 1, idx);
-      A_forward_prod(ii, idx_ms_comb, t + 1) = A_forward_prod(ii, idx_ms_comb, t) * A_list(ii, idx_ms_comb, t);
+      A_list(ii, idx_ms_combs, t) = A(ii, mu, idx, n - 1);
+      A_forward_prod(ii, idx_ms_combs, t + 1) = A_forward_prod(ii, idx_ms_combs, t) * A_list(ii, idx_ms_combs, t);
     }
 
     complex A_backward_prod = complex::one();
 
     // fill backward A-product triangle
     for (int t = r; t >= 1; t--) {
-      const complex dB = A_forward_prod(ii, idx_ms_comb, t) * A_backward_prod; // dB - product of all A's except t-th
-      dB_flatten(ii, idx_ms_comb, t) = dB;
+      const complex dB = A_forward_prod(ii, idx_ms_combs, t) * A_backward_prod; // dB - product of all A's except t-th
+      dB_flatten(ii, idx_ms_combs, t) = dB;
 
-      A_backward_prod = A_backward_prod * A_list(ii, idx_ms_comb, t);
+      A_backward_prod = A_backward_prod * A_list(ii, idx_ms_combs, t);
     }
-    dB_flatten(ii, idx_ms_comb, 0) = A_forward_prod(ii, idx_ms_comb, 0) * A_backward_prod;
+    dB_flatten(ii, idx_ms_combs, 0) = A_forward_prod(ii, idx_ms_combs, 0) * A_backward_prod;
 
-    const complex B = A_forward_prod(ii, idx_ms_comb, rank);
+    const complex B = A_forward_prod(ii, idx_ms_combs, rank);
 
     for (int p = 0; p < ndensity; ++p) {
       // real-part only multiplication
-      Kokkos::atomic_add(&rhos(ii, p), B.real_part_product(d_coeffs(mu_i, func_ind, p) * d_gen_cgs(mu_i, idx_ms_comb)));
+      Kokkos::atomic_add(&rhos(ii, p), B.real_part_product(d_coeffs(mu_i, idx_func, p) * d_gen_cgs(mu_i, idx_ms_combs)));
     }
-    //gamma_i
-    if (gamma_flag)
-      Kokkos::atomic_add(&projections(ii, func_ind),  B.real_part_product(d_gen_cgs(mu_i, idx_ms_comb)));
+    // gamma_i
+    if (flag_compute_extrapolation_grade)
+      Kokkos::atomic_add(&projections(ii, idx_func),  B.real_part_product(d_gen_cgs(mu_i, idx_ms_combs)));
   }
 }
 
@@ -1056,23 +1222,43 @@ void PairPACEExtrapolationKokkos<DeviceType>::operator() (TagPairPACEComputeFS,
   const int ndensity = d_ndensity(mu_i);
 
   double evdwl, fcut, dfcut;
+  double evdwl_cut;
   evdwl = fcut = dfcut = 0.0;
 
-  inner_cutoff(rho_core(ii), rho_cut, drho_cut, fcut, dfcut);
   FS_values_and_derivatives(ii, evdwl, mu_i);
 
-  dF_drho_core(ii) = evdwl * dfcut + 1;
+  if (is_zbl) {
+    if (d_jj_min(ii) != -1) {
+      const int mu_jmin = d_mu(ii,d_jj_min(ii));
+      F_FLOAT dcutin = d_dcut_in(mu_i, mu_jmin);
+      F_FLOAT transition_coordinate =  dcutin  - d_d_min(ii); // == cutin - r_min
+      cutoff_func_poly(transition_coordinate, dcutin, dcutin, fcut, dfcut);
+      dfcut = -dfcut; // invert, because rho_core = cutin - r_min
+    } else {
+      // no neighbours
+      fcut = 1;
+      dfcut = 0;
+    }
+    evdwl_cut = evdwl * fcut + rho_core(ii) * (1 - fcut); // evdwl * fcut + rho_core_uncut  - rho_core_uncut* fcut
+    dF_drho_core(ii) = 1 - fcut;
+    dF_dfcut(ii) = evdwl * dfcut - rho_core(ii) * dfcut;
+  } else {
+    inner_cutoff(rho_core(ii), rho_cut, drho_cut, fcut, dfcut);
+    dF_drho_core(ii) = evdwl * dfcut + 1;
+    evdwl_cut = evdwl * fcut + rho_core(ii);
+  }
   for (int p = 0; p < ndensity; ++p)
     dF_drho(ii, p) *= fcut;
 
-
   // tally energy contribution
   if (eflag) {
-    double evdwl_cut = evdwl * fcut + rho_core(ii);
     // E0 shift
     evdwl_cut += d_E0vals(mu_i);
     e_atom(ii) = evdwl_cut;
   }
+
+  if (flag_corerep_factor)
+    d_corerep(ii) = 1-fcut;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -1109,52 +1295,58 @@ template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairPACEExtrapolationKokkos<DeviceType>::operator() (TagPairPACEComputeWeights, const int& iter) const
 {
-  const int idx_ms_comb = iter / chunk_size;
+  const int idx_ms_combs = iter / chunk_size;
   const int ii = iter % chunk_size;
 
   const int i = d_ilist[ii + chunk_offset];
   const int mu_i = d_map(type(i));
 
-  if (idx_ms_comb >= d_idx_ms_combs_count(mu_i)) return;
+  if (idx_ms_combs >= d_idx_ms_combs_count(mu_i)) return;
 
   const int ndensity = d_ndensity(mu_i);
 
-  const int func_ind = d_func_inds(mu_i, idx_ms_comb);
-  const int rank = d_rank(mu_i, func_ind);
+  const int idx_func = d_idx_funcs(mu_i, idx_ms_combs);
+  const int rank = d_rank(mu_i, idx_func);
 
   // Weights and theta calculation
 
   if (rank == 1) {
-    const int mu = d_mus(mu_i, func_ind, 0);
-    const int n = d_ns(mu_i, func_ind, 0);
+    const int mu = d_mus(mu_i, idx_func, 0);
+    const int n = d_ns(mu_i, idx_func, 0);
     double theta = 0.0;
     for (int p = 0; p < ndensity; ++p) {
       // for rank=1 (r=0) only 1 ms-combination exists (ms_ind=0), so index of func.ctildes is 0..ndensity-1
-      theta += dF_drho(ii, p) * d_coeffs(mu_i, func_ind, p) * d_gen_cgs(mu_i, idx_ms_comb);
+      theta += dF_drho(ii, p) * d_coeffs(mu_i, idx_func, p) * d_gen_cgs(mu_i, idx_ms_combs);
     }
     Kokkos::atomic_add(&weights_rank1(ii, mu, n - 1), theta);
   } else { // rank > 1
     double theta = 0.0;
     for (int p = 0; p < ndensity; ++p)
-      theta += dF_drho(ii, p) * d_coeffs(mu_i, func_ind, p) * d_gen_cgs(mu_i, idx_ms_comb);
+      theta += dF_drho(ii, p) * d_coeffs(mu_i, idx_func, p) * d_gen_cgs(mu_i, idx_ms_combs);
 
     theta *= 0.5; // 0.5 factor due to possible double counting ???
     for (int t = 0; t < rank; ++t) {
-      const int m_t = d_ms_combs(mu_i, idx_ms_comb, t);
+      const int m_t = d_ms_combs(mu_i, idx_ms_combs, t);
       const int factor = (m_t % 2 == 0 ? 1 : -1);
-      const complex dB = dB_flatten(ii, idx_ms_comb, t);
-      const int mu_t = d_mus(mu_i, func_ind, t);
-      const int n_t = d_ns(mu_i, func_ind, t);
-      const int l_t = d_ls(mu_i, func_ind, t);
+      const complex dB = dB_flatten(ii, idx_ms_combs, t);
+      const int mu_t = d_mus(mu_i, idx_func, t);
+      const int n_t = d_ns(mu_i, idx_func, t);
+      const int l_t = d_ls(mu_i, idx_func, t);
       const int idx = l_t * (l_t + 1) + m_t; // (l, m)
-      const complex value = theta * dB;
-      Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idx).re), value.re);
-      Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idx).im), value.im);
+      const int idx_sph = d_idx_sph(idx);
+      if (idx_sph >= 0) {
+        const complex value = theta * dB;
+        Kokkos::atomic_add(&(weights(ii, mu_t, idx_sph, n_t - 1).re), value.re);
+        Kokkos::atomic_add(&(weights(ii, mu_t, idx_sph, n_t - 1).im), value.im);
+      }
       // update -m_t (that could also be positive), because the basis is half_basis
       const int idxm = l_t * (l_t + 1) - m_t; // (l, -m)
-      const complex valuem = theta * dB.conj() * (double)factor;
-      Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idxm).re), valuem.re);
-      Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idxm).im), valuem.im);
+      const int idxm_sph = d_idx_sph(idxm);
+      if (idxm_sph >= 0) {
+        const complex valuem = theta * dB.conj() * (double)factor;
+        Kokkos::atomic_add(&(weights(ii, mu_t, idxm_sph, n_t - 1).re), valuem.re);
+        Kokkos::atomic_add(&(weights(ii, mu_t, idxm_sph, n_t - 1).im), valuem.im);
+      }
     }
   }
 }
@@ -1201,37 +1393,239 @@ void PairPACEExtrapolationKokkos<DeviceType>::operator() (TagPairPACEComputeDeri
   }
 
   // for rank > 1
-  for (int n = 0; n < nradmax; n++) {
-    for (int l = 0; l <= lmax; l++) {
-      const double R_over_r = fr(ii, jj, n, l) * rinv;
-      const double DR = dfr(ii, jj, n, l);
 
-      // for m >= 0
-      for (int m = 0; m <= l; m++) {
-        const int idx = l * (l + 1) + m; // (l, m)
-        complex w = weights(ii, mu_j, n, idx);
+  // compute plm, dplm, ylm and dylm
+  // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!!
+  // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!!
+  // prefactors include 1/sqrt(2) factor compared to reference
+
+  complex ylm,dylm[3];
+  complex phase;
+  complex phasem, mphasem1;
+  complex dyx, dyy, dyz;
+  complex rdy;
+
+  const double rx = d_rhats(ii, jj, 0);
+  const double ry = d_rhats(ii, jj, 1);
+  const double rz = d_rhats(ii, jj, 2);
+
+  phase.re = rx;
+  phase.im = ry;
+
+  double plm_idx,plm_idx1,plm_idx2;
+  double dplm_idx,dplm_idx1,dplm_idx2;
+
+  plm_idx = plm_idx1 = plm_idx2 = 0.0;
+  dplm_idx = dplm_idx1 = dplm_idx2 = 0.0;
+
+  int idx_sph = 0;
+
+  // m = 0
+  for (int l = 0; l <= lmax; l++) {
+    // const int idx = l * (l + 1);
+
+    if (l == 0) {
+      // l=0, m=0
+      // plm[0] = Y00/sq1o4pi; //= sq1o4pi;
+      plm_idx = Y00; //= 1;
+      dplm_idx = 0.0;
+    } else if (l == 1) {
+      // l=1, m=0
+      plm_idx = Y00 * sq3 * rz;
+      dplm_idx = Y00 * sq3;
+    } else {
+      // l>=2, m=0
+      plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2);
+      dplm_idx = alm(idx_sph) * (plm_idx1 + rz * dplm_idx1 + blm(idx_sph) * dplm_idx2);
+    }
+
+    ylm.re = plm_idx;
+    ylm.im = 0.0;
+
+    dyz.re = dplm_idx;
+    rdy.re = dyz.re * rz;
+
+    dylm[0].re = -rdy.re * rx;
+    dylm[0].im = 0.0;
+    dylm[1].re = -rdy.re * ry;
+    dylm[1].im = 0.0;
+    dylm[2].re = dyz.re - rdy.re * rz;
+    dylm[2].im = 0;
+
+    for (int n = 0; n < nradmax; n++) {
+
+      const double R_over_r = fr(ii, jj, l, n) * rinv;
+      const double DR = dfr(ii, jj, l, n);
+      const complex Y_DR = ylm * DR;
+
+      complex w = weights(ii, mu_j, idx_sph, n);
+      if (w.re == 0.0 && w.im == 0.0) continue;
+
+      complex grad_phi_nlm[3];
+      grad_phi_nlm[0] = Y_DR * r_hat[0] + dylm[0] * R_over_r;
+      grad_phi_nlm[1] = Y_DR * r_hat[1] + dylm[1] * R_over_r;
+      grad_phi_nlm[2] = Y_DR * r_hat[2] + dylm[2] * R_over_r;
+      // real-part multiplication only
+      f_ji[0] += w.real_part_product(grad_phi_nlm[0]);
+      f_ji[1] += w.real_part_product(grad_phi_nlm[1]);
+      f_ji[2] += w.real_part_product(grad_phi_nlm[2]);
+    }
+
+    plm_idx2 = plm_idx1;
+    dplm_idx2 = dplm_idx1;
+
+    plm_idx1 = plm_idx;
+    dplm_idx1 = dplm_idx;
+
+    idx_sph++;
+  }
+
+  plm_idx = plm_idx1 = plm_idx2 = 0.0;
+  dplm_idx = dplm_idx1 = dplm_idx2 = 0.0;
+
+  // m = 1
+  for (int l = 1; l <= lmax; l++) {
+    // const int idx = l * (l + 1) + 1; // (l, 1)
+
+    if (l == 1) {
+      // l=1, m=1
+      plm_idx = -sq3o2 * Y00;
+      dplm_idx = 0.0;
+    } else if (l == 2) {
+      const double t = dl(l) * plm_idx1;
+      plm_idx = t * rz;
+      dplm_idx = t;
+    } else {
+      plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2);
+      dplm_idx = alm(idx_sph) * (plm_idx1 + rz * dplm_idx1 + blm(idx_sph) * dplm_idx2);
+    }
+
+    ylm = phase * plm_idx;
+
+    dyx.re = plm_idx;
+    dyx.im = 0.0;
+    dyy.re = 0.0;
+    dyy.im = plm_idx;
+    dyz.re = phase.re * dplm_idx;
+    dyz.im = phase.im * dplm_idx;
+
+    rdy.re = rx * dyx.re + +rz * dyz.re;
+    rdy.im = ry * dyy.im + rz * dyz.im;
+
+    dylm[0].re = dyx.re - rdy.re * rx;
+    dylm[0].im = -rdy.im * rx;
+    dylm[1].re = -rdy.re * ry;
+    dylm[1].im = dyy.im - rdy.im * ry;
+    dylm[2].re = dyz.re - rdy.re * rz;
+    dylm[2].im = dyz.im - rdy.im * rz;
+
+    for (int n = 0; n < nradmax; n++) {
+
+      const double R_over_r = fr(ii, jj, l, n) * rinv;
+      const double DR = dfr(ii, jj, l, n);
+      const complex Y_DR = ylm * DR;
+
+      complex w = weights(ii, mu_j, idx_sph, n);
+      if (w.re == 0.0 && w.im == 0.0) continue;
+      // counting for -m cases if m > 0
+      w.re *= 2.0;
+      w.im *= 2.0;
+
+      complex grad_phi_nlm[3];
+      grad_phi_nlm[0] = Y_DR * r_hat[0] + dylm[0] * R_over_r;
+      grad_phi_nlm[1] = Y_DR * r_hat[1] + dylm[1] * R_over_r;
+      grad_phi_nlm[2] = Y_DR * r_hat[2] + dylm[2] * R_over_r;
+      // real-part multiplication only
+      f_ji[0] += w.real_part_product(grad_phi_nlm[0]);
+      f_ji[1] += w.real_part_product(grad_phi_nlm[1]);
+      f_ji[2] += w.real_part_product(grad_phi_nlm[2]);
+    }
+
+    plm_idx2 = plm_idx1;
+    dplm_idx2 = dplm_idx1;
+
+    plm_idx1 = plm_idx;
+    dplm_idx1 = dplm_idx;
+
+    idx_sph++;
+  }
+
+  plm_idx = plm_idx1 = plm_idx2 = 0.0;
+  dplm_idx = dplm_idx1 = dplm_idx2 = 0.0;
+
+  double plm_mm1_mm1 = -sq3o2 * Y00; // (1, 1)
+
+  // m > 1
+  phasem = phase;
+  for (int m = 2; m <= lmax; m++) {
+
+    mphasem1.re = phasem.re * double(m);
+    mphasem1.im = phasem.im * double(m);
+    phasem = phasem * phase;
+
+    for (int l = m; l <= lmax; l++) {
+      // const int idx = l * (l + 1) + m;
+
+      if (l == m) {
+        plm_idx = cl(l) * plm_mm1_mm1; // (m+1, m)
+        dplm_idx = 0.0;
+        plm_mm1_mm1 = plm_idx;
+      } else if (l == (m + 1)) {
+        const double t = dl(l) * plm_mm1_mm1; // (m - 1, m - 1)
+        plm_idx = t * rz; // (m, m)
+        dplm_idx = t;
+      } else {
+        plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2);
+        dplm_idx = alm(idx_sph) * (plm_idx1 + rz * dplm_idx1 + blm(idx_sph) * dplm_idx2);
+      }
+
+      ylm.re = phasem.re * plm_idx;
+      ylm.im = phasem.im * plm_idx;
+
+      dyx = mphasem1 * plm_idx;
+      dyy.re = -dyx.im;
+      dyy.im = dyx.re;
+      dyz = phasem * dplm_idx;
+
+      rdy.re = rx * dyx.re + ry * dyy.re + rz * dyz.re;
+      rdy.im = rx * dyx.im + ry * dyy.im + rz * dyz.im;
+
+      dylm[0].re = dyx.re - rdy.re * rx;
+      dylm[0].im = dyx.im - rdy.im * rx;
+      dylm[1].re = dyy.re - rdy.re * ry;
+      dylm[1].im = dyy.im - rdy.im * ry;
+      dylm[2].re = dyz.re - rdy.re * rz;
+      dylm[2].im = dyz.im - rdy.im * rz;
+
+      for (int n = 0; n < nradmax; n++) {
+
+        const double R_over_r = fr(ii, jj, l, n) * rinv;
+        const double DR = dfr(ii, jj, l, n);
+        const complex Y_DR = ylm * DR;
+
+        complex w = weights(ii, mu_j, idx_sph, n);
         if (w.re == 0.0 && w.im == 0.0) continue;
         // counting for -m cases if m > 0
-        if (m > 0) {
-          w.re *= 2.0;
-          w.im *= 2.0;
-        }
-
-        complex DY[3];
-        DY[0] = dylm(ii, jj, idx, 0);
-        DY[1] = dylm(ii, jj, idx, 1);
-        DY[2] = dylm(ii, jj, idx, 2);
-        const complex Y_DR = ylm(ii, jj, idx) * DR;
+        w.re *= 2.0;
+        w.im *= 2.0;
 
         complex grad_phi_nlm[3];
-        grad_phi_nlm[0] = Y_DR * r_hat[0] + DY[0] * R_over_r;
-        grad_phi_nlm[1] = Y_DR * r_hat[1] + DY[1] * R_over_r;
-        grad_phi_nlm[2] = Y_DR * r_hat[2] + DY[2] * R_over_r;
+        grad_phi_nlm[0] = Y_DR * r_hat[0] + dylm[0] * R_over_r;
+        grad_phi_nlm[1] = Y_DR * r_hat[1] + dylm[1] * R_over_r;
+        grad_phi_nlm[2] = Y_DR * r_hat[2] + dylm[2] * R_over_r;
         // real-part multiplication only
         f_ji[0] += w.real_part_product(grad_phi_nlm[0]);
         f_ji[1] += w.real_part_product(grad_phi_nlm[1]);
         f_ji[2] += w.real_part_product(grad_phi_nlm[2]);
       }
+
+      plm_idx2 = plm_idx1;
+      dplm_idx2 = dplm_idx1;
+
+      plm_idx1 = plm_idx;
+      dplm_idx1 = dplm_idx;
+
+      idx_sph++;
     }
   }
 
@@ -1240,6 +1634,15 @@ void PairPACEExtrapolationKokkos<DeviceType>::operator() (TagPairPACEComputeDeri
   f_ij(ii, jj, 0) = scale * f_ji[0] + fpair * r_hat[0];
   f_ij(ii, jj, 1) = scale * f_ji[1] + fpair * r_hat[1];
   f_ij(ii, jj, 2) = scale * f_ji[2] + fpair * r_hat[2];
+
+  if (is_zbl) {
+    if (jj==d_jj_min(ii)) {
+      // DCRU = 1.0
+      f_ij(ii, jj, 0) += dF_dfcut(ii) * r_hat[0];
+      f_ij(ii, jj, 1) += dF_dfcut(ii) * r_hat[1];
+      f_ij(ii, jj, 2) += dF_dfcut(ii) * r_hat[2];
+    }
+  }
 }
 
 /* ---------------------------------------------------------------------- */
@@ -1360,31 +1763,46 @@ void PairPACEExtrapolationKokkos<DeviceType>::v_tally_xyz(EV_FLOAT &ev, const in
 template<class DeviceType>
 void PairPACEExtrapolationKokkos<DeviceType>::pre_compute_harmonics(int lmax)
 {
+  auto h_idx_sph = Kokkos::create_mirror_view(d_idx_sph);
   auto h_alm = Kokkos::create_mirror_view(alm);
   auto h_blm = Kokkos::create_mirror_view(blm);
   auto h_cl = Kokkos::create_mirror_view(cl);
   auto h_dl = Kokkos::create_mirror_view(dl);
 
-  for (int l = 1; l <= lmax; l++) {
-    const double lsq = l * l;
-    const double ld = 2 * l;
-    const double l1 = (4 * lsq - 1);
-    const double l2 = lsq - ld + 1;
-    for (int m = 0; m < l - 1; m++) {
-      const double msq = m * m;
-      const double a = sqrt((double(l1)) / (double(lsq - msq)));
-      const double b = -sqrt((double(l2 - msq)) / (double(4 * l2 - 1)));
+  Kokkos::deep_copy(h_idx_sph,-1);
+
+  int idx_sph = 0;
+  for (int m = 0; m <= lmax; m++) {
+    const double msq = m * m;
+    for (int l = m; l <= lmax; l++) {
       const int idx = l * (l + 1) + m; // (l, m)
-      h_alm(idx) = a;
-      h_blm(idx) = b;
+      h_idx_sph(idx) = idx_sph;
+
+      double a = 0.0;
+      double b = 0.0;
+
+      if (l > 1 && l != m) {
+        const double lsq = l * l;
+        const double ld = 2 * l;
+        const double l1 = (4 * lsq - 1);
+        const double l2 = lsq - ld + 1;
+
+        a = sqrt((double(l1)) / (double(lsq - msq)));
+        b = -sqrt((double(l2 - msq)) / (double(4 * l2 - 1)));
+      }
+      h_alm(idx_sph) = a;
+      h_blm(idx_sph) = b;
+      idx_sph++;
     }
   }
+  idx_sph_max = idx_sph;
 
   for (int l = 1; l <= lmax; l++) {
     h_cl(l) = -sqrt(1.0 + 0.5 / (double(l)));
     h_dl(l) = sqrt(double(2 * (l - 1) + 3));
   }
 
+  Kokkos::deep_copy(d_idx_sph, h_idx_sph);
   Kokkos::deep_copy(alm, h_alm);
   Kokkos::deep_copy(blm, h_blm);
   Kokkos::deep_copy(cl, h_cl);
@@ -1393,143 +1811,6 @@ void PairPACEExtrapolationKokkos<DeviceType>::pre_compute_harmonics(int lmax)
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType>
-KOKKOS_INLINE_FUNCTION
-void PairPACEExtrapolationKokkos<DeviceType>::compute_barplm(int ii, int jj, double rz, int lmax) const
-{
-  // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!!
-  // prefactors include 1/sqrt(2) factor compared to reference
-
-  // l=0, m=0
-  // plm(ii, jj, 0, 0) = Y00/sq1o4pi; //= sq1o4pi;
-  plm(ii, jj, 0) = Y00; //= 1;
-  dplm(ii, jj, 0) = 0.0;
-
-  if (lmax > 0) {
-
-    // l=1, m=0
-    plm(ii, jj, 2) = Y00 * sq3 * rz;
-    dplm(ii, jj, 2) = Y00 * sq3;
-
-    // l=1, m=1
-    plm(ii, jj, 3) = -sq3o2 * Y00;
-    dplm(ii, jj, 3) = 0.0;
-
-    // loop l = 2, lmax
-    for (int l = 2; l <= lmax; l++) {
-      for (int m = 0; m < l - 1; m++) {
-        const int idx = l * (l + 1) + m; // (l, m)
-        const int idx1 = (l - 1) * l + m; // (l - 1, m)
-        const int idx2 = (l - 2) * (l - 1) + m; // (l - 2, m)
-        plm(ii, jj, idx) = alm(idx) * (rz * plm(ii, jj, idx1) + blm(idx) * plm(ii, jj, idx2));
-        dplm(ii, jj, idx) = alm(idx) * (plm(ii, jj, idx1) + rz * dplm(ii, jj, idx1) + blm(idx) * dplm(ii, jj, idx2));
-      }
-      const int idx = l * (l + 1) + l; // (l, l)
-      const int idx1 = l * (l + 1) + l - 1; // (l, l - 1)
-      const int idx2 = (l - 1) * l + l - 1; // (l - 1, l - 1)
-      const double t = dl(l) * plm(ii, jj, idx2);
-      plm(ii, jj, idx1) = t * rz;
-      dplm(ii, jj, idx1) = t;
-      plm(ii, jj, idx) = cl(l) * plm(ii, jj, idx2);
-      dplm(ii, jj, idx) = 0.0;
-    }
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType>
-KOKKOS_INLINE_FUNCTION
-void PairPACEExtrapolationKokkos<DeviceType>::compute_ylm(int ii, int jj, double rx, double ry, double rz, int lmax) const
-{
-  // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!!
-
-  complex phase;
-  complex phasem, mphasem1;
-  complex dyx, dyy, dyz;
-  complex rdy;
-
-  phase.re = rx;
-  phase.im = ry;
-
-  // compute barplm
-  compute_barplm(ii, jj, rz, lmax);
-
-  // m = 0
-  for (int l = 0; l <= lmax; l++) {
-    const int idx = l * (l + 1);
-
-    ylm(ii, jj, idx).re = plm(ii, jj, idx);
-    ylm(ii, jj, idx).im = 0.0;
-
-    dyz.re = dplm(ii, jj, idx);
-    rdy.re = dyz.re * rz;
-
-    dylm(ii, jj, idx, 0).re = -rdy.re * rx;
-    dylm(ii, jj, idx, 0).im = 0.0;
-    dylm(ii, jj, idx, 1).re = -rdy.re * ry;
-    dylm(ii, jj, idx, 1).im = 0.0;
-    dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz;
-    dylm(ii, jj, idx, 2).im = 0;
-  }
-  // m = 1
-  for (int l = 1; l <= lmax; l++) {
-    const int idx = l * (l + 1) + 1;
-
-    ylm(ii, jj, idx) = phase * plm(ii, jj, idx);
-
-    dyx.re = plm(ii, jj, idx);
-    dyx.im = 0.0;
-    dyy.re = 0.0;
-    dyy.im = plm(ii, jj, idx);
-    dyz.re = phase.re * dplm(ii, jj, idx);
-    dyz.im = phase.im * dplm(ii, jj, idx);
-
-    rdy.re = rx * dyx.re + +rz * dyz.re;
-    rdy.im = ry * dyy.im + rz * dyz.im;
-
-    dylm(ii, jj, idx, 0).re = dyx.re - rdy.re * rx;
-    dylm(ii, jj, idx, 0).im = -rdy.im * rx;
-    dylm(ii, jj, idx, 1).re = -rdy.re * ry;
-    dylm(ii, jj, idx, 1).im = dyy.im - rdy.im * ry;
-    dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz;
-    dylm(ii, jj, idx, 2).im = dyz.im - rdy.im * rz;
-  }
-
-  // m > 1
-  phasem = phase;
-  for (int m = 2; m <= lmax; m++) {
-
-    mphasem1.re = phasem.re * double(m);
-    mphasem1.im = phasem.im * double(m);
-    phasem = phasem * phase;
-
-    for (int l = m; l <= lmax; l++) {
-      const int idx = l * (l + 1) + m;
-
-      ylm(ii, jj, idx).re = phasem.re * plm(ii, jj, idx);
-      ylm(ii, jj, idx).im = phasem.im * plm(ii, jj, idx);
-
-      dyx = mphasem1 * plm(ii, jj, idx);
-      dyy.re = -dyx.im;
-      dyy.im = dyx.re;
-      dyz = phasem * dplm(ii, jj, idx);
-
-      rdy.re = rx * dyx.re + ry * dyy.re + rz * dyz.re;
-      rdy.im = rx * dyx.im + ry * dyy.im + rz * dyz.im;
-
-      dylm(ii, jj, idx, 0).re = dyx.re - rdy.re * rx;
-      dylm(ii, jj, idx, 0).im = dyx.im - rdy.im * rx;
-      dylm(ii, jj, idx, 1).re = dyy.re - rdy.re * ry;
-      dylm(ii, jj, idx, 1).im = dyy.im - rdy.im * ry;
-      dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz;
-      dylm(ii, jj, idx, 2).im = dyz.im - rdy.im * rz;
-    }
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairPACEExtrapolationKokkos<DeviceType>::cutoff_func_poly(const double r, const double r_in, const double delta_in, double &fc, double &dfc) const
@@ -1658,11 +1939,11 @@ void PairPACEExtrapolationKokkos<DeviceType>::evaluate_splines(const int ii, con
   spline_gk.calcSplines(ii, jj, r, gr, dgr);
 
   spline_rnl.calcSplines(ii, jj, r, d_values, d_derivatives);
-  for (int kk = 0; kk < (int)fr.extent(2); kk++) {
-    for (int ll = 0; ll < (int)fr.extent(3); ll++) {
-      const int flatten = kk*fr.extent(3) + ll;
-      fr(ii, jj, kk, ll) = d_values(ii, jj, flatten);
-      dfr(ii, jj, kk, ll) = d_derivatives(ii, jj, flatten);
+  for (int ll = 0; ll < (int)fr.extent(2); ll++) {
+    for (int kk = 0; kk < (int)fr.extent(3); kk++) {
+      const int flatten = kk*fr.extent(2) + ll;
+      fr(ii, jj, ll, kk) = d_values(ii, jj, flatten);
+      dfr(ii, jj, ll, kk) = d_derivatives(ii, jj, flatten);
     }
   }
 
@@ -1682,7 +1963,7 @@ void PairPACEExtrapolationKokkos<DeviceType>::SplineInterpolatorKokkos::operator
     rscalelookup = spline.rscalelookup;
     num_of_functions = spline.num_of_functions;
 
-    lookupTable = t_ace_3d4("lookupTable", ntot+1, num_of_functions);
+    lookupTable = t_ace_3d4_lr("lookupTable", ntot+1, num_of_functions);
     auto h_lookupTable = Kokkos::create_mirror_view(lookupTable);
     for (int i = 0; i < ntot+1; i++)
         for (int j = 0; j < num_of_functions; j++)
@@ -1777,6 +2058,8 @@ double PairPACEExtrapolationKokkos<DeviceType>::memory_usage()
   bytes += MemKK::memory_usage(weights_rank1);
   bytes += MemKK::memory_usage(rho_core);
   bytes += MemKK::memory_usage(dF_drho_core);
+  bytes += MemKK::memory_usage(dF_dfcut);
+  bytes += MemKK::memory_usage(d_corerep);
   bytes += MemKK::memory_usage(dB_flatten);
   bytes += MemKK::memory_usage(fr);
   bytes += MemKK::memory_usage(dfr);
@@ -1786,14 +2069,12 @@ double PairPACEExtrapolationKokkos<DeviceType>::memory_usage()
   bytes += MemKK::memory_usage(d_derivatives);
   bytes += MemKK::memory_usage(cr);
   bytes += MemKK::memory_usage(dcr);
-  bytes += MemKK::memory_usage(plm);
-  bytes += MemKK::memory_usage(dplm);
-  bytes += MemKK::memory_usage(ylm);
-  bytes += MemKK::memory_usage(dylm);
   bytes += MemKK::memory_usage(d_ncount);
   bytes += MemKK::memory_usage(d_mu);
   bytes += MemKK::memory_usage(d_rhats);
   bytes += MemKK::memory_usage(d_rnorms);
+  bytes += MemKK::memory_usage(d_d_min);
+  bytes += MemKK::memory_usage(d_jj_min);
   bytes += MemKK::memory_usage(d_nearest);
   bytes += MemKK::memory_usage(f_ij);
   bytes += MemKK::memory_usage(d_rho_core_cutoff);
@@ -1806,7 +2087,7 @@ double PairPACEExtrapolationKokkos<DeviceType>::memory_usage()
   bytes += MemKK::memory_usage(d_idx_ms_combs_count);
   bytes += MemKK::memory_usage(d_rank);
   bytes += MemKK::memory_usage(d_num_ms_combs);
-  bytes += MemKK::memory_usage(d_func_inds);
+  bytes += MemKK::memory_usage(d_idx_funcs);
   bytes += MemKK::memory_usage(d_mus);
   bytes += MemKK::memory_usage(d_ns);
   bytes += MemKK::memory_usage(d_ls);
@@ -1835,42 +2116,6 @@ double PairPACEExtrapolationKokkos<DeviceType>::memory_usage()
   return bytes;
 }
 
-/* ----------------------------------------------------------------------
-    extract method for extracting value of scale variable
- ---------------------------------------------------------------------- */
-
-template<class DeviceType>
-void *PairPACEExtrapolationKokkos<DeviceType>::extract(const char *str, int &dim)
-{
-  //check if str=="gamma_flag" then compute extrapolation grades on this iteration
-  dim = 0;
-  if (strcmp(str, "gamma_flag") == 0) return (void *) &gamma_flag;
-
-  dim = 2;
-  if (strcmp(str, "scale") == 0) return (void *) scale;
-  return nullptr;
-}
-
-/* ----------------------------------------------------------------------
-   peratom requests from FixPair
-   return ptr to requested data
-   also return ncol = # of quantites per atom
-     0 = per-atom vector
-     1 or more = # of columns in per-atom array
-   return NULL if str is not recognized
----------------------------------------------------------------------- */
-
-template<class DeviceType>
-void *PairPACEExtrapolationKokkos<DeviceType>::extract_peratom(const char *str, int &ncol)
-{
-  if (strcmp(str, "gamma") == 0) {
-    ncol = 0;
-    return (void *) extrapolation_grade_gamma;
-  }
-
-  return nullptr;
-}
-
 /* ---------------------------------------------------------------------- */
 
 namespace LAMMPS_NS {
@@ -1879,4 +2124,3 @@ template class PairPACEExtrapolationKokkos<LMPDeviceType>;
 template class PairPACEExtrapolationKokkos<LMPHostType>;
 #endif
 }
-
diff --git a/src/KOKKOS/pair_pace_extrapolation_kokkos.h b/src/KOKKOS/pair_pace_extrapolation_kokkos.h
index 55bcf4fead..df8a0c1740 100644
--- a/src/KOKKOS/pair_pace_extrapolation_kokkos.h
+++ b/src/KOKKOS/pair_pace_extrapolation_kokkos.h
@@ -36,7 +36,6 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation {
  public:
   struct TagPairPACEComputeNeigh{};
   struct TagPairPACEComputeRadial{};
-  struct TagPairPACEComputeYlm{};
   struct TagPairPACEComputeAi{};
   struct TagPairPACEConjugateAi{};
   struct TagPairPACEComputeRho{};
@@ -67,9 +66,6 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation {
   KOKKOS_INLINE_FUNCTION
   void operator() (TagPairPACEComputeRadial,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeRadial>::member_type& team) const;
 
-  KOKKOS_INLINE_FUNCTION
-  void operator() (TagPairPACEComputeYlm,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeYlm>::member_type& team) const;
-
   KOKKOS_INLINE_FUNCTION
   void operator() (TagPairPACEComputeAi,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeAi>::member_type& team) const;
 
@@ -99,14 +95,9 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation {
   KOKKOS_INLINE_FUNCTION
   void operator() (TagPairPACEComputeForce<NEIGHFLAG,EVFLAG>,const int& ii, EV_FLOAT&) const;
 
-
-  void *extract(const char *str, int &dim) override;
-  void *extract_peratom(const char *str, int &ncol) override;
-
  protected:
-  int inum, maxneigh, chunk_size, chunk_offset, idx_ms_combs_max, total_num_functions_max;
+  int inum, maxneigh, chunk_size, chunk_offset, idx_ms_combs_max, total_num_functions_max, idx_sph_max;
   int host_flag;
-  int gamma_flag;
 
   int eflag, vflag;
 
@@ -130,6 +121,7 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation {
   tdual_fparams k_cutsq, k_scale;
   typedef Kokkos::View<F_FLOAT**, DeviceType> t_fparams;
   t_fparams d_cutsq, d_scale;
+  t_fparams d_cut_in, d_dcut_in; // inner cutoff
 
   typename AT::t_int_1d d_map;
 
@@ -165,12 +157,6 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation {
       const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz,
       const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const;
 
-  KOKKOS_INLINE_FUNCTION
-  void compute_barplm(int, int, double, int) const;
-
-  KOKKOS_INLINE_FUNCTION
-  void compute_ylm(int, int, double, double, double, int) const;
-
   KOKKOS_INLINE_FUNCTION
   void cutoff_func_poly(const double, const double, const double, double &, double &) const;
 
@@ -202,15 +188,19 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation {
 
   typedef Kokkos::View<int*, DeviceType> t_ace_1i;
   typedef Kokkos::View<int**, DeviceType> t_ace_2i;
+  typedef Kokkos::View<int**, Kokkos::LayoutRight, DeviceType> t_ace_2i_lr;
   typedef Kokkos::View<int***, DeviceType> t_ace_3i;
+  typedef Kokkos::View<int***, Kokkos::LayoutRight, DeviceType> t_ace_3i_lr;
   typedef Kokkos::View<int****, DeviceType> t_ace_4i;
   typedef Kokkos::View<double*, DeviceType> t_ace_1d;
   typedef Kokkos::View<double**, DeviceType> t_ace_2d;
+  typedef Kokkos::View<double**, Kokkos::LayoutRight, DeviceType> t_ace_2d_lr;
   typedef Kokkos::View<double*[3], DeviceType> t_ace_2d3;
   typedef Kokkos::View<double***, DeviceType> t_ace_3d;
   typedef Kokkos::View<const double***, DeviceType> tc_ace_3d;
   typedef Kokkos::View<double**[3], DeviceType> t_ace_3d3;
   typedef Kokkos::View<double**[4], DeviceType> t_ace_3d4;
+  typedef Kokkos::View<double**[4], Kokkos::LayoutRight, DeviceType> t_ace_3d4_lr;
   typedef Kokkos::View<double****, DeviceType> t_ace_4d;
   typedef Kokkos::View<complex*, DeviceType> t_ace_1c;
   typedef Kokkos::View<complex**, DeviceType> t_ace_2c;
@@ -234,12 +224,16 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation {
   t_ace_2d rhos;
   t_ace_2d dF_drho;
 
+  t_ace_3c dB_flatten;
+
   // hard-core repulsion
   t_ace_1d rho_core;
-  t_ace_3c dB_flatten;
   t_ace_2d cr;
   t_ace_2d dcr;
   t_ace_1d dF_drho_core;
+  t_ace_1d dF_dfcut;
+  t_ace_1d d_corerep;
+  th_ace_1d h_corerep;
 
   // radial functions
   t_ace_4d fr;
@@ -256,25 +250,16 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation {
   th_ace_1d h_gamma;
 
   // Spherical Harmonics
+
   void pre_compute_harmonics(int);
 
-  KOKKOS_INLINE_FUNCTION
-  void compute_barplm(double rz, int lmaxi);
-
-  KOKKOS_INLINE_FUNCTION
-  void compute_ylm(double rx, double ry, double rz, int lmaxi);
-
+  t_ace_4c A_sph;
+  t_ace_1d d_idx_sph;
   t_ace_1d alm;
   t_ace_1d blm;
   t_ace_1d cl;
   t_ace_1d dl;
 
-  t_ace_3d plm;
-  t_ace_3d dplm;
-
-  t_ace_3c ylm;
-  t_ace_4c3 dylm;
-
   // short neigh list
   t_ace_1i d_ncount;
   t_ace_2d d_mu;
@@ -282,26 +267,30 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation {
   t_ace_3d3 d_rhats;
   t_ace_2i d_nearest;
 
+  // for ZBL core-rep implementation
+  t_ace_1d  d_d_min; // [i] -> min-d for atom ii, d=d = r - (cut_in(mu_i, mu_j) - dcut_in(mu_i, mu_j))
+  t_ace_1i  d_jj_min; // [i] -> jj-index of nearest neigh (by r-(cut_in-dcut_in) criterion)
+  bool is_zbl;
+
   // per-type
   t_ace_1i d_ndensity;
   t_ace_1i d_npoti;
   t_ace_1d d_rho_core_cutoff;
   t_ace_1d d_drho_core_cutoff;
   t_ace_1d d_E0vals;
-  t_ace_2d d_wpre;
-  t_ace_2d d_mexp;
+  t_ace_2d_lr d_wpre;
+  t_ace_2d_lr d_mexp;
 
   // tilde
   t_ace_1i d_idx_ms_combs_count;
   t_ace_1i d_total_basis_size;
-  t_ace_2i d_rank;
-  t_ace_2i d_num_ms_combs;
-  t_ace_2i d_func_inds;
-  t_ace_3i d_mus;
-  t_ace_3i d_ns;
-  t_ace_3i d_ls;
-  t_ace_3i d_ms_combs;
-//  t_ace_3d d_ctildes;
+  t_ace_2i_lr d_rank;
+  t_ace_2i_lr d_num_ms_combs;
+  t_ace_2i_lr d_idx_funcs;
+  t_ace_3i_lr d_mus;
+  t_ace_3i_lr d_ns;
+  t_ace_3i_lr d_ls;
+  t_ace_3i_lr d_ms_combs;
   t_ace_2d d_gen_cgs;
   t_ace_3d d_coeffs;
 
@@ -312,12 +301,12 @@ class PairPACEExtrapolationKokkos : public PairPACEExtrapolation {
     int ntot, nlut, num_of_functions;
     double cutoff, deltaSplineBins, invrscalelookup, rscalelookup;
 
-    t_ace_3d4 lookupTable;
+    t_ace_3d4_lr lookupTable;
 
     void operator=(const SplineInterpolator &spline);
 
     void deallocate() {
-      lookupTable = t_ace_3d4();
+      lookupTable = t_ace_3d4_lr();
     }
 
     double memory_usage() {
diff --git a/src/KOKKOS/pair_pace_kokkos.cpp b/src/KOKKOS/pair_pace_kokkos.cpp
index 153a6d0333..4046649375 100644
--- a/src/KOKKOS/pair_pace_kokkos.cpp
+++ b/src/KOKKOS/pair_pace_kokkos.cpp
@@ -29,11 +29,13 @@
 #include "neighbor_kokkos.h"
 #include "neigh_request.h"
 
+#include "ace-evaluator/ace_version.h"
+#include "ace-evaluator/ace_radial.h"
+
 #include "ace-evaluator/ace_c_basis.h"
 #include "ace-evaluator/ace_evaluator.h"
 #include "ace-evaluator/ace_recursive.h"
-#include "ace-evaluator/ace_version.h"
-#include "ace-evaluator/ace_radial.h"
+
 #include <cstring>
 
 namespace LAMMPS_NS {
@@ -104,31 +106,37 @@ void PairPACEKokkos<DeviceType>::grow(int natom, int maxneigh)
 
   if ((int)A.extent(0) < natom) {
 
-    MemKK::realloc_kokkos(A, "pace:A", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1));
+    MemKK::realloc_kokkos(A_sph, "pace:A_sph", natom, nelements, idx_sph_max, nradmax + 1);
+    MemKK::realloc_kokkos(A, "pace:A", natom, nelements, (lmax + 1) * (lmax + 1), nradmax + 1);
     MemKK::realloc_kokkos(A_rank1, "pace:A_rank1", natom, nelements, nradbase);
 
-    MemKK::realloc_kokkos(A_list, "pace:A_list", natom, idx_rho_max, basis_set->rankmax);
+    MemKK::realloc_kokkos(A_list, "pace:A_list", natom, idx_ms_combs_max, basis_set->rankmax);
     //size is +1 of max to avoid out-of-boundary array access in double-triangular scheme
-    MemKK::realloc_kokkos(A_forward_prod, "pace:A_forward_prod", natom, idx_rho_max, basis_set->rankmax + 1);
+    MemKK::realloc_kokkos(A_forward_prod, "pace:A_forward_prod", natom, idx_ms_combs_max, basis_set->rankmax + 1);
 
     MemKK::realloc_kokkos(e_atom, "pace:e_atom", natom);
     MemKK::realloc_kokkos(rhos, "pace:rhos", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion
     MemKK::realloc_kokkos(dF_drho, "pace:dF_drho", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion
 
-    MemKK::realloc_kokkos(weights, "pace:weights", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1));
+    MemKK::realloc_kokkos(weights, "pace:weights", natom, nelements, idx_sph_max, nradmax + 1);
     MemKK::realloc_kokkos(weights_rank1, "pace:weights_rank1", natom, nelements, nradbase);
 
     // hard-core repulsion
     MemKK::realloc_kokkos(rho_core, "pace:rho_core", natom);
     MemKK::realloc_kokkos(dF_drho_core, "pace:dF_drho_core", natom);
-    MemKK::realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_rho_max, basis_set->rankmax);
+    MemKK::realloc_kokkos(dF_dfcut, "pace:dF_dfcut", natom);
+    MemKK::realloc_kokkos(d_d_min, "pace:r_min_pair", natom);
+    MemKK::realloc_kokkos(d_jj_min, "pace:j_min_pair", natom);
+    MemKK::realloc_kokkos(d_corerep, "pace:corerep", natom); // per-atom corerep
+
+    MemKK::realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_ms_combs_max, basis_set->rankmax);
   }
 
-  if (((int)ylm.extent(0) < natom) || ((int)ylm.extent(1) < maxneigh)) {
+  if (((int)fr.extent(0) < natom) || ((int)fr.extent(1) < maxneigh)) {
 
     // radial functions
-    MemKK::realloc_kokkos(fr, "pace:fr", natom, maxneigh, nradmax, lmax + 1);
-    MemKK::realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, nradmax, lmax + 1);
+    MemKK::realloc_kokkos(fr, "pace:fr", natom, maxneigh, lmax + 1, nradmax);
+    MemKK::realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, lmax + 1, nradmax);
     MemKK::realloc_kokkos(gr, "pace:gr", natom, maxneigh, nradbase);
     MemKK::realloc_kokkos(dgr, "pace:dgr", natom, maxneigh, nradbase);
     const int max_num_functions = MAX(nradbase, nradmax*(lmax + 1));
@@ -139,12 +147,6 @@ void PairPACEKokkos<DeviceType>::grow(int natom, int maxneigh)
     MemKK::realloc_kokkos(cr, "pace:cr", natom, maxneigh);
     MemKK::realloc_kokkos(dcr, "pace:dcr", natom, maxneigh);
 
-    // spherical harmonics
-    MemKK::realloc_kokkos(plm, "pace:plm", natom, maxneigh, (lmax + 1) * (lmax + 1));
-    MemKK::realloc_kokkos(dplm, "pace:dplm", natom, maxneigh, (lmax + 1) * (lmax + 1));
-    MemKK::realloc_kokkos(ylm, "pace:ylm", natom, maxneigh, (lmax + 1) * (lmax + 1));
-    MemKK::realloc_kokkos(dylm, "pace:dylm", natom, maxneigh, (lmax + 1) * (lmax + 1));
-
     // short neigh list
     MemKK::realloc_kokkos(d_ncount, "pace:ncount", natom);
     MemKK::realloc_kokkos(d_mu, "pace:mu", natom, maxneigh);
@@ -179,7 +181,7 @@ void PairPACEKokkos<DeviceType>::copy_pertype()
     h_rho_core_cutoff[n] = basis_set->map_embedding_specifications.at(n).rho_core_cutoff;
     h_drho_core_cutoff[n] = basis_set->map_embedding_specifications.at(n).drho_core_cutoff;
 
-    h_E0vals(n)= basis_set->E0vals(n);
+    h_E0vals(n) = basis_set->E0vals(n);
 
     h_ndensity(n) = basis_set->map_embedding_specifications.at(n).ndensity;
 
@@ -212,6 +214,23 @@ void PairPACEKokkos<DeviceType>::copy_pertype()
 
   Kokkos::deep_copy(d_wpre, h_wpre);
   Kokkos::deep_copy(d_mexp, h_mexp);
+
+  // ZBL core-rep
+  MemKK::realloc_kokkos(d_cut_in, "pace:d_cut_in", nelements, nelements);
+  MemKK::realloc_kokkos(d_dcut_in, "pace:d_dcut_in", nelements, nelements);
+  auto h_cut_in = Kokkos::create_mirror_view(d_cut_in);
+  auto h_dcut_in = Kokkos::create_mirror_view(d_dcut_in);
+
+  for (int mu_i = 0; mu_i < nelements; ++mu_i) {
+    for (int mu_j = 0; mu_j < nelements; ++mu_j) {
+      h_cut_in(mu_i,mu_j) = basis_set->map_bond_specifications.at({mu_i,mu_j}).rcut_in;
+      h_dcut_in(mu_i,mu_j) = basis_set->map_bond_specifications.at({mu_i,mu_j}).dcut_in;
+    }
+  }
+  Kokkos::deep_copy(d_cut_in, h_cut_in);
+  Kokkos::deep_copy(d_dcut_in, h_dcut_in);
+
+  is_zbl = basis_set->radial_functions->inner_cutoff_type == "zbl";
 }
 
 /* ---------------------------------------------------------------------- */
@@ -266,50 +285,50 @@ void PairPACEKokkos<DeviceType>::copy_tilde()
 
   // flatten loops, get per-element count and max
 
-  idx_rho_max = 0;
+  idx_ms_combs_max = 0;
   int total_basis_size_max = 0;
 
-  MemKK::realloc_kokkos(d_idx_rho_count, "pace:idx_rho_count", nelements);
-  auto h_idx_rho_count = Kokkos::create_mirror_view(d_idx_rho_count);
+  MemKK::realloc_kokkos(d_idx_ms_combs_count, "pace:idx_ms_combs_count", nelements);
+  auto h_idx_ms_combs_count = Kokkos::create_mirror_view(d_idx_ms_combs_count);
 
-  for (int n = 0; n < nelements; n++) {
-    int idx_rho = 0;
-    const int total_basis_size_rank1 = basis_set->total_basis_size_rank1[n];
-    const int total_basis_size = basis_set->total_basis_size[n];
+  for (int mu = 0; mu < nelements; mu++) {
+    int idx_ms_combs = 0;
+    const int total_basis_size_rank1 = basis_set->total_basis_size_rank1[mu];
+    const int total_basis_size = basis_set->total_basis_size[mu];
 
-    ACECTildeBasisFunction *basis = basis_set->basis[n];
+    ACECTildeBasisFunction *basis = basis_set->basis[mu];
 
     // rank=1
     for (int func_rank1_ind = 0; func_rank1_ind < total_basis_size_rank1; ++func_rank1_ind)
-      idx_rho++;
+      idx_ms_combs++;
 
     // rank > 1
-    for (int func_ind = 0; func_ind < total_basis_size; ++func_ind) {
-      ACECTildeBasisFunction *func = &basis[func_ind];
+    for (int idx_func = 0; idx_func < total_basis_size; ++idx_func) {
+      ACECTildeBasisFunction *func = &basis[idx_func];
 
       // loop over {ms} combinations in sum
       for (int ms_ind = 0; ms_ind < func->num_ms_combs; ++ms_ind)
-        idx_rho++;
+        idx_ms_combs++;
     }
-    h_idx_rho_count(n) = idx_rho;
-    idx_rho_max = MAX(idx_rho_max, idx_rho);
+    h_idx_ms_combs_count(mu) = idx_ms_combs;
+    idx_ms_combs_max = MAX(idx_ms_combs_max, idx_ms_combs);
     total_basis_size_max = MAX(total_basis_size_max, total_basis_size_rank1 + total_basis_size);
   }
 
-  Kokkos::deep_copy(d_idx_rho_count, h_idx_rho_count);
+  Kokkos::deep_copy(d_idx_ms_combs_count, h_idx_ms_combs_count);
 
   MemKK::realloc_kokkos(d_rank, "pace:rank", nelements, total_basis_size_max);
   MemKK::realloc_kokkos(d_num_ms_combs, "pace:num_ms_combs", nelements, total_basis_size_max);
-  MemKK::realloc_kokkos(d_offsets, "pace:offsets", nelements, idx_rho_max);
+  MemKK::realloc_kokkos(d_idx_funcs, "pace:idx_func", nelements, idx_ms_combs_max);
   MemKK::realloc_kokkos(d_mus, "pace:mus", nelements, total_basis_size_max, basis_set->rankmax);
   MemKK::realloc_kokkos(d_ns, "pace:ns", nelements, total_basis_size_max, basis_set->rankmax);
   MemKK::realloc_kokkos(d_ls, "pace:ls", nelements, total_basis_size_max, basis_set->rankmax);
-  MemKK::realloc_kokkos(d_ms_combs, "pace:ms_combs", nelements, idx_rho_max, basis_set->rankmax);
-  MemKK::realloc_kokkos(d_ctildes, "pace:ctildes", nelements, idx_rho_max, basis_set->ndensitymax);
+  MemKK::realloc_kokkos(d_ms_combs, "pace:ms_combs", nelements, idx_ms_combs_max, basis_set->rankmax);
+  MemKK::realloc_kokkos(d_ctildes, "pace:ctildes", nelements, idx_ms_combs_max, basis_set->ndensitymax);
 
   auto h_rank = Kokkos::create_mirror_view(d_rank);
   auto h_num_ms_combs = Kokkos::create_mirror_view(d_num_ms_combs);
-  auto h_offsets = Kokkos::create_mirror_view(d_offsets);
+  auto h_idx_funcs = Kokkos::create_mirror_view(d_idx_funcs);
   auto h_mus = Kokkos::create_mirror_view(d_mus);
   auto h_ns = Kokkos::create_mirror_view(d_ns);
   auto h_ls = Kokkos::create_mirror_view(d_ls);
@@ -318,63 +337,66 @@ void PairPACEKokkos<DeviceType>::copy_tilde()
 
   // copy values on host
 
-  for (int n = 0; n < nelements; n++) {
-    const int total_basis_size_rank1 = basis_set->total_basis_size_rank1[n];
-    const int total_basis_size = basis_set->total_basis_size[n];
+  for (int mu = 0; mu < nelements; mu++) {
+    const int total_basis_size_rank1 = basis_set->total_basis_size_rank1[mu];
+    const int total_basis_size = basis_set->total_basis_size[mu];
 
-    ACECTildeBasisFunction *basis_rank1 = basis_set->basis_rank1[n];
-    ACECTildeBasisFunction *basis = basis_set->basis[n];
+    ACECTildeBasisFunction *basis_rank1 = basis_set->basis_rank1[mu];
+    ACECTildeBasisFunction *basis = basis_set->basis[mu];
 
-    const int ndensity = basis_set->map_embedding_specifications.at(n).ndensity;
+    const int ndensity = basis_set->map_embedding_specifications.at(mu).ndensity;
 
-    int idx_rho = 0;
+    int idx_ms_combs = 0;
 
     // rank=1
-    for (int offset = 0; offset < total_basis_size_rank1; ++offset) {
-      ACECTildeBasisFunction *func = &basis_rank1[offset];
-      h_rank(n, offset) = 1;
-      h_mus(n, offset, 0) = func->mus[0];
-      h_ns(n, offset, 0) = func->ns[0];
-      for (int p = 0; p < ndensity; p++)
-        h_ctildes(n, idx_rho, p) = func->ctildes[p];
-      h_offsets(n, idx_rho) = offset;
-      idx_rho++;
+    for (int idx_func = 0; idx_func < total_basis_size_rank1; ++idx_func) {
+      ACECTildeBasisFunction *func = &basis_rank1[idx_func];
+      h_rank(mu, idx_func) = 1;
+      h_mus(mu, idx_func, 0) = func->mus[0];
+      h_ns(mu, idx_func, 0) = func->ns[0];
+
+      for (int p = 0; p < ndensity; ++p)
+        h_ctildes(mu, idx_ms_combs, p) = func->ctildes[p];
+
+      h_idx_funcs(mu, idx_ms_combs) = idx_func;
+      idx_ms_combs++;
     }
 
     // rank > 1
-    for (int func_ind = 0; func_ind < total_basis_size; ++func_ind) {
-      ACECTildeBasisFunction *func = &basis[func_ind];
+    for (int idx_func = 0; idx_func < total_basis_size; ++idx_func) {
+      ACECTildeBasisFunction *func = &basis[idx_func];
       // TODO: check if func->ctildes are zero, then skip
 
-      const int offset = total_basis_size_rank1 + func_ind;
+      const int idx_func_through = total_basis_size_rank1 + idx_func;
 
-      const int rank = h_rank(n, offset) = func->rank;
-      h_num_ms_combs(n, offset) = func->num_ms_combs;
+      const int rank = h_rank(mu, idx_func_through) = func->rank;
+      h_num_ms_combs(mu, idx_func_through) = func->num_ms_combs;
       for (int t = 0; t < rank; t++) {
-        h_mus(n, offset, t) = func->mus[t];
-        h_ns(n, offset, t) = func->ns[t];
-        h_ls(n, offset, t) = func->ls[t];
+        h_mus(mu, idx_func_through, t) = func->mus[t];
+        h_ns(mu, idx_func_through, t) = func->ns[t];
+        h_ls(mu, idx_func_through, t) = func->ls[t];
       }
 
       // loop over {ms} combinations in sum
       for (int ms_ind = 0; ms_ind < func->num_ms_combs; ++ms_ind) {
         auto ms = &func->ms_combs[ms_ind * rank]; // current ms-combination (of length = rank)
         for (int t = 0; t < rank; t++)
-          h_ms_combs(n, idx_rho, t) = ms[t];
+          h_ms_combs(mu, idx_ms_combs, t) = ms[t];
 
         for (int p = 0; p < ndensity; ++p) {
           // real-part only multiplication
-          h_ctildes(n, idx_rho, p) = func->ctildes[ms_ind * ndensity + p];
+          h_ctildes(mu, idx_ms_combs, p) = func->ctildes[ms_ind * ndensity + p];
         }
-        h_offsets(n, idx_rho) = offset;
-        idx_rho++;
+
+        h_idx_funcs(mu, idx_ms_combs) = idx_func_through;
+        idx_ms_combs++;
       }
     }
   }
 
   Kokkos::deep_copy(d_rank, h_rank);
   Kokkos::deep_copy(d_num_ms_combs, h_num_ms_combs);
-  Kokkos::deep_copy(d_offsets, h_offsets);
+  Kokkos::deep_copy(d_idx_funcs, h_idx_funcs);
   Kokkos::deep_copy(d_mus, h_mus);
   Kokkos::deep_copy(d_ns, h_ns);
   Kokkos::deep_copy(d_ls, h_ls);
@@ -421,6 +443,7 @@ void PairPACEKokkos<DeviceType>::init_style()
 
   // spherical harmonics
 
+  MemKK::realloc_kokkos(d_idx_sph, "pace:idx_sph", (lmax + 1) * (lmax + 1));
   MemKK::realloc_kokkos(alm, "pace:alm", (lmax + 1) * (lmax + 1));
   MemKK::realloc_kokkos(blm, "pace:blm", (lmax + 1) * (lmax + 1));
   MemKK::realloc_kokkos(cl, "pace:cl", lmax + 1);
@@ -535,6 +558,13 @@ void PairPACEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
     memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom");
     d_vatom = k_vatom.view<DeviceType>();
   }
+  if (flag_corerep_factor && atom->nlocal > nmax_corerep) {
+    memory->destroy(corerep_factor);
+    nmax_corerep = atom->nlocal;
+    memory->create(corerep_factor, nmax_corerep, "pace/atom:corerep");
+    //zeroify array
+    memset(corerep_factor, 0, nmax_corerep * sizeof(*corerep_factor));
+  }
 
   copymode = 1;
   if (!force->newton_pair)
@@ -584,10 +614,13 @@ void PairPACEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
 
     Kokkos::deep_copy(weights, 0.0);
     Kokkos::deep_copy(weights_rank1, 0.0);
-    Kokkos::deep_copy(A, 0.0);
+    Kokkos::deep_copy(A_sph, 0.0);
     Kokkos::deep_copy(A_rank1, 0.0);
     Kokkos::deep_copy(rhos, 0.0);
     Kokkos::deep_copy(rho_core, 0.0);
+    Kokkos::deep_copy(d_d_min, PairPACE::aceimpl->basis_set->cutoffmax);
+    Kokkos::deep_copy(d_jj_min, -1);
+    Kokkos::deep_copy(d_corerep, 0.0);
 
     EV_FLOAT ev_tmp;
 
@@ -614,15 +647,6 @@ void PairPACEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       Kokkos::parallel_for("ComputeRadial",policy_radial,*this);
     }
 
-    //ComputeYlm
-    {
-      int vector_length = vector_length_default;
-      int team_size = 16;
-      check_team_size_for<TagPairPACEComputeYlm>(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
-      typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeYlm> policy_ylm(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
-      Kokkos::parallel_for("ComputeYlm",policy_ylm,*this);
-    }
-
     //ComputeAi
     {
       int vector_length = vector_length_default;
@@ -640,7 +664,7 @@ void PairPACEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
 
     //ComputeRho
     {
-      typename Kokkos::RangePolicy<DeviceType,TagPairPACEComputeRho> policy_rho(0,chunk_size*idx_rho_max);
+      typename Kokkos::RangePolicy<DeviceType,TagPairPACEComputeRho> policy_rho(0,chunk_size*idx_ms_combs_max);
       Kokkos::parallel_for("ComputeRho",policy_rho,*this);
     }
 
@@ -652,7 +676,7 @@ void PairPACEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
 
     //ComputeWeights
     {
-      typename Kokkos::RangePolicy<DeviceType,TagPairPACEComputeWeights> policy_weights(0,chunk_size*idx_rho_max);
+      typename Kokkos::RangePolicy<DeviceType,TagPairPACEComputeWeights> policy_weights(0,chunk_size * idx_ms_combs_max);
       Kokkos::parallel_for("ComputeWeights",policy_weights,*this);
     }
 
@@ -661,7 +685,7 @@ void PairPACEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       int vector_length = vector_length_default;
       int team_size = team_size_default;
       check_team_size_for<TagPairPACEComputeDerivative>(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
-      typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeDerivative> policy_derivative(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
+      typename Kokkos::TeamPolicy<DeviceType,TagPairPACEComputeDerivative> policy_derivative(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length);
       Kokkos::parallel_for("ComputeDerivative",policy_derivative,*this);
     }
 
@@ -686,8 +710,14 @@ void PairPACEKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
       }
     }
     ev += ev_tmp;
-    chunk_offset += chunk_size;
 
+    if (flag_corerep_factor) {
+      h_corerep = Kokkos::create_mirror_view(d_corerep);
+      Kokkos::deep_copy(h_corerep,d_corerep);
+      memcpy(corerep_factor+chunk_offset, (void *) h_corerep.data(), sizeof(double)*chunk_size);
+    }
+
+    chunk_offset += chunk_size;
   } // end while
 
   if (need_dup)
@@ -741,6 +771,7 @@ void PairPACEKokkos<DeviceType>::operator() (TagPairPACEComputeNeigh,const typen
   const X_FLOAT ytmp = x(i,1);
   const X_FLOAT ztmp = x(i,2);
   const int jnum = d_numneigh[i];
+  const int mu_i = d_map(type(i));
 
   // get a pointer to scratch memory
   // This is used to cache whether or not an atom is within the cutoff
@@ -800,6 +831,35 @@ void PairPACEKokkos<DeviceType>::operator() (TagPairPACEComputeNeigh,const typen
     }
     offset++;
   });
+
+  if (is_zbl) {
+    //adapted from https://www.osti.gov/servlets/purl/1429450
+    if (ncount > 0) {
+      using minloc_value_type=Kokkos::MinLoc<F_FLOAT,int>::value_type;
+      minloc_value_type djjmin;
+      djjmin.val=1e20;
+      djjmin.loc=-1;
+      Kokkos::MinLoc<F_FLOAT,int> reducer_scalar(djjmin);
+      // loop over ncount (actual neighbours withing cutoff) rather than jnum (total number of neigh in cutoff+skin)
+      Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, ncount),
+               [&](const int offset, minloc_value_type &min_d_dist) {
+                 int j = d_nearest(ii,offset);
+                 j &= NEIGHMASK;
+                 auto r = d_rnorms(ii,offset);
+                 const int mu_j = d_map(type(j));
+                 const F_FLOAT d = r - (d_cut_in(mu_i, mu_j) - d_dcut_in(mu_i, mu_j));
+                 if (d < min_d_dist.val) {
+                   min_d_dist.val = d;
+                   min_d_dist.loc = offset;
+                 }
+       }, reducer_scalar);
+      d_d_min(ii) = djjmin.val;
+      d_jj_min(ii) = djjmin.loc;// d_jj_min should be NOT in 0..jnum range, but in 0..d_ncount(<=jnum)
+    } else {
+      d_d_min(ii) = 1e20;
+      d_jj_min(ii) = -1;
+    }
+  }
 }
 
 /* ---------------------------------------------------------------------- */
@@ -828,28 +888,6 @@ void PairPACEKokkos<DeviceType>::operator() (TagPairPACEComputeRadial, const typ
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType>
-KOKKOS_INLINE_FUNCTION
-void PairPACEKokkos<DeviceType>::operator() (TagPairPACEComputeYlm, const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeYlm>::member_type& team) const
-{
-  // Extract the atom number
-  int ii = team.team_rank() + team.team_size() * (team.league_rank() %
-           ((chunk_size+team.team_size()-1)/team.team_size()));
-  if (ii >= chunk_size) return;
-
-  // Extract the neighbor number
-  const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size());
-  const int ncount = d_ncount(ii);
-  if (jj >= ncount) return;
-
-  const double xn = d_rhats(ii, jj, 0);
-  const double yn = d_rhats(ii, jj, 1);
-  const double zn = d_rhats(ii, jj, 2);
-  compute_ylm(ii,jj,xn,yn,zn,lmax);
-}
-
-/* ---------------------------------------------------------------------- */
-
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairPACEKokkos<DeviceType>::operator() (TagPairPACEComputeAi, const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeAi>::member_type& team) const
@@ -871,13 +909,127 @@ void PairPACEKokkos<DeviceType>::operator() (TagPairPACEComputeAi, const typenam
     Kokkos::atomic_add(&A_rank1(ii, mu_j, n), gr(ii, jj, n) * Y00);
 
   // rank > 1
-  for (int n = 0; n < nradmax; n++) {
-    for (int l = 0; l <= lmax; l++) {
-      for (int m = 0; m <= l; m++) {
-        const int idx = l * (l + 1) + m; // (l, m)
-        Kokkos::atomic_add(&A(ii, mu_j, n, idx).re, fr(ii, jj, n, l) * ylm(ii, jj, idx).re);
-        Kokkos::atomic_add(&A(ii, mu_j, n, idx).im, fr(ii, jj, n, l) * ylm(ii, jj, idx).im);
+
+  // Compute plm and ylm
+
+  // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!!
+  // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!!
+  // prefactors include 1/sqrt(2) factor compared to reference
+
+  complex ylm, phase;
+  complex phasem, mphasem1;
+  complex dyx, dyy, dyz;
+  complex rdy;
+
+  const double rx = d_rhats(ii, jj, 0);
+  const double ry = d_rhats(ii, jj, 1);
+  const double rz = d_rhats(ii, jj, 2);
+
+  phase.re = rx;
+  phase.im = ry;
+
+  double plm_idx,plm_idx1,plm_idx2;
+
+  plm_idx = plm_idx1 = plm_idx2 = 0.0;
+
+  int idx_sph = 0;
+
+  // m = 0
+  for (int l = 0; l <= lmax; l++) {
+    // const int idx = l * (l + 1);
+
+    if (l == 0) {
+      // l=0, m=0
+      // plm[0] = Y00/sq1o4pi; //= sq1o4pi;
+      plm_idx = Y00; //= 1;
+    } else if (l == 1) {
+      // l=1, m=0
+      plm_idx = Y00 * sq3 * rz;
+    } else {
+      // l>=2, m=0
+      plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2);
+    }
+
+    ylm.re = plm_idx;
+    ylm.im = 0.0;
+
+    for (int n = 0; n < nradmax; n++) {
+      Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).re, fr(ii, jj, l, n) * ylm.re);
+      Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).im, fr(ii, jj, l, n) * ylm.im);
+    }
+
+    plm_idx2 = plm_idx1;
+    plm_idx1 = plm_idx;
+
+    idx_sph++;
+  }
+
+  plm_idx = plm_idx1 = plm_idx2 = 0.0;
+
+  // m = 1
+  for (int l = 1; l <= lmax; l++) {
+    // const int idx = l * (l + 1) + 1; // (l, 1)
+
+    if (l == 1) {
+      // l=1, m=1
+      plm_idx = -sq3o2 * Y00;
+    } else if (l == 2) {
+      const double t = dl(l) * plm_idx1;
+      plm_idx = t * rz;
+    } else {
+      plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2);
+    }
+
+    ylm = phase * plm_idx;
+
+    for (int n = 0; n < nradmax; n++) {
+      Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).re, fr(ii, jj, l, n) * ylm.re);
+      Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).im, fr(ii, jj, l, n) * ylm.im);
+    }
+
+    plm_idx2 = plm_idx1;
+    plm_idx1 = plm_idx;
+
+    idx_sph++;
+  }
+
+  plm_idx = plm_idx1 = plm_idx2 = 0.0;
+
+  double plm_mm1_mm1 = -sq3o2 * Y00; // (1, 1)
+
+  // m > 1
+  phasem = phase;
+  for (int m = 2; m <= lmax; m++) {
+
+    mphasem1.re = phasem.re * double(m);
+    mphasem1.im = phasem.im * double(m);
+    phasem = phasem * phase;
+
+    for (int l = m; l <= lmax; l++) {
+      // const int idx = l * (l + 1) + m;
+
+      if (l == m) {
+        plm_idx = cl(l) * plm_mm1_mm1; // (m+1, m)
+        plm_mm1_mm1 = plm_idx;
+      } else if (l == (m + 1)) {
+        const double t = dl(l) * plm_mm1_mm1; // (m - 1, m - 1)
+        plm_idx = t * rz; // (m, m)
+      } else {
+        plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2);
       }
+
+      ylm.re = phasem.re * plm_idx;
+      ylm.im = phasem.im * plm_idx;
+
+      for (int n = 0; n < nradmax; n++) {
+        Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).re, fr(ii, jj, l, n) * ylm.re);
+        Kokkos::atomic_add(&A_sph(ii, mu_j, idx_sph, n).im, fr(ii, jj, l, n) * ylm.im);
+      }
+
+      plm_idx2 = plm_idx1;
+      plm_idx1 = plm_idx;
+
+      idx_sph++;
     }
   }
 
@@ -891,17 +1043,35 @@ template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairPACEKokkos<DeviceType>::operator() (TagPairPACEConjugateAi, const int& ii) const
 {
-  //complex conjugate A's (for NEGATIVE (-m) terms)
-  // for rank > 1
   for (int mu_j = 0; mu_j < nelements; mu_j++) {
-    for (int n = 0; n < nradmax; n++) {
-      for (int l = 0; l <= lmax; l++) {
+
+    // transpose
+
+    int idx_sph = 0;
+
+    for (int m = 0; m <= lmax; m++) {
+      for (int l = m; l <= lmax; l++) {
+        const int idx = l * (l + 1) + m;
+        for (int n = 0; n < nradmax; n++) {
+          A(ii, mu_j, idx, n) = A_sph(ii, mu_j, idx_sph, n);
+        }
+
+        idx_sph++;
+      }
+    }
+
+    // complex conjugate A's (for NEGATIVE (-m) terms)
+    //  for rank > 1
+
+    for (int l = 0; l <= lmax; l++) {
         //fill in -m part in the outer loop using the same m <-> -m symmetry as for Ylm
-        for (int m = 1; m <= l; m++) {
-          const int idx = l * (l + 1) + m; // (l, m)
-          const int idxm = l * (l + 1) - m; // (l, -m)
-          const int factor = m % 2 == 0 ? 1 : -1;
-          A(ii, mu_j, n, idxm) = A(ii, mu_j, n, idx).conj() * (double)factor;
+      for (int m = 1; m <= l; m++) {
+        const int idx = l * (l + 1) + m; // (l, m)
+        const int idxm = l * (l + 1) - m; // (l, -m)
+        const int idx_sph = d_idx_sph(idx);
+        const int factor = m % 2 == 0 ? 1 : -1;
+        for (int n = 0; n < nradmax; n++) {
+          A(ii, mu_j, idxm, n) = A_sph(ii, mu_j, idx_sph, n).conj() * (double)factor;
         }
       }
     }
@@ -914,70 +1084,69 @@ template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairPACEKokkos<DeviceType>::operator() (TagPairPACEComputeRho, const int& iter) const
 {
-  const int idx_rho = iter / chunk_size;
+  const int idx_ms_combs = iter / chunk_size;
   const int ii = iter % chunk_size;
 
   const int i = d_ilist[ii + chunk_offset];
   const int mu_i = d_map(type(i));
 
-  if (idx_rho >= d_idx_rho_count(mu_i)) return;
+  if (idx_ms_combs >= d_idx_ms_combs_count(mu_i)) return;
 
   const int ndensity = d_ndensity(mu_i);
 
-  const int offset = d_offsets(mu_i, idx_rho);
-  const int rank = d_rank(mu_i, offset);
+  const int idx_func = d_idx_funcs(mu_i, idx_ms_combs);
+  const int rank = d_rank(mu_i, idx_func);
   const int r = rank - 1;
 
   // Basis functions B with iterative product and density rho(p) calculation
   if (rank == 1) {
-    const int mu = d_mus(mu_i, offset, 0);
-    const int n = d_ns(mu_i, offset, 0);
+    const int mu = d_mus(mu_i, idx_func, 0);
+    const int n = d_ns(mu_i, idx_func, 0);
     double A_cur = A_rank1(ii, mu, n - 1);
     for (int p = 0; p < ndensity; ++p) {
       //for rank=1 (r=0) only 1 ms-combination exists (ms_ind=0), so index of func.ctildes is 0..ndensity-1
-      Kokkos::atomic_add(&rhos(ii, p), d_ctildes(mu_i, idx_rho, p) * A_cur);
+      Kokkos::atomic_add(&rhos(ii, p), d_ctildes(mu_i, idx_ms_combs, p) * A_cur);
     }
   } else { // rank > 1
     // loop over {ms} combinations in sum
 
     // loop over m, collect B  = product of A with given ms
-    A_forward_prod(ii, idx_rho, 0) = complex::one();
+    A_forward_prod(ii, idx_ms_combs, 0) = complex::one();
 
     // fill forward A-product triangle
     for (int t = 0; t < rank; t++) {
       //TODO: optimize ns[t]-1 -> ns[t] during functions construction
-      const int mu = d_mus(mu_i, offset, t);
-      const int n = d_ns(mu_i, offset, t);
-      const int l = d_ls(mu_i, offset, t);
-      const int m = d_ms_combs(mu_i, idx_rho, t); // current ms-combination (of length = rank)
+      const int mu = d_mus(mu_i, idx_func, t);
+      const int n = d_ns(mu_i, idx_func, t);
+      const int l = d_ls(mu_i, idx_func, t);
+      const int m = d_ms_combs(mu_i, idx_ms_combs, t); // current ms-combination (of length = rank)
       const int idx = l * (l + 1) + m; // (l, m)
-      A_list(ii, idx_rho, t) = A(ii, mu, n - 1, idx);
-      A_forward_prod(ii, idx_rho, t + 1) = A_forward_prod(ii, idx_rho, t) * A_list(ii, idx_rho, t);
+      A_list(ii, idx_ms_combs, t) = A(ii, mu, idx, n - 1);
+      A_forward_prod(ii, idx_ms_combs, t + 1) = A_forward_prod(ii, idx_ms_combs, t) * A_list(ii, idx_ms_combs, t);
     }
 
     complex A_backward_prod = complex::one();
 
     // fill backward A-product triangle
     for (int t = r; t >= 1; t--) {
-      const complex dB = A_forward_prod(ii, idx_rho, t) * A_backward_prod; // dB - product of all A's except t-th
-      dB_flatten(ii, idx_rho, t) = dB;
+      const complex dB = A_forward_prod(ii, idx_ms_combs, t) * A_backward_prod; // dB - product of all A's except t-th
+      dB_flatten(ii, idx_ms_combs, t) = dB;
 
-      A_backward_prod = A_backward_prod * A_list(ii, idx_rho, t);
+      A_backward_prod = A_backward_prod * A_list(ii, idx_ms_combs, t);
     }
-    dB_flatten(ii, idx_rho, 0) = A_forward_prod(ii, idx_rho, 0) * A_backward_prod;
+    dB_flatten(ii, idx_ms_combs, 0) = A_forward_prod(ii, idx_ms_combs, 0) * A_backward_prod;
 
-    const complex B = A_forward_prod(ii, idx_rho, rank);
+    const complex B = A_forward_prod(ii, idx_ms_combs, rank);
 
     for (int p = 0; p < ndensity; ++p) {
       // real-part only multiplication
-      Kokkos::atomic_add(&rhos(ii, p), B.real_part_product(d_ctildes(mu_i, idx_rho, p)));
+      Kokkos::atomic_add(&rhos(ii, p), B.real_part_product(d_ctildes(mu_i, idx_ms_combs, p)));
     }
   }
 }
 
 /* ---------------------------------------------------------------------- */
 
-
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairPACEKokkos<DeviceType>::operator() (TagPairPACEComputeFS, const int& ii) const
@@ -990,23 +1159,43 @@ void PairPACEKokkos<DeviceType>::operator() (TagPairPACEComputeFS, const int& ii
   const int ndensity = d_ndensity(mu_i);
 
   double evdwl, fcut, dfcut;
+  double evdwl_cut;
   evdwl = fcut = dfcut = 0.0;
 
-  inner_cutoff(rho_core(ii), rho_cut, drho_cut, fcut, dfcut);
   FS_values_and_derivatives(ii, evdwl, mu_i);
 
-  dF_drho_core(ii) = evdwl * dfcut + 1;
+  if (is_zbl) {
+    if (d_jj_min(ii) != -1) {
+      const int mu_jmin = d_mu(ii,d_jj_min(ii));
+      F_FLOAT dcutin = d_dcut_in(mu_i, mu_jmin);
+      F_FLOAT transition_coordinate =  dcutin  - d_d_min(ii); // == cutin - r_min
+      cutoff_func_poly(transition_coordinate, dcutin, dcutin, fcut, dfcut);
+      dfcut = -dfcut; // invert, because rho_core = cutin - r_min
+    } else {
+      // no neighbours
+      fcut = 1;
+      dfcut = 0;
+    }
+    evdwl_cut = evdwl * fcut + rho_core(ii) * (1 - fcut); // evdwl * fcut + rho_core_uncut  - rho_core_uncut* fcut
+    dF_drho_core(ii) = 1 - fcut;
+    dF_dfcut(ii) = evdwl * dfcut - rho_core(ii) * dfcut;
+  } else {
+    inner_cutoff(rho_core(ii), rho_cut, drho_cut, fcut, dfcut);
+    dF_drho_core(ii) = evdwl * dfcut + 1;
+    evdwl_cut = evdwl * fcut + rho_core(ii);
+  }
   for (int p = 0; p < ndensity; ++p)
     dF_drho(ii, p) *= fcut;
 
-
   // tally energy contribution
   if (eflag) {
-    double evdwl_cut = evdwl * fcut + rho_core(ii);
     // E0 shift
     evdwl_cut += d_E0vals(mu_i);
     e_atom(ii) = evdwl_cut;
   }
+
+  if (flag_corerep_factor)
+    d_corerep(ii) = 1-fcut;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -1015,52 +1204,58 @@ template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairPACEKokkos<DeviceType>::operator() (TagPairPACEComputeWeights, const int& iter) const
 {
-  const int idx_rho = iter / chunk_size;
+  const int idx_ms_combs = iter / chunk_size;
   const int ii = iter % chunk_size;
 
   const int i = d_ilist[ii + chunk_offset];
   const int mu_i = d_map(type(i));
 
-  if (idx_rho >= d_idx_rho_count(mu_i)) return;
+  if (idx_ms_combs >= d_idx_ms_combs_count(mu_i)) return;
 
   const int ndensity = d_ndensity(mu_i);
 
-  const int offset = d_offsets(mu_i, idx_rho);
-  const int rank = d_rank(mu_i, offset);
+  const int idx_func = d_idx_funcs(mu_i, idx_ms_combs);
+  const int rank = d_rank(mu_i, idx_func);
 
   // Weights and theta calculation
 
   if (rank == 1) {
-    const int mu = d_mus(mu_i, offset, 0);
-    const int n = d_ns(mu_i, offset, 0);
+    const int mu = d_mus(mu_i, idx_func, 0);
+    const int n = d_ns(mu_i, idx_func, 0);
     double theta = 0.0;
     for (int p = 0; p < ndensity; ++p) {
       // for rank=1 (r=0) only 1 ms-combination exists (ms_ind=0), so index of func.ctildes is 0..ndensity-1
-      theta += dF_drho(ii, p) * d_ctildes(mu_i, idx_rho, p);
+      theta += dF_drho(ii, p) * d_ctildes(mu_i, idx_ms_combs, p);
     }
     Kokkos::atomic_add(&weights_rank1(ii, mu, n - 1), theta);
   } else { // rank > 1
     double theta = 0.0;
     for (int p = 0; p < ndensity; ++p)
-      theta += dF_drho(ii, p) * d_ctildes(mu_i, idx_rho, p);
+      theta += dF_drho(ii, p) * d_ctildes(mu_i, idx_ms_combs, p);
 
     theta *= 0.5; // 0.5 factor due to possible double counting ???
     for (int t = 0; t < rank; ++t) {
-      const int m_t = d_ms_combs(mu_i, idx_rho, t);
+      const int m_t = d_ms_combs(mu_i, idx_ms_combs, t);
       const int factor = (m_t % 2 == 0 ? 1 : -1);
-      const complex dB = dB_flatten(ii, idx_rho, t);
-      const int mu_t = d_mus(mu_i, offset, t);
-      const int n_t = d_ns(mu_i, offset, t);
-      const int l_t = d_ls(mu_i, offset, t);
+      const complex dB = dB_flatten(ii, idx_ms_combs, t);
+      const int mu_t = d_mus(mu_i, idx_func, t);
+      const int n_t = d_ns(mu_i, idx_func, t);
+      const int l_t = d_ls(mu_i, idx_func, t);
       const int idx = l_t * (l_t + 1) + m_t; // (l, m)
-      const complex value = theta * dB;
-      Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idx).re), value.re);
-      Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idx).im), value.im);
+      const int idx_sph = d_idx_sph(idx);
+      if (idx_sph >= 0) {
+        const complex value = theta * dB;
+        Kokkos::atomic_add(&(weights(ii, mu_t, idx_sph, n_t - 1).re), value.re);
+        Kokkos::atomic_add(&(weights(ii, mu_t, idx_sph, n_t - 1).im), value.im);
+      }
       // update -m_t (that could also be positive), because the basis is half_basis
       const int idxm = l_t * (l_t + 1) - m_t; // (l, -m)
-      const complex valuem = theta * dB.conj() * (double)factor;
-      Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idxm).re), valuem.re);
-      Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idxm).im), valuem.im);
+      const int idxm_sph = d_idx_sph(idxm);
+      if (idxm_sph >= 0) {
+        const complex valuem = theta * dB.conj() * (double)factor;
+        Kokkos::atomic_add(&(weights(ii, mu_t, idxm_sph, n_t - 1).re), valuem.re);
+        Kokkos::atomic_add(&(weights(ii, mu_t, idxm_sph, n_t - 1).im), valuem.im);
+      }
     }
   }
 }
@@ -1107,37 +1302,239 @@ void PairPACEKokkos<DeviceType>::operator() (TagPairPACEComputeDerivative, const
   }
 
   // for rank > 1
-  for (int n = 0; n < nradmax; n++) {
-    for (int l = 0; l <= lmax; l++) {
-      const double R_over_r = fr(ii, jj, n, l) * rinv;
-      const double DR = dfr(ii, jj, n, l);
 
-      // for m >= 0
-      for (int m = 0; m <= l; m++) {
-        const int idx = l * (l + 1) + m; // (l, m)
-        complex w = weights(ii, mu_j, n, idx);
+  // compute plm, dplm, ylm and dylm
+  // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!!
+  // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!!
+  // prefactors include 1/sqrt(2) factor compared to reference
+
+  complex ylm,dylm[3];
+  complex phase;
+  complex phasem, mphasem1;
+  complex dyx, dyy, dyz;
+  complex rdy;
+
+  const double rx = d_rhats(ii, jj, 0);
+  const double ry = d_rhats(ii, jj, 1);
+  const double rz = d_rhats(ii, jj, 2);
+
+  phase.re = rx;
+  phase.im = ry;
+
+  double plm_idx,plm_idx1,plm_idx2;
+  double dplm_idx,dplm_idx1,dplm_idx2;
+
+  plm_idx = plm_idx1 = plm_idx2 = 0.0;
+  dplm_idx = dplm_idx1 = dplm_idx2 = 0.0;
+
+  int idx_sph = 0;
+
+  // m = 0
+  for (int l = 0; l <= lmax; l++) {
+    // const int idx = l * (l + 1);
+
+    if (l == 0) {
+      // l=0, m=0
+      // plm[0] = Y00/sq1o4pi; //= sq1o4pi;
+      plm_idx = Y00; //= 1;
+      dplm_idx = 0.0;
+    } else if (l == 1) {
+      // l=1, m=0
+      plm_idx = Y00 * sq3 * rz;
+      dplm_idx = Y00 * sq3;
+    } else {
+      // l>=2, m=0
+      plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2);
+      dplm_idx = alm(idx_sph) * (plm_idx1 + rz * dplm_idx1 + blm(idx_sph) * dplm_idx2);
+    }
+
+    ylm.re = plm_idx;
+    ylm.im = 0.0;
+
+    dyz.re = dplm_idx;
+    rdy.re = dyz.re * rz;
+
+    dylm[0].re = -rdy.re * rx;
+    dylm[0].im = 0.0;
+    dylm[1].re = -rdy.re * ry;
+    dylm[1].im = 0.0;
+    dylm[2].re = dyz.re - rdy.re * rz;
+    dylm[2].im = 0;
+
+    for (int n = 0; n < nradmax; n++) {
+
+      const double R_over_r = fr(ii, jj, l, n) * rinv;
+      const double DR = dfr(ii, jj, l, n);
+      const complex Y_DR = ylm * DR;
+
+      complex w = weights(ii, mu_j, idx_sph, n);
+      if (w.re == 0.0 && w.im == 0.0) continue;
+
+      complex grad_phi_nlm[3];
+      grad_phi_nlm[0] = Y_DR * r_hat[0] + dylm[0] * R_over_r;
+      grad_phi_nlm[1] = Y_DR * r_hat[1] + dylm[1] * R_over_r;
+      grad_phi_nlm[2] = Y_DR * r_hat[2] + dylm[2] * R_over_r;
+      // real-part multiplication only
+      f_ji[0] += w.real_part_product(grad_phi_nlm[0]);
+      f_ji[1] += w.real_part_product(grad_phi_nlm[1]);
+      f_ji[2] += w.real_part_product(grad_phi_nlm[2]);
+    }
+
+    plm_idx2 = plm_idx1;
+    dplm_idx2 = dplm_idx1;
+
+    plm_idx1 = plm_idx;
+    dplm_idx1 = dplm_idx;
+
+    idx_sph++;
+  }
+
+  plm_idx = plm_idx1 = plm_idx2 = 0.0;
+  dplm_idx = dplm_idx1 = dplm_idx2 = 0.0;
+
+  // m = 1
+  for (int l = 1; l <= lmax; l++) {
+    // const int idx = l * (l + 1) + 1; // (l, 1)
+
+    if (l == 1) {
+      // l=1, m=1
+      plm_idx = -sq3o2 * Y00;
+      dplm_idx = 0.0;
+    } else if (l == 2) {
+      const double t = dl(l) * plm_idx1;
+      plm_idx = t * rz;
+      dplm_idx = t;
+    } else {
+      plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2);
+      dplm_idx = alm(idx_sph) * (plm_idx1 + rz * dplm_idx1 + blm(idx_sph) * dplm_idx2);
+    }
+
+    ylm = phase * plm_idx;
+
+    dyx.re = plm_idx;
+    dyx.im = 0.0;
+    dyy.re = 0.0;
+    dyy.im = plm_idx;
+    dyz.re = phase.re * dplm_idx;
+    dyz.im = phase.im * dplm_idx;
+
+    rdy.re = rx * dyx.re + +rz * dyz.re;
+    rdy.im = ry * dyy.im + rz * dyz.im;
+
+    dylm[0].re = dyx.re - rdy.re * rx;
+    dylm[0].im = -rdy.im * rx;
+    dylm[1].re = -rdy.re * ry;
+    dylm[1].im = dyy.im - rdy.im * ry;
+    dylm[2].re = dyz.re - rdy.re * rz;
+    dylm[2].im = dyz.im - rdy.im * rz;
+
+    for (int n = 0; n < nradmax; n++) {
+
+      const double R_over_r = fr(ii, jj, l, n) * rinv;
+      const double DR = dfr(ii, jj, l, n);
+      const complex Y_DR = ylm * DR;
+
+      complex w = weights(ii, mu_j, idx_sph, n);
+      if (w.re == 0.0 && w.im == 0.0) continue;
+      // counting for -m cases if m > 0
+      w.re *= 2.0;
+      w.im *= 2.0;
+
+      complex grad_phi_nlm[3];
+      grad_phi_nlm[0] = Y_DR * r_hat[0] + dylm[0] * R_over_r;
+      grad_phi_nlm[1] = Y_DR * r_hat[1] + dylm[1] * R_over_r;
+      grad_phi_nlm[2] = Y_DR * r_hat[2] + dylm[2] * R_over_r;
+      // real-part multiplication only
+      f_ji[0] += w.real_part_product(grad_phi_nlm[0]);
+      f_ji[1] += w.real_part_product(grad_phi_nlm[1]);
+      f_ji[2] += w.real_part_product(grad_phi_nlm[2]);
+    }
+
+    plm_idx2 = plm_idx1;
+    dplm_idx2 = dplm_idx1;
+
+    plm_idx1 = plm_idx;
+    dplm_idx1 = dplm_idx;
+
+    idx_sph++;
+  }
+
+  plm_idx = plm_idx1 = plm_idx2 = 0.0;
+  dplm_idx = dplm_idx1 = dplm_idx2 = 0.0;
+
+  double plm_mm1_mm1 = -sq3o2 * Y00; // (1, 1)
+
+  // m > 1
+  phasem = phase;
+  for (int m = 2; m <= lmax; m++) {
+
+    mphasem1.re = phasem.re * double(m);
+    mphasem1.im = phasem.im * double(m);
+    phasem = phasem * phase;
+
+    for (int l = m; l <= lmax; l++) {
+      // const int idx = l * (l + 1) + m;
+
+      if (l == m) {
+        plm_idx = cl(l) * plm_mm1_mm1; // (m+1, m)
+        dplm_idx = 0.0;
+        plm_mm1_mm1 = plm_idx;
+      } else if (l == (m + 1)) {
+        const double t = dl(l) * plm_mm1_mm1; // (m - 1, m - 1)
+        plm_idx = t * rz; // (m, m)
+        dplm_idx = t;
+      } else {
+        plm_idx = alm(idx_sph) * (rz * plm_idx1 + blm(idx_sph) * plm_idx2);
+        dplm_idx = alm(idx_sph) * (plm_idx1 + rz * dplm_idx1 + blm(idx_sph) * dplm_idx2);
+      }
+
+      ylm.re = phasem.re * plm_idx;
+      ylm.im = phasem.im * plm_idx;
+
+      dyx = mphasem1 * plm_idx;
+      dyy.re = -dyx.im;
+      dyy.im = dyx.re;
+      dyz = phasem * dplm_idx;
+
+      rdy.re = rx * dyx.re + ry * dyy.re + rz * dyz.re;
+      rdy.im = rx * dyx.im + ry * dyy.im + rz * dyz.im;
+
+      dylm[0].re = dyx.re - rdy.re * rx;
+      dylm[0].im = dyx.im - rdy.im * rx;
+      dylm[1].re = dyy.re - rdy.re * ry;
+      dylm[1].im = dyy.im - rdy.im * ry;
+      dylm[2].re = dyz.re - rdy.re * rz;
+      dylm[2].im = dyz.im - rdy.im * rz;
+
+      for (int n = 0; n < nradmax; n++) {
+
+        const double R_over_r = fr(ii, jj, l, n) * rinv;
+        const double DR = dfr(ii, jj, l, n);
+        const complex Y_DR = ylm * DR;
+
+        complex w = weights(ii, mu_j, idx_sph, n);
         if (w.re == 0.0 && w.im == 0.0) continue;
         // counting for -m cases if m > 0
-        if (m > 0) {
-          w.re *= 2.0;
-          w.im *= 2.0;
-        }
-
-        complex DY[3];
-        DY[0] = dylm(ii, jj, idx, 0);
-        DY[1] = dylm(ii, jj, idx, 1);
-        DY[2] = dylm(ii, jj, idx, 2);
-        const complex Y_DR = ylm(ii, jj, idx) * DR;
+        w.re *= 2.0;
+        w.im *= 2.0;
 
         complex grad_phi_nlm[3];
-        grad_phi_nlm[0] = Y_DR * r_hat[0] + DY[0] * R_over_r;
-        grad_phi_nlm[1] = Y_DR * r_hat[1] + DY[1] * R_over_r;
-        grad_phi_nlm[2] = Y_DR * r_hat[2] + DY[2] * R_over_r;
+        grad_phi_nlm[0] = Y_DR * r_hat[0] + dylm[0] * R_over_r;
+        grad_phi_nlm[1] = Y_DR * r_hat[1] + dylm[1] * R_over_r;
+        grad_phi_nlm[2] = Y_DR * r_hat[2] + dylm[2] * R_over_r;
         // real-part multiplication only
         f_ji[0] += w.real_part_product(grad_phi_nlm[0]);
         f_ji[1] += w.real_part_product(grad_phi_nlm[1]);
         f_ji[2] += w.real_part_product(grad_phi_nlm[2]);
       }
+
+      plm_idx2 = plm_idx1;
+      dplm_idx2 = dplm_idx1;
+
+      plm_idx1 = plm_idx;
+      dplm_idx1 = dplm_idx;
+
+      idx_sph++;
     }
   }
 
@@ -1146,6 +1543,15 @@ void PairPACEKokkos<DeviceType>::operator() (TagPairPACEComputeDerivative, const
   f_ij(ii, jj, 0) = scale * f_ji[0] + fpair * r_hat[0];
   f_ij(ii, jj, 1) = scale * f_ji[1] + fpair * r_hat[1];
   f_ij(ii, jj, 2) = scale * f_ji[2] + fpair * r_hat[2];
+
+  if (is_zbl) {
+    if (jj==d_jj_min(ii)) {
+      // DCRU = 1.0
+      f_ij(ii, jj, 0) += dF_dfcut(ii) * r_hat[0];
+      f_ij(ii, jj, 1) += dF_dfcut(ii) * r_hat[1];
+      f_ij(ii, jj, 2) += dF_dfcut(ii) * r_hat[2];
+    }
+  }
 }
 
 /* ---------------------------------------------------------------------- */
@@ -1266,31 +1672,46 @@ void PairPACEKokkos<DeviceType>::v_tally_xyz(EV_FLOAT &ev, const int &i, const i
 template<class DeviceType>
 void PairPACEKokkos<DeviceType>::pre_compute_harmonics(int lmax)
 {
+  auto h_idx_sph = Kokkos::create_mirror_view(d_idx_sph);
   auto h_alm = Kokkos::create_mirror_view(alm);
   auto h_blm = Kokkos::create_mirror_view(blm);
   auto h_cl = Kokkos::create_mirror_view(cl);
   auto h_dl = Kokkos::create_mirror_view(dl);
 
-  for (int l = 1; l <= lmax; l++) {
-    const double lsq = l * l;
-    const double ld = 2 * l;
-    const double l1 = (4 * lsq - 1);
-    const double l2 = lsq - ld + 1;
-    for (int m = 0; m < l - 1; m++) {
-      const double msq = m * m;
-      const double a = sqrt((double(l1)) / (double(lsq - msq)));
-      const double b = -sqrt((double(l2 - msq)) / (double(4 * l2 - 1)));
+  Kokkos::deep_copy(h_idx_sph,-1);
+
+  int idx_sph = 0;
+  for (int m = 0; m <= lmax; m++) {
+    const double msq = m * m;
+    for (int l = m; l <= lmax; l++) {
       const int idx = l * (l + 1) + m; // (l, m)
-      h_alm(idx) = a;
-      h_blm(idx) = b;
+      h_idx_sph(idx) = idx_sph;
+
+      double a = 0.0;
+      double b = 0.0;
+
+      if (l > 1 && l != m) {
+        const double lsq = l * l;
+        const double ld = 2 * l;
+        const double l1 = (4 * lsq - 1);
+        const double l2 = lsq - ld + 1;
+
+        a = sqrt((double(l1)) / (double(lsq - msq)));
+        b = -sqrt((double(l2 - msq)) / (double(4 * l2 - 1)));
+      }
+      h_alm(idx_sph) = a;
+      h_blm(idx_sph) = b;
+      idx_sph++;
     }
   }
+  idx_sph_max = idx_sph;
 
   for (int l = 1; l <= lmax; l++) {
     h_cl(l) = -sqrt(1.0 + 0.5 / (double(l)));
     h_dl(l) = sqrt(double(2 * (l - 1) + 3));
   }
 
+  Kokkos::deep_copy(d_idx_sph, h_idx_sph);
   Kokkos::deep_copy(alm, h_alm);
   Kokkos::deep_copy(blm, h_blm);
   Kokkos::deep_copy(cl, h_cl);
@@ -1299,143 +1720,6 @@ void PairPACEKokkos<DeviceType>::pre_compute_harmonics(int lmax)
 
 /* ---------------------------------------------------------------------- */
 
-template<class DeviceType>
-KOKKOS_INLINE_FUNCTION
-void PairPACEKokkos<DeviceType>::compute_barplm(int ii, int jj, double rz, int lmax) const
-{
-  // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!!
-  // prefactors include 1/sqrt(2) factor compared to reference
-
-  // l=0, m=0
-  // plm(ii, jj, 0, 0) = Y00/sq1o4pi; //= sq1o4pi;
-  plm(ii, jj, 0) = Y00; //= 1;
-  dplm(ii, jj, 0) = 0.0;
-
-  if (lmax > 0) {
-
-    // l=1, m=0
-    plm(ii, jj, 2) = Y00 * sq3 * rz;
-    dplm(ii, jj, 2) = Y00 * sq3;
-
-    // l=1, m=1
-    plm(ii, jj, 3) = -sq3o2 * Y00;
-    dplm(ii, jj, 3) = 0.0;
-
-    // loop l = 2, lmax
-    for (int l = 2; l <= lmax; l++) {
-      for (int m = 0; m < l - 1; m++) {
-        const int idx = l * (l + 1) + m; // (l, m)
-        const int idx1 = (l - 1) * l + m; // (l - 1, m)
-        const int idx2 = (l - 2) * (l - 1) + m; // (l - 2, m)
-        plm(ii, jj, idx) = alm(idx) * (rz * plm(ii, jj, idx1) + blm(idx) * plm(ii, jj, idx2));
-        dplm(ii, jj, idx) = alm(idx) * (plm(ii, jj, idx1) + rz * dplm(ii, jj, idx1) + blm(idx) * dplm(ii, jj, idx2));
-      }
-      const int idx = l * (l + 1) + l; // (l, l)
-      const int idx1 = l * (l + 1) + l - 1; // (l, l - 1)
-      const int idx2 = (l - 1) * l + l - 1; // (l - 1, l - 1)
-      const double t = dl(l) * plm(ii, jj, idx2);
-      plm(ii, jj, idx1) = t * rz;
-      dplm(ii, jj, idx1) = t;
-      plm(ii, jj, idx) = cl(l) * plm(ii, jj, idx2);
-      dplm(ii, jj, idx) = 0.0;
-    }
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
-template<class DeviceType>
-KOKKOS_INLINE_FUNCTION
-void PairPACEKokkos<DeviceType>::compute_ylm(int ii, int jj, double rx, double ry, double rz, int lmax) const
-{
-  // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!!
-
-  complex phase;
-  complex phasem, mphasem1;
-  complex dyx, dyy, dyz;
-  complex rdy;
-
-  phase.re = rx;
-  phase.im = ry;
-
-  // compute barplm
-  compute_barplm(ii, jj, rz, lmax);
-
-  // m = 0
-  for (int l = 0; l <= lmax; l++) {
-    const int idx = l * (l + 1);
-
-    ylm(ii, jj, idx).re = plm(ii, jj, idx);
-    ylm(ii, jj, idx).im = 0.0;
-
-    dyz.re = dplm(ii, jj, idx);
-    rdy.re = dyz.re * rz;
-
-    dylm(ii, jj, idx, 0).re = -rdy.re * rx;
-    dylm(ii, jj, idx, 0).im = 0.0;
-    dylm(ii, jj, idx, 1).re = -rdy.re * ry;
-    dylm(ii, jj, idx, 1).im = 0.0;
-    dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz;
-    dylm(ii, jj, idx, 2).im = 0;
-  }
-  // m = 1
-  for (int l = 1; l <= lmax; l++) {
-    const int idx = l * (l + 1) + 1;
-
-    ylm(ii, jj, idx) = phase * plm(ii, jj, idx);
-
-    dyx.re = plm(ii, jj, idx);
-    dyx.im = 0.0;
-    dyy.re = 0.0;
-    dyy.im = plm(ii, jj, idx);
-    dyz.re = phase.re * dplm(ii, jj, idx);
-    dyz.im = phase.im * dplm(ii, jj, idx);
-
-    rdy.re = rx * dyx.re + +rz * dyz.re;
-    rdy.im = ry * dyy.im + rz * dyz.im;
-
-    dylm(ii, jj, idx, 0).re = dyx.re - rdy.re * rx;
-    dylm(ii, jj, idx, 0).im = -rdy.im * rx;
-    dylm(ii, jj, idx, 1).re = -rdy.re * ry;
-    dylm(ii, jj, idx, 1).im = dyy.im - rdy.im * ry;
-    dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz;
-    dylm(ii, jj, idx, 2).im = dyz.im - rdy.im * rz;
-  }
-
-  // m > 1
-  phasem = phase;
-  for (int m = 2; m <= lmax; m++) {
-
-    mphasem1.re = phasem.re * double(m);
-    mphasem1.im = phasem.im * double(m);
-    phasem = phasem * phase;
-
-    for (int l = m; l <= lmax; l++) {
-      const int idx = l * (l + 1) + m;
-
-      ylm(ii, jj, idx).re = phasem.re * plm(ii, jj, idx);
-      ylm(ii, jj, idx).im = phasem.im * plm(ii, jj, idx);
-
-      dyx = mphasem1 * plm(ii, jj, idx);
-      dyy.re = -dyx.im;
-      dyy.im = dyx.re;
-      dyz = phasem * dplm(ii, jj, idx);
-
-      rdy.re = rx * dyx.re + ry * dyy.re + rz * dyz.re;
-      rdy.im = rx * dyx.im + ry * dyy.im + rz * dyz.im;
-
-      dylm(ii, jj, idx, 0).re = dyx.re - rdy.re * rx;
-      dylm(ii, jj, idx, 0).im = dyx.im - rdy.im * rx;
-      dylm(ii, jj, idx, 1).re = dyy.re - rdy.re * ry;
-      dylm(ii, jj, idx, 1).im = dyy.im - rdy.im * ry;
-      dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz;
-      dylm(ii, jj, idx, 2).im = dyz.im - rdy.im * rz;
-    }
-  }
-}
-
-/* ---------------------------------------------------------------------- */
-
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairPACEKokkos<DeviceType>::cutoff_func_poly(const double r, const double r_in, const double delta_in, double &fc, double &dfc) const
@@ -1564,11 +1848,11 @@ void PairPACEKokkos<DeviceType>::evaluate_splines(const int ii, const int jj, do
   spline_gk.calcSplines(ii, jj, r, gr, dgr);
 
   spline_rnl.calcSplines(ii, jj, r, d_values, d_derivatives);
-  for (int kk = 0; kk < (int)fr.extent(2); kk++) {
-    for (int ll = 0; ll < (int)fr.extent(3); ll++) {
-      const int flatten = kk*fr.extent(3) + ll;
-      fr(ii, jj, kk, ll) = d_values(ii, jj, flatten);
-      dfr(ii, jj, kk, ll) = d_derivatives(ii, jj, flatten);
+  for (int ll = 0; ll < (int)fr.extent(2); ll++) {
+    for (int kk = 0; kk < (int)fr.extent(3); kk++) {
+      const int flatten = kk*fr.extent(2) + ll;
+      fr(ii, jj, ll, kk) = d_values(ii, jj, flatten);
+      dfr(ii, jj, ll, kk) = d_derivatives(ii, jj, flatten);
     }
   }
 
@@ -1588,7 +1872,7 @@ void PairPACEKokkos<DeviceType>::SplineInterpolatorKokkos::operator=(const Splin
     rscalelookup = spline.rscalelookup;
     num_of_functions = spline.num_of_functions;
 
-    lookupTable = t_ace_3d4("lookupTable", ntot+1, num_of_functions);
+    lookupTable = t_ace_3d4_lr("lookupTable", ntot+1, num_of_functions);
     auto h_lookupTable = Kokkos::create_mirror_view(lookupTable);
     for (int i = 0; i < ntot+1; i++)
         for (int j = 0; j < num_of_functions; j++)
@@ -1683,6 +1967,8 @@ double PairPACEKokkos<DeviceType>::memory_usage()
   bytes += MemKK::memory_usage(weights_rank1);
   bytes += MemKK::memory_usage(rho_core);
   bytes += MemKK::memory_usage(dF_drho_core);
+  bytes += MemKK::memory_usage(dF_dfcut);
+  bytes += MemKK::memory_usage(d_corerep);
   bytes += MemKK::memory_usage(dB_flatten);
   bytes += MemKK::memory_usage(fr);
   bytes += MemKK::memory_usage(dfr);
@@ -1692,14 +1978,12 @@ double PairPACEKokkos<DeviceType>::memory_usage()
   bytes += MemKK::memory_usage(d_derivatives);
   bytes += MemKK::memory_usage(cr);
   bytes += MemKK::memory_usage(dcr);
-  bytes += MemKK::memory_usage(plm);
-  bytes += MemKK::memory_usage(dplm);
-  bytes += MemKK::memory_usage(ylm);
-  bytes += MemKK::memory_usage(dylm);
   bytes += MemKK::memory_usage(d_ncount);
   bytes += MemKK::memory_usage(d_mu);
   bytes += MemKK::memory_usage(d_rhats);
   bytes += MemKK::memory_usage(d_rnorms);
+  bytes += MemKK::memory_usage(d_d_min);
+  bytes += MemKK::memory_usage(d_jj_min);
   bytes += MemKK::memory_usage(d_nearest);
   bytes += MemKK::memory_usage(f_ij);
   bytes += MemKK::memory_usage(d_rho_core_cutoff);
@@ -1709,10 +1993,10 @@ double PairPACEKokkos<DeviceType>::memory_usage()
   bytes += MemKK::memory_usage(d_npoti);
   bytes += MemKK::memory_usage(d_wpre);
   bytes += MemKK::memory_usage(d_mexp);
-  bytes += MemKK::memory_usage(d_idx_rho_count);
+  bytes += MemKK::memory_usage(d_idx_ms_combs_count);
   bytes += MemKK::memory_usage(d_rank);
   bytes += MemKK::memory_usage(d_num_ms_combs);
-  bytes += MemKK::memory_usage(d_offsets);
+  bytes += MemKK::memory_usage(d_idx_funcs);
   bytes += MemKK::memory_usage(d_mus);
   bytes += MemKK::memory_usage(d_ns);
   bytes += MemKK::memory_usage(d_ls);
diff --git a/src/KOKKOS/pair_pace_kokkos.h b/src/KOKKOS/pair_pace_kokkos.h
index 39cfd100f8..e22c61f0ea 100644
--- a/src/KOKKOS/pair_pace_kokkos.h
+++ b/src/KOKKOS/pair_pace_kokkos.h
@@ -36,7 +36,6 @@ class PairPACEKokkos : public PairPACE {
  public:
   struct TagPairPACEComputeNeigh{};
   struct TagPairPACEComputeRadial{};
-  struct TagPairPACEComputeYlm{};
   struct TagPairPACEComputeAi{};
   struct TagPairPACEConjugateAi{};
   struct TagPairPACEComputeRho{};
@@ -66,9 +65,6 @@ class PairPACEKokkos : public PairPACE {
   KOKKOS_INLINE_FUNCTION
   void operator() (TagPairPACEComputeRadial,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeRadial>::member_type& team) const;
 
-  KOKKOS_INLINE_FUNCTION
-  void operator() (TagPairPACEComputeYlm,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeYlm>::member_type& team) const;
-
   KOKKOS_INLINE_FUNCTION
   void operator() (TagPairPACEComputeAi,const typename Kokkos::TeamPolicy<DeviceType, TagPairPACEComputeAi>::member_type& team) const;
 
@@ -96,7 +92,7 @@ class PairPACEKokkos : public PairPACE {
   void operator() (TagPairPACEComputeForce<NEIGHFLAG,EVFLAG>,const int& ii, EV_FLOAT&) const;
 
  protected:
-  int inum, maxneigh, chunk_size, chunk_offset, idx_rho_max;
+  int inum, maxneigh, chunk_size, chunk_offset, idx_ms_combs_max, idx_sph_max;
   int host_flag;
 
   int eflag, vflag;
@@ -121,6 +117,7 @@ class PairPACEKokkos : public PairPACE {
   tdual_fparams k_cutsq, k_scale;
   typedef Kokkos::View<F_FLOAT**, DeviceType> t_fparams;
   t_fparams d_cutsq, d_scale;
+  t_fparams d_cut_in, d_dcut_in; // inner cutoff
 
   typename AT::t_int_1d d_map;
 
@@ -156,12 +153,6 @@ class PairPACEKokkos : public PairPACE {
       const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz,
       const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const;
 
-  KOKKOS_INLINE_FUNCTION
-  void compute_barplm(int, int, double, int) const;
-
-  KOKKOS_INLINE_FUNCTION
-  void compute_ylm(int, int, double, double, double, int) const;
-
   KOKKOS_INLINE_FUNCTION
   void cutoff_func_poly(const double, const double, const double, double &, double &) const;
 
@@ -193,14 +184,18 @@ class PairPACEKokkos : public PairPACE {
 
   typedef Kokkos::View<int*, DeviceType> t_ace_1i;
   typedef Kokkos::View<int**, DeviceType> t_ace_2i;
+  typedef Kokkos::View<int**, Kokkos::LayoutRight, DeviceType> t_ace_2i_lr;
   typedef Kokkos::View<int***, DeviceType> t_ace_3i;
+  typedef Kokkos::View<int***, Kokkos::LayoutRight, DeviceType> t_ace_3i_lr;
   typedef Kokkos::View<int****, DeviceType> t_ace_4i;
   typedef Kokkos::View<double*, DeviceType> t_ace_1d;
   typedef Kokkos::View<double**, DeviceType> t_ace_2d;
+  typedef Kokkos::View<double**, Kokkos::LayoutRight, DeviceType> t_ace_2d_lr;
   typedef Kokkos::View<double*[3], DeviceType> t_ace_2d3;
   typedef Kokkos::View<double***, DeviceType> t_ace_3d;
   typedef Kokkos::View<double**[3], DeviceType> t_ace_3d3;
   typedef Kokkos::View<double**[4], DeviceType> t_ace_3d4;
+  typedef Kokkos::View<double**[4], Kokkos::LayoutRight, DeviceType> t_ace_3d4_lr;
   typedef Kokkos::View<double****, DeviceType> t_ace_4d;
   typedef Kokkos::View<complex*, DeviceType> t_ace_1c;
   typedef Kokkos::View<complex**, DeviceType> t_ace_2c;
@@ -209,6 +204,8 @@ class PairPACEKokkos : public PairPACE {
   typedef Kokkos::View<complex****, DeviceType> t_ace_4c;
   typedef Kokkos::View<complex***[3], DeviceType> t_ace_4c3;
 
+  typedef typename Kokkos::View<double*, DeviceType>::HostMirror th_ace_1d;
+
   t_ace_3d A_rank1;
   t_ace_4c A;
 
@@ -222,12 +219,16 @@ class PairPACEKokkos : public PairPACE {
   t_ace_2d rhos;
   t_ace_2d dF_drho;
 
+  t_ace_3c dB_flatten;
+
   // hard-core repulsion
   t_ace_1d rho_core;
-  t_ace_3c dB_flatten;
   t_ace_2d cr;
   t_ace_2d dcr;
   t_ace_1d dF_drho_core;
+  t_ace_1d dF_dfcut;
+  t_ace_1d d_corerep;
+  th_ace_1d h_corerep;
 
   // radial functions
   t_ace_4d fr;
@@ -241,23 +242,13 @@ class PairPACEKokkos : public PairPACE {
 
   void pre_compute_harmonics(int);
 
-  KOKKOS_INLINE_FUNCTION
-  void compute_barplm(double rz, int lmaxi);
-
-  KOKKOS_INLINE_FUNCTION
-  void compute_ylm(double rx, double ry, double rz, int lmaxi);
-
+  t_ace_4c A_sph;
+  t_ace_1d d_idx_sph;
   t_ace_1d alm;
   t_ace_1d blm;
   t_ace_1d cl;
   t_ace_1d dl;
 
-  t_ace_3d plm;
-  t_ace_3d dplm;
-
-  t_ace_3c ylm;
-  t_ace_4c3 dylm;
-
   // short neigh list
   t_ace_1i d_ncount;
   t_ace_2d d_mu;
@@ -265,24 +256,29 @@ class PairPACEKokkos : public PairPACE {
   t_ace_3d3 d_rhats;
   t_ace_2i d_nearest;
 
+  // for ZBL core-rep implementation
+  t_ace_1d  d_d_min; // [i] -> min-d for atom ii, d=d = r - (cut_in(mu_i, mu_j) - dcut_in(mu_i, mu_j))
+  t_ace_1i  d_jj_min; // [i] -> jj-index of nearest neigh (by r-(cut_in-dcut_in) criterion)
+  bool is_zbl;
+
   // per-type
   t_ace_1i d_ndensity;
   t_ace_1i d_npoti;
   t_ace_1d d_rho_core_cutoff;
   t_ace_1d d_drho_core_cutoff;
   t_ace_1d d_E0vals;
-  t_ace_2d d_wpre;
-  t_ace_2d d_mexp;
+  t_ace_2d_lr d_wpre;
+  t_ace_2d_lr d_mexp;
 
   // tilde
-  t_ace_1i d_idx_rho_count;
-  t_ace_2i d_rank;
-  t_ace_2i d_num_ms_combs;
-  t_ace_2i d_offsets;
-  t_ace_3i d_mus;
-  t_ace_3i d_ns;
-  t_ace_3i d_ls;
-  t_ace_3i d_ms_combs;
+  t_ace_1i d_idx_ms_combs_count;
+  t_ace_2i_lr d_rank;
+  t_ace_2i_lr d_num_ms_combs;
+  t_ace_2i_lr d_idx_funcs;
+  t_ace_3i_lr d_mus;
+  t_ace_3i_lr d_ns;
+  t_ace_3i_lr d_ls;
+  t_ace_3i_lr d_ms_combs;
   t_ace_3d d_ctildes;
 
   t_ace_3d3 f_ij;
@@ -292,12 +288,12 @@ class PairPACEKokkos : public PairPACE {
     int ntot, nlut, num_of_functions;
     double cutoff, deltaSplineBins, invrscalelookup, rscalelookup;
 
-    t_ace_3d4 lookupTable;
+    t_ace_3d4_lr lookupTable;
 
     void operator=(const SplineInterpolator &spline);
 
     void deallocate() {
-      lookupTable = t_ace_3d4();
+      lookupTable = t_ace_3d4_lr();
     }
 
     double memory_usage() {
diff --git a/src/KOKKOS/pair_reaxff_kokkos.cpp b/src/KOKKOS/pair_reaxff_kokkos.cpp
index c7d54b80cd..505681acb3 100644
--- a/src/KOKKOS/pair_reaxff_kokkos.cpp
+++ b/src/KOKKOS/pair_reaxff_kokkos.cpp
@@ -1598,7 +1598,6 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxBuildListsHalfBlocking<
   F_FLOAT dDeltap_self_i[3] = {0.0,0.0,0.0};
   F_FLOAT total_bo_i = 0.0;
 
-  int j_index,i_index;
   d_bo_first[i] = i*maxbo;
   const int bo_first_i = d_bo_first[i];
 
@@ -1675,7 +1674,7 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxBuildListsHalfBlocking<
 
       int ii_index = -1;
       int jj_index = -1;
-      if (build_bo_list<NEIGHFLAG>(bo_first_i, i, j, i_index, j_index, ii_index, jj_index)) {
+      if (build_bo_list<NEIGHFLAG>(bo_first_i, i, j, ii_index, jj_index)) {
 
         // from BondOrder1
 
@@ -1743,7 +1742,6 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxBuildListsHalfBlockingP
 
   F_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3];
 
-  int j_index,i_index;
   d_bo_first[i] = i*maxbo;
   const int bo_first_i = d_bo_first[i];
 
@@ -1821,7 +1819,7 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxBuildListsHalfBlockingP
 
       int ii_index = -1;
       int jj_index = -1;
-      build_bo_list<NEIGHFLAG>(bo_first_i, i, j, i_index, j_index, ii_index, jj_index);
+      build_bo_list<NEIGHFLAG>(bo_first_i, i, j, ii_index, jj_index);
     }
   }
 }
@@ -1842,7 +1840,6 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxBuildListsHalfPreview<N
 
   F_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3];
 
-  int j_index,i_index;
   d_bo_first[i] = i*maxbo;
   const int bo_first_i = d_bo_first[i];
 
@@ -1891,7 +1888,7 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxBuildListsHalfPreview<N
     int ii_index = -1;
     int jj_index = -1;
 
-    build_bo_list<NEIGHFLAG>(bo_first_i, i, j, i_index, j_index, ii_index, jj_index);
+    build_bo_list<NEIGHFLAG>(bo_first_i, i, j, ii_index, jj_index);
   }
 }
 
@@ -1942,7 +1939,8 @@ void PairReaxFFKokkos<DeviceType>::build_hb_list(F_FLOAT rsq, int i, int hb_firs
 template<class DeviceType>
 template<int NEIGHFLAG>
 KOKKOS_INLINE_FUNCTION
-bool PairReaxFFKokkos<DeviceType>::build_bo_list(int bo_first_i, int i, int j, int i_index, int j_index, int& ii_index, int& jj_index) const {
+bool PairReaxFFKokkos<DeviceType>::build_bo_list(int bo_first_i, int i, int j, int& ii_index, int& jj_index) const {
+   int i_index, j_index;
 
   if (NEIGHFLAG == HALF) {
     j_index = bo_first_i + d_bo_num[i];
@@ -2509,8 +2507,6 @@ void PairReaxFFKokkos<DeviceType>::compute_angular_sbo(int i, int itype, int j_s
   F_FLOAT prod_SBO = 1.0;
 
   for (int jj = j_start; jj < j_end; jj++) {
-    int j = d_bo_list[jj];
-    j &= NEIGHMASK;
     const int j_index = jj - j_start;
     const F_FLOAT bo_ij = d_BO(i,j_index);
 
@@ -2919,8 +2915,6 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxComputeAngularPreproces
   a_CdDelta[k] += CEcoa5;
 
   for (int ll = j_start; ll < j_end; ll++) {
-    int l = d_bo_list[ll];
-    l &= NEIGHMASK;
     const int l_index = ll - j_start;
 
     temp_bo_jt = d_BO(i,l_index);
@@ -4162,22 +4156,23 @@ double PairReaxFFKokkos<DeviceType>::memory_usage()
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
-void PairReaxFFKokkos<DeviceType>::FindBond(int &numbonds)
+void PairReaxFFKokkos<DeviceType>::FindBond(int &numbonds, int groupbit)
 {
   copymode = 1;
   Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairReaxFindBondZero>(0,nmax),*this);
 
   bo_cut_bond = api->control->bg_cut;
 
-  atomKK->sync(execution_space,TAG_MASK);
+  atomKK->sync(execution_space,TAG_MASK|MASK_MASK);
   tag = atomKK->k_tag.view<DeviceType>();
+  mask = atomKK->k_mask.view<DeviceType>();
 
   const int inum = list->inum;
   NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
   d_ilist = k_list->d_ilist;
 
   numbonds = 0;
-  PairReaxKokkosFindBondFunctor<DeviceType> find_bond_functor(this);
+  PairReaxKokkosFindBondFunctor<DeviceType> find_bond_functor(this, groupbit);
   Kokkos::parallel_reduce(inum,find_bond_functor,numbonds);
   copymode = 0;
 }
@@ -4194,24 +4189,28 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxFindBondZero, const int
 
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
-void PairReaxFFKokkos<DeviceType>::calculate_find_bond_item(int ii, int &numbonds) const
+void PairReaxFFKokkos<DeviceType>::calculate_find_bond_item(int ii, int &numbonds, int groupbit) const
 {
   const int i = d_ilist[ii];
   int nj = 0;
 
-  const int j_start = d_bo_first[i];
-  const int j_end = j_start + d_bo_num[i];
-  for (int jj = j_start; jj < j_end; jj++) {
-    int j = d_bo_list[jj];
-    j &= NEIGHMASK;
-    const tagint jtag = tag[j];
-    const int j_index = jj - j_start;
-    double bo_tmp = d_BO(i,j_index);
+  if (mask[i] & groupbit) {
+    const int j_start = d_bo_first[i];
+    const int j_end = j_start + d_bo_num[i];
+    for (int jj = j_start; jj < j_end; jj++) {
+      int j = d_bo_list[jj];
+      j &= NEIGHMASK;
+      if (mask[j] & groupbit) {
+        const tagint jtag = tag[j];
+        const int j_index = jj - j_start;
+        double bo_tmp = d_BO(i,j_index);
 
-    if (bo_tmp > bo_cut_bond) {
-      d_neighid(i,nj) = jtag;
-      d_abo(i,nj) = bo_tmp;
-      nj++;
+        if (bo_tmp > bo_cut_bond) {
+          d_neighid(i,nj) = jtag;
+          d_abo(i,nj) = bo_tmp;
+          nj++;
+        }
+      }
     }
   }
   d_numneigh_bonds[i] = nj;
@@ -4247,6 +4246,36 @@ void PairReaxFFKokkos<DeviceType>::PackBondBuffer(DAT::tdual_ffloat_1d k_buf, in
   nbuf_local = k_nbuf_local.h_view();
 }
 
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void PairReaxFFKokkos<DeviceType>::PackReducedBondBuffer(DAT::tdual_ffloat_1d k_buf, int &nbuf_local, bool store_bonds)
+{
+  d_buf = k_buf.view<DeviceType>();
+  k_params_sing.template sync<DeviceType>();
+
+  copymode = 1;
+  nlocal = atomKK->nlocal;
+  if (store_bonds) {
+    PairReaxKokkosPackReducedBondBufferFunctor<DeviceType, true> pack_bond_buffer_functor(this);
+    Kokkos::parallel_scan(nlocal,pack_bond_buffer_functor);
+  } else {
+    PairReaxKokkosPackReducedBondBufferFunctor<DeviceType, false> pack_bond_buffer_functor(this);
+    Kokkos::parallel_scan(nlocal,pack_bond_buffer_functor);
+  }
+
+  copymode = 0;
+
+  k_buf.modify<DeviceType>();
+  k_nbuf_local.modify<DeviceType>();
+
+  k_buf.sync<LMPHostType>();
+  k_nbuf_local.sync<LMPHostType>();
+  nbuf_local = k_nbuf_local.h_view();
+}
+
+/* ---------------------------------------------------------------------- */
+
 template<class DeviceType>
 KOKKOS_INLINE_FUNCTION
 void PairReaxFFKokkos<DeviceType>::pack_bond_buffer_item(int i, int &j, const bool &final) const
@@ -4288,6 +4317,42 @@ void PairReaxFFKokkos<DeviceType>::pack_bond_buffer_item(int i, int &j, const bo
     k_nbuf_local.view<DeviceType>()() = j - 1;
 }
 
+template<class DeviceType>
+template<bool STORE_BONDS>
+KOKKOS_INLINE_FUNCTION
+void PairReaxFFKokkos<DeviceType>::pack_reduced_bond_buffer_item(int i, int &j, const bool &final) const
+{
+  const int numbonds = d_numneigh_bonds[i];
+  if (final) {
+    d_buf[j] = d_total_bo[i];
+    d_buf[j+1] = paramssing(type[i]).nlp_opt - d_Delta_lp[i];
+    d_buf[j+2] = numbonds;
+  }
+
+  j += 3;
+
+  if constexpr(STORE_BONDS) {
+    if (final) {
+      for (int k = 0; k < numbonds; ++k) {
+        d_buf[j+k] = d_neighid(i,k);
+      }
+    }
+
+    j += numbonds;
+
+    if (final) {
+      for (int k = 0; k < numbonds; k++) {
+        d_buf[j+k] = d_abo(i,k);
+      }
+    }
+
+    j += numbonds;
+  }
+
+  if (final && i == nlocal-1)
+    k_nbuf_local.view<DeviceType>()() = j - 1;
+}
+
 /* ---------------------------------------------------------------------- */
 
 template<class DeviceType>
diff --git a/src/KOKKOS/pair_reaxff_kokkos.h b/src/KOKKOS/pair_reaxff_kokkos.h
index 421d704d03..5f228ebd19 100644
--- a/src/KOKKOS/pair_reaxff_kokkos.h
+++ b/src/KOKKOS/pair_reaxff_kokkos.h
@@ -130,8 +130,9 @@ class PairReaxFFKokkos : public PairReaxFF {
   void compute(int, int);
   void init_style();
   double memory_usage();
-  void FindBond(int &);
+  void FindBond(int &, int groupbit = 1);
   void PackBondBuffer(DAT::tdual_ffloat_1d, int &);
+  void PackReducedBondBuffer(DAT::tdual_ffloat_1d, int &, bool);
   void FindBondSpecies();
 
   template<int NEIGHFLAG>
@@ -184,7 +185,7 @@ class PairReaxFFKokkos : public PairReaxFF {
   // Returns if we need to populate d_d* functions or not
   template<int NEIGHFLAG>
   KOKKOS_INLINE_FUNCTION
-  bool build_bo_list(int, int, int, int, int, int&, int&) const;
+  bool build_bo_list(int, int, int, int&, int&) const;
 
   KOKKOS_INLINE_FUNCTION
   void operator()(TagPairReaxBuildListsFull, const int&) const;
@@ -284,11 +285,15 @@ class PairReaxFFKokkos : public PairReaxFF {
   void operator()(TagPairReaxFindBondZero, const int&) const;
 
   KOKKOS_INLINE_FUNCTION
-  void calculate_find_bond_item(int, int&) const;
+  void calculate_find_bond_item(int, int&, int) const;
 
   KOKKOS_INLINE_FUNCTION
   void pack_bond_buffer_item(int, int&, const bool&) const;
 
+  template<bool STORE_BONDS>
+  KOKKOS_INLINE_FUNCTION
+  void pack_reduced_bond_buffer_item(int, int&, const bool&) const;
+
   KOKKOS_INLINE_FUNCTION
   void operator()(TagPairReaxFindBondSpeciesZero, const int&) const;
 
@@ -409,6 +414,7 @@ class PairReaxFFKokkos : public PairReaxFF {
   typename AT::t_f_array f;
   typename AT::t_int_1d_randomread type;
   typename AT::t_tagint_1d_randomread tag;
+  typename AT::t_int_1d_randomread mask;
   typename AT::t_float_1d_randomread q;
   typename AT::t_tagint_1d_randomread molecule;
 
@@ -518,8 +524,9 @@ template <class DeviceType>
 struct PairReaxKokkosFindBondFunctor  {
   typedef DeviceType device_type;
   typedef int value_type;
+  int groupbit;
   PairReaxFFKokkos<DeviceType> c;
-  PairReaxKokkosFindBondFunctor(PairReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {};
+  PairReaxKokkosFindBondFunctor(PairReaxFFKokkos<DeviceType>* c_ptr, int groupbit):groupbit(groupbit),c(*c_ptr){};
 
   KOKKOS_INLINE_FUNCTION
   void join(int &dst,
@@ -529,7 +536,7 @@ struct PairReaxKokkosFindBondFunctor  {
 
   KOKKOS_INLINE_FUNCTION
   void operator()(const int ii, int &numbonds) const {
-    c.calculate_find_bond_item(ii,numbonds);
+    c.calculate_find_bond_item(ii,numbonds,groupbit);
   }
 };
 
@@ -546,6 +553,19 @@ struct PairReaxKokkosPackBondBufferFunctor  {
   }
 };
 
+template <class DeviceType, bool STORE_BONDS>
+struct PairReaxKokkosPackReducedBondBufferFunctor  {
+  typedef DeviceType device_type;
+  typedef int value_type;
+  PairReaxFFKokkos<DeviceType> c;
+  PairReaxKokkosPackReducedBondBufferFunctor(PairReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {};
+
+  KOKKOS_INLINE_FUNCTION
+  void operator()(const int ii, int &j, const bool &final) const {
+    c.template pack_reduced_bond_buffer_item<STORE_BONDS>(ii,j,final);
+  }
+};
+
 }
 
 #endif
diff --git a/src/KOKKOS/pair_sw_kokkos.cpp b/src/KOKKOS/pair_sw_kokkos.cpp
index 93b3919795..294b451e7e 100644
--- a/src/KOKKOS/pair_sw_kokkos.cpp
+++ b/src/KOKKOS/pair_sw_kokkos.cpp
@@ -37,8 +37,8 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/pair_vashishta_kokkos.cpp b/src/KOKKOS/pair_vashishta_kokkos.cpp
index 8400807ec6..d2eb3036d6 100644
--- a/src/KOKKOS/pair_vashishta_kokkos.cpp
+++ b/src/KOKKOS/pair_vashishta_kokkos.cpp
@@ -36,8 +36,8 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp
index 912ae36f6f..4a253c5779 100644
--- a/src/KOKKOS/pppm_kokkos.cpp
+++ b/src/KOKKOS/pppm_kokkos.cpp
@@ -39,22 +39,17 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecialKokkos;
 
-#define MAXORDER 7
-#define OFFSET 16384
-#define LARGE 10000.0
-#define SMALL 0.00001
-#define EPS_HOC 1.0e-7
+static constexpr int MAXORDER = 7;
+static constexpr int OFFSET = 16384;
+static constexpr double LARGE = 10000.0;
+static constexpr double SMALL = 0.00001;
+static constexpr double EPS_HOC = 1.0e-7;
 
 enum{REVERSE_RHO};
 enum{FORWARD_IK,FORWARD_IK_PERATOM};
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 /* ---------------------------------------------------------------------- */
 
@@ -1371,8 +1366,6 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_brick2fft, const int &ii) const
 template<class DeviceType>
 void PPPMKokkos<DeviceType>::poisson_ik()
 {
-  int j;
-
   // transform charge density (r -> k)
 
   copymode = 1;
@@ -1383,7 +1376,8 @@ void PPPMKokkos<DeviceType>::poisson_ik()
 
   // global energy and virial contribution
 
-  scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
+  bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
+  scaleinv = 1.0/ngridtotal;
   s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
@@ -1392,7 +1386,7 @@ void PPPMKokkos<DeviceType>::poisson_ik()
       copymode = 1;
       Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik2>(0,nfft),*this,ev);
       copymode = 0;
-      for (j = 0; j < 6; j++) virial[j] += ev.v[j];
+      for (int j = 0; j < 6; j++) virial[j] += ev.v[j];
       energy += ev.ecoul;
     } else {
       copymode = 1;
diff --git a/src/KOKKOS/region_block_kokkos.cpp b/src/KOKKOS/region_block_kokkos.cpp
index c53fae7b03..6d53514d19 100644
--- a/src/KOKKOS/region_block_kokkos.cpp
+++ b/src/KOKKOS/region_block_kokkos.cpp
@@ -18,7 +18,7 @@
 
 using namespace LAMMPS_NS;
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KOKKOS/third_order_kokkos.cpp b/src/KOKKOS/third_order_kokkos.cpp
index 6208aa966a..04c467777f 100644
--- a/src/KOKKOS/third_order_kokkos.cpp
+++ b/src/KOKKOS/third_order_kokkos.cpp
@@ -174,72 +174,45 @@ void ThirdOrderKokkos::update_force()
   }
 
   bool execute_on_host = false;
-  unsigned int datamask_read_device = 0;
-  unsigned int datamask_modify_device = 0;
   unsigned int datamask_read_host = 0;
 
   if (pair_compute_flag) {
     if (force->pair->execution_space==Host) {
       execute_on_host  = true;
       datamask_read_host   |= force->pair->datamask_read;
-      datamask_modify_device |= force->pair->datamask_modify;
-    } else {
-      datamask_read_device   |= force->pair->datamask_read;
-      datamask_modify_device |= force->pair->datamask_modify;
     }
   }
   if (atomKK->molecular && force->bond)  {
     if (force->bond->execution_space==Host) {
       execute_on_host  = true;
       datamask_read_host   |= force->bond->datamask_read;
-      datamask_modify_device |= force->bond->datamask_modify;
-    } else {
-      datamask_read_device   |= force->bond->datamask_read;
-      datamask_modify_device |= force->bond->datamask_modify;
     }
   }
   if (atomKK->molecular && force->angle) {
     if (force->angle->execution_space==Host) {
       execute_on_host  = true;
       datamask_read_host   |= force->angle->datamask_read;
-      datamask_modify_device |= force->angle->datamask_modify;
-    } else {
-      datamask_read_device   |= force->angle->datamask_read;
-      datamask_modify_device |= force->angle->datamask_modify;
     }
   }
   if (atomKK->molecular && force->dihedral) {
     if (force->dihedral->execution_space==Host) {
       execute_on_host  = true;
       datamask_read_host   |= force->dihedral->datamask_read;
-      datamask_modify_device |= force->dihedral->datamask_modify;
-    } else {
-      datamask_read_device   |= force->dihedral->datamask_read;
-      datamask_modify_device |= force->dihedral->datamask_modify;
     }
   }
   if (atomKK->molecular && force->improper) {
     if (force->improper->execution_space==Host) {
       execute_on_host  = true;
       datamask_read_host   |= force->improper->datamask_read;
-      datamask_modify_device |= force->improper->datamask_modify;
-    } else {
-      datamask_read_device   |= force->improper->datamask_read;
-      datamask_modify_device |= force->improper->datamask_modify;
     }
   }
   if (kspace_compute_flag) {
     if (force->kspace->execution_space==Host) {
       execute_on_host  = true;
       datamask_read_host   |= force->kspace->datamask_read;
-      datamask_modify_device |= force->kspace->datamask_modify;
-    } else {
-      datamask_read_device   |= force->kspace->datamask_read;
-      datamask_modify_device |= force->kspace->datamask_modify;
     }
   }
 
-
   if (pair_compute_flag) {
     atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
     atomKK->sync(force->pair->execution_space,~(~force->pair->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
diff --git a/src/KOKKOS/transpose_helper_kokkos.h b/src/KOKKOS/transpose_helper_kokkos.h
index e3a4d86f9a..06af0aea91 100644
--- a/src/KOKKOS/transpose_helper_kokkos.h
+++ b/src/KOKKOS/transpose_helper_kokkos.h
@@ -125,8 +125,7 @@ struct TransposeHelperKokkos {
     elem[0] = extent_tile_id[0] * tile_size;
     elem[1] = extent_tile_id[1] * tile_size;
 
-    if (elem[0] >= d_dst.extent(0) ||
-      elem[1] >= d_dst.extent(1)) return;
+    if ((elem[0] >= (int)d_dst.extent(0)) || (elem[1] >= (int)d_dst.extent(1))) return;
 
     // determine if a row/column is a full `tile_size` in size or not
     bool perfect_pad[2];
@@ -135,35 +134,30 @@ struct TransposeHelperKokkos {
 
     // load phase
     if (src_is_layout_right) {
-      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size),
-        [&] (const int j) {
-
-        if (elem[1] + j < d_src.extent(1)) {
-          if (perfect_pad[0]) {
-            for (int i = 0; i < tile_size; i++)
-              buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j);
-          } else {
-            for (int i = 0; i < (d_src.extent(0) - elem[0]); i++)
-              buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j);
+      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), [&] (const int j) {
+          if (elem[1] + j < (int)d_src.extent(1)) {
+            if (perfect_pad[0]) {
+              for (int i = 0; i < tile_size; i++)
+                buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j);
+            } else {
+              for (int i = 0; i < ((int)d_src.extent(0) - elem[0]); i++)
+                buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j);
+            }
           }
-        }
-      });
-
+        });
     } else {
       // src is layout left
-      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size),
-        [&] (const int i) {
-
-        if (elem[0] + i < d_src.extent(0)) {
-          if (perfect_pad[1]) {
-            for (int j = 0; j < tile_size; j++)
-              buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j);
-          } else {
-            for (int j = 0; j < (d_src.extent(1) - elem[1]); j++)
-              buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j);
+      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), [&] (const int i) {
+          if (elem[0] + i < (int)d_src.extent(0)) {
+            if (perfect_pad[1]) {
+              for (int j = 0; j < tile_size; j++)
+                buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j);
+            } else {
+              for (int j = 0; j < ((int)d_src.extent(1) - elem[1]); j++)
+                buffer[i * (tile_size + bank_pad) + j] = d_src(elem[0] + i, elem[1] + j);
+            }
           }
-        }
-      });
+        });
     }
 
     // No need for an extra sync b/c there is an implicit sync at the end
@@ -171,37 +165,31 @@ struct TransposeHelperKokkos {
 
     // save phase
     if (src_is_layout_right) {
-      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size),
-        [&] (const int i) {
-
-        if (elem[0] + i < d_dst.extent(0)) {
-          if (perfect_pad[1]) {
-            for (int j = 0; j < tile_size; j++)
-              d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j];
-          } else {
-            for (int j = 0; j < (d_dst.extent(1) - elem[1]); j++)
-              d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j];
+      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), [&] (const int i) {
+          if (elem[0] + i < (int)d_dst.extent(0)) {
+            if (perfect_pad[1]) {
+              for (int j = 0; j < tile_size; j++)
+                d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j];
+            } else {
+              for (int j = 0; j < ((int)d_dst.extent(1) - elem[1]); j++)
+                d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j];
+            }
           }
-        }
-      });
+        });
     } else {
-
       // src is layout left
-      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size),
-        [&] (const int j) {
-
-        if (elem[1] + j < d_dst.extent(1)) {
-          if (perfect_pad[0]) {
-            for (int i = 0; i < tile_size; i++)
-              d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j];
-          } else {
-            for (int i = 0; i < (d_dst.extent(0) - elem[0]); i++)
-              d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j];
+      Kokkos::parallel_for(Kokkos::ThreadVectorRange(team_member, tile_size), [&] (const int j) {
+          if (elem[1] + j < (int)d_dst.extent(1)) {
+            if (perfect_pad[0]) {
+              for (int i = 0; i < tile_size; i++)
+                d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j];
+            } else {
+              for (int i = 0; i < ((int)d_dst.extent(0) - elem[0]); i++)
+                d_dst(elem[0] + i, elem[1] + j) = buffer[i * (tile_size + bank_pad) + j];
+            }
           }
-        }
-      });
+        });
     }
-
   }
 };
 
diff --git a/src/KSPACE/ewald.cpp b/src/KSPACE/ewald.cpp
index 93470c60ac..930cc68ba9 100644
--- a/src/KSPACE/ewald.cpp
+++ b/src/KSPACE/ewald.cpp
@@ -35,7 +35,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KSPACE/ewald_dipole.cpp b/src/KSPACE/ewald_dipole.cpp
index 7a3a1da8ff..e4982b1d56 100644
--- a/src/KSPACE/ewald_dipole.cpp
+++ b/src/KSPACE/ewald_dipole.cpp
@@ -36,7 +36,7 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KSPACE/ewald_dipole_spin.cpp b/src/KSPACE/ewald_dipole_spin.cpp
index c679e164f7..93821db9ba 100644
--- a/src/KSPACE/ewald_dipole_spin.cpp
+++ b/src/KSPACE/ewald_dipole_spin.cpp
@@ -34,7 +34,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KSPACE/ewald_disp.cpp b/src/KSPACE/ewald_disp.cpp
index 31149134ad..9c81b21448 100644
--- a/src/KSPACE/ewald_disp.cpp
+++ b/src/KSPACE/ewald_disp.cpp
@@ -38,7 +38,7 @@ using namespace MathConst;
 using namespace MathSpecial;
 using namespace MathExtra;
 
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 //#define DEBUG
 
diff --git a/src/KSPACE/fft3d_wrap.cpp b/src/KSPACE/fft3d_wrap.cpp
index 478cf6fc9d..7b00543eea 100644
--- a/src/KSPACE/fft3d_wrap.cpp
+++ b/src/KSPACE/fft3d_wrap.cpp
@@ -27,30 +27,66 @@ FFT3d::FFT3d(LAMMPS *lmp, MPI_Comm comm, int nfast, int nmid, int nslow,
              int out_klo, int out_khi,
              int scaled, int permute, int *nbuf, int usecollective) : Pointers(lmp)
 {
+  #ifndef FFT_HEFFTE
   plan = fft_3d_create_plan(comm,nfast,nmid,nslow,
                             in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
                             out_ilo,out_ihi,out_jlo,out_jhi,out_klo,out_khi,
                             scaled,permute,nbuf,usecollective);
   if (plan == nullptr) error->one(FLERR,"Could not create 3d FFT plan");
+  #else
+  heffte::plan_options options = heffte::default_options<heffte_backend>();
+  options.algorithm = (usecollective == 0) ?
+                          heffte::reshape_algorithm::p2p_plined
+                        : heffte::reshape_algorithm::alltoallv;
+  options.use_reorder = (permute != 0);
+  hscale = (scaled == 0) ? heffte::scale::none : heffte::scale::full;
+
+  heffte_plan = std::unique_ptr<heffte::fft3d<heffte_backend>>(
+        new heffte::fft3d<heffte_backend>(
+                heffte::box3d<>({in_ilo,in_jlo,in_klo}, {in_ihi, in_jhi, in_khi}),
+                heffte::box3d<>({out_ilo,out_jlo,out_klo}, {out_ihi, out_jhi, out_khi}),
+                comm, options)
+      );
+  *nbuf = heffte_plan->size_workspace();
+  heffte_workspace.resize(heffte_plan->size_workspace());
+  #endif
 }
 
 /* ---------------------------------------------------------------------- */
 
 FFT3d::~FFT3d()
 {
+  #ifndef FFT_HEFFTE
   fft_3d_destroy_plan(plan);
+  #endif
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FFT3d::compute(FFT_SCALAR *in, FFT_SCALAR *out, int flag)
 {
+  #ifndef FFT_HEFFTE
   fft_3d((FFT_DATA *) in,(FFT_DATA *) out,flag,plan);
+  #else
+  if (flag == 1)
+      heffte_plan->forward(reinterpret_cast<std::complex<FFT_SCALAR>*>(in),
+                           reinterpret_cast<std::complex<FFT_SCALAR>*>(out),
+                           reinterpret_cast<std::complex<FFT_SCALAR>*>(heffte_workspace.data())
+                          );
+  else
+      heffte_plan->backward(reinterpret_cast<std::complex<FFT_SCALAR>*>(in),
+                            reinterpret_cast<std::complex<FFT_SCALAR>*>(out),
+                            reinterpret_cast<std::complex<FFT_SCALAR>*>(heffte_workspace.data()),
+                            hscale
+                           );
+  #endif
 }
 
 /* ---------------------------------------------------------------------- */
 
 void FFT3d::timing1d(FFT_SCALAR *in, int nsize, int flag)
 {
+  #ifndef FFT_HEFFTE
   fft_1d_only((FFT_DATA *) in,nsize,flag,plan);
+  #endif
 }
diff --git a/src/KSPACE/fft3d_wrap.h b/src/KSPACE/fft3d_wrap.h
index f72cfd4622..04b828b7de 100644
--- a/src/KSPACE/fft3d_wrap.h
+++ b/src/KSPACE/fft3d_wrap.h
@@ -17,6 +17,19 @@
 #include "fft3d.h"    // IWYU pragma: export
 #include "pointers.h"
 
+#ifdef FFT_HEFFTE
+#include "heffte.h"
+// select the backend
+#if defined(FFT_HEFFTE_FFTW)
+using heffte_backend = heffte::backend::fftw;
+#elif defined(FFT_HEFFTE_MKL)
+using heffte_backend = heffte::backend::mkl;
+#else
+using heffte_backend = heffte::backend::stock;
+#endif
+
+#endif // FFT_HEFFTE
+
 namespace LAMMPS_NS {
 
 class FFT3d : protected Pointers {
@@ -30,7 +43,14 @@ class FFT3d : protected Pointers {
   void timing1d(FFT_SCALAR *, int, int);
 
  private:
+  #ifdef FFT_HEFFTE
+  // the heFFTe plan supersedes the internal fft_plan_3d
+  std::unique_ptr<heffte::fft3d<heffte_backend>> heffte_plan;
+  std::vector<std::complex<FFT_SCALAR>> heffte_workspace;
+  heffte::scale hscale;
+  #else
   struct fft_plan_3d *plan;
+  #endif
 };
 
 }    // namespace LAMMPS_NS
diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp
index 041a5d5242..8f79ab408c 100644
--- a/src/KSPACE/msm.cpp
+++ b/src/KSPACE/msm.cpp
@@ -37,8 +37,8 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 
 #define MAX_LEVELS 10
-#define OFFSET 16384
-#define SMALL 0.00001
+static constexpr int OFFSET = 16384;
+static constexpr double SMALL = 0.00001;
 
 enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM};
 enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM};
diff --git a/src/KSPACE/msm_cg.cpp b/src/KSPACE/msm_cg.cpp
index 4a8daedf10..e680c05cb2 100644
--- a/src/KSPACE/msm_cg.cpp
+++ b/src/KSPACE/msm_cg.cpp
@@ -31,8 +31,8 @@
 
 using namespace LAMMPS_NS;
 
-#define OFFSET 16384
-#define SMALLQ 0.00001
+static constexpr int OFFSET = 16384;
+static constexpr double SMALLQ = 0.00001;
 
 enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM};
 enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM};
diff --git a/src/KSPACE/pair_coul_streitz.cpp b/src/KSPACE/pair_coul_streitz.cpp
index bd7fd20656..9e3811c027 100644
--- a/src/KSPACE/pair_coul_streitz.cpp
+++ b/src/KSPACE/pair_coul_streitz.cpp
@@ -35,9 +35,9 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define DELTA 4
-#define PGDELTA 1
-#define MAXNEIGH 24
+static constexpr int DELTA = 4;
+static constexpr int PGDELTA = 1;
+static constexpr int MAXNEIGH = 24;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp
index b7635c49c7..260c26e8aa 100644
--- a/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp
+++ b/src/KSPACE/pair_lj_charmmfsw_coul_long.cpp
@@ -76,6 +76,8 @@ PairLJCharmmfswCoulLong::PairLJCharmmfswCoulLong(LAMMPS *lmp) : Pair(lmp)
 
 PairLJCharmmfswCoulLong::~PairLJCharmmfswCoulLong()
 {
+  if (copymode) return;
+
   // switch qqr2e back from CHARMM value to LAMMPS value
 
   if (update && strcmp(update->unit_style,"real") == 0) {
@@ -85,8 +87,6 @@ PairLJCharmmfswCoulLong::~PairLJCharmmfswCoulLong()
     force->qqr2e = force->qqr2e_lammps_real;
   }
 
-  if (copymode) return;
-
   if (allocated) {
     memory->destroy(setflag);
     memory->destroy(cutsq);
diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp
index 2f5b4fc670..0ac83c01fb 100644
--- a/src/KSPACE/pppm.cpp
+++ b/src/KSPACE/pppm.cpp
@@ -45,22 +45,17 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define MAXORDER 7
-#define OFFSET 16384
-#define LARGE 10000.0
-#define SMALL 0.00001
-#define EPS_HOC 1.0e-7
+static constexpr int MAXORDER = 7;
+static constexpr int OFFSET = 16384;
+static constexpr double LARGE = 10000.0;
+static constexpr double SMALL = 0.00001;
+static constexpr double EPS_HOC = 1.0e-7;
 
 enum{REVERSE_RHO};
 enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 /* ---------------------------------------------------------------------- */
 
@@ -1188,7 +1183,7 @@ double PPPM::compute_qopt()
   // each proc calculates contributions from every Pth grid point
 
   bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
-  int nxy_pppm = nx_pppm * ny_pppm;
+  bigint nxy_pppm = (bigint) nx_pppm * ny_pppm;
 
   double qopt = 0.0;
 
@@ -1944,7 +1939,8 @@ void PPPM::poisson_ik()
 
   // global energy and virial contribution
 
-  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
+  bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
+  double scaleinv = 1.0/ngridtotal;
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
@@ -2145,7 +2141,8 @@ void PPPM::poisson_ad()
 
   // global energy and virial contribution
 
-  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
+  bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
+  double scaleinv = 1.0/ngridtotal;
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
@@ -3259,7 +3256,8 @@ void PPPM::poisson_groups(int AA_flag)
   //  keep everything in reciprocal space so
   //  no inverse FFTs needed
 
-  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
+  bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
+  double scaleinv = 1.0/ngridtotal;
   double s2 = scaleinv*scaleinv;
 
   // energy
diff --git a/src/KSPACE/pppm_cg.cpp b/src/KSPACE/pppm_cg.cpp
index 845abe0078..e5ae0b05c8 100644
--- a/src/KSPACE/pppm_cg.cpp
+++ b/src/KSPACE/pppm_cg.cpp
@@ -34,17 +34,13 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define OFFSET 16384
-#define SMALLQ 0.00001
+static constexpr int OFFSET = 16384;
+static constexpr double SMALLQ = 0.00001;
 
 enum{REVERSE_RHO};
 enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#else
-#define ZEROF 0.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KSPACE/pppm_dipole.cpp b/src/KSPACE/pppm_dipole.cpp
index a01ffea1dc..da64f85f9e 100644
--- a/src/KSPACE/pppm_dipole.cpp
+++ b/src/KSPACE/pppm_dipole.cpp
@@ -40,22 +40,17 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define MAXORDER 7
-#define OFFSET 16384
-#define LARGE 10000.0
-#define SMALL 0.00001
-#define EPS_HOC 1.0e-7
+static constexpr int MAXORDER = 7;
+static constexpr int OFFSET = 16384;
+static constexpr double LARGE = 10000.0;
+static constexpr double SMALL = 0.00001;
+static constexpr double EPS_HOC = 1.0e-7;
 
 enum{REVERSE_MU};
 enum{FORWARD_MU,FORWARD_MU_PERATOM};
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 /* ---------------------------------------------------------------------- */
 
@@ -1338,7 +1333,8 @@ void PPPMDipole::poisson_ik_dipole()
 
   // global energy and virial contribution
 
-  double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
+  bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
+  double scaleinv = 1.0/ngridtotal;
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
diff --git a/src/KSPACE/pppm_dipole_spin.cpp b/src/KSPACE/pppm_dipole_spin.cpp
index e96378180a..16b9e459e6 100644
--- a/src/KSPACE/pppm_dipole_spin.cpp
+++ b/src/KSPACE/pppm_dipole_spin.cpp
@@ -35,22 +35,17 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define MAXORDER 7
-#define OFFSET 16384
-#define LARGE 10000.0
-#define SMALL 0.00001
-#define EPS_HOC 1.0e-7
+static constexpr int MAXORDER = 7;
+static constexpr int OFFSET = 16384;
+static constexpr double LARGE = 10000.0;
+static constexpr double SMALL = 0.00001;
+static constexpr double EPS_HOC = 1.0e-7;
 
 enum{REVERSE_MU};
 enum{FORWARD_MU,FORWARD_MU_PERATOM};
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp
index 72424a7330..58c91ce3be 100644
--- a/src/KSPACE/pppm_disp.cpp
+++ b/src/KSPACE/pppm_disp.cpp
@@ -40,11 +40,11 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define MAXORDER   7
-#define OFFSET 16384
-#define SMALL 0.00001
-#define LARGE 10000.0
-#define EPS_HOC 1.0e-7
+static constexpr int MAXORDER =   7;
+static constexpr int OFFSET = 16384;
+static constexpr double SMALL = 0.00001;
+static constexpr double LARGE = 10000.0;
+static constexpr double EPS_HOC = 1.0e-7;
 
 enum{REVERSE_RHO,REVERSE_RHO_GEOM,REVERSE_RHO_ARITH,REVERSE_RHO_NONE};
 enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM,
@@ -55,13 +55,8 @@ enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM,
      FORWARD_IK_NONE,FORWARD_AD_NONE,FORWARD_IK_PERATOM_NONE,
      FORWARD_AD_PERATOM_NONE};
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 /* ---------------------------------------------------------------------- */
 
@@ -4556,7 +4551,8 @@ void PPPMDisp::poisson_ik(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
 
   // if requested, compute energy and virial contribution
 
-  double scaleinv = 1.0/(nx_p*ny_p*nz_p);
+  bigint ngridtotal = (bigint) nx_p * ny_p * nz_p;
+  double scaleinv = 1.0/ngridtotal;
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
@@ -4696,7 +4692,8 @@ void PPPMDisp::poisson_ad(FFT_SCALAR* wk1, FFT_SCALAR* wk2,
 
   // if requested, compute energy and virial contribution
 
-  double scaleinv = 1.0/(nx_p*ny_p*nz_p);
+  bigint ngridtotal = (bigint) nx_p * ny_p * nz_p;
+  double scaleinv = 1.0/ngridtotal;
   double s2 = scaleinv*scaleinv;
 
   if (eflag_global || vflag_global) {
@@ -4844,7 +4841,8 @@ poisson_2s_ik(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
   int i,j,k,n;
   double eng;
 
-  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
+  bigint ngridtotal = (bigint) nx_pppm_6 * ny_pppm_6 * nz_pppm_6;
+  double scaleinv = 1.0/ngridtotal;
 
   // transform charge/dispersion density (r -> k)
   // only one transform when energies and pressures not calculated
@@ -5017,7 +5015,8 @@ poisson_none_ik(int n1, int n2,FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
   int i,j,k,n;
   double eng;
 
-  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
+  bigint ngridtotal = (bigint) nx_pppm_6 * ny_pppm_6 * nz_pppm_6;
+  double scaleinv = 1.0/ngridtotal;
 
   // transform charge/dispersion density (r -> k)
   // only one transform required when energies and pressures not needed
@@ -5191,7 +5190,8 @@ poisson_2s_ad(FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
   int i,j,k,n;
   double eng;
 
-  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
+  bigint ngridtotal = (bigint) nx_pppm_6 * ny_pppm_6 * nz_pppm_6;
+  double scaleinv = 1.0/ngridtotal;
 
   // transform charge/dispersion density (r -> k)
   // only one tansform required when energies and pressures not needed
@@ -5289,7 +5289,8 @@ poisson_none_ad(int n1, int n2, FFT_SCALAR* dfft_1, FFT_SCALAR* dfft_2,
   int i,j,k,n;
   double eng;
 
-  double scaleinv = 1.0/(nx_pppm_6*ny_pppm_6*nz_pppm_6);
+  bigint ngridtotal = (bigint) nx_pppm_6 * ny_pppm_6 * nz_pppm_6;
+  double scaleinv = 1.0/ngridtotal;
 
   // transform charge/dispersion density (r -> k)
   // only one tansform required when energies and pressures not needed
diff --git a/src/KSPACE/pppm_disp.h b/src/KSPACE/pppm_disp.h
index a222e041d9..1a271e59d2 100644
--- a/src/KSPACE/pppm_disp.h
+++ b/src/KSPACE/pppm_disp.h
@@ -25,8 +25,8 @@ KSpaceStyle(pppm/disp,PPPMDisp);
 
 namespace LAMMPS_NS {
 
-#define EWALD_MAXORDER 6
-#define EWALD_FUNCS 4
+static constexpr int EWALD_MAXORDER = 6;
+static constexpr int EWALD_FUNCS = 4;
 
 class PPPMDisp : public KSpace {
  public:
diff --git a/src/KSPACE/pppm_disp_tip4p.cpp b/src/KSPACE/pppm_disp_tip4p.cpp
index c5ed2f26d6..6161ebbe09 100644
--- a/src/KSPACE/pppm_disp_tip4p.cpp
+++ b/src/KSPACE/pppm_disp_tip4p.cpp
@@ -29,15 +29,10 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define OFFSET 16384
+static constexpr int OFFSET = 16384;
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 /* ---------------------------------------------------------------------- */
 
@@ -490,7 +485,7 @@ void PPPMDispTIP4P::fieldforce_c_peratom()
   Fix handling of TIP4P dipole compared to PPPMDisp::slabcorr
 ------------------------------------------------------------------------- */
 
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 void PPPMDispTIP4P::slabcorr(int /*eflag*/)
 {
diff --git a/src/KSPACE/pppm_stagger.cpp b/src/KSPACE/pppm_stagger.cpp
index d6f3c9cac6..a14d7a68d6 100644
--- a/src/KSPACE/pppm_stagger.cpp
+++ b/src/KSPACE/pppm_stagger.cpp
@@ -33,19 +33,14 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define OFFSET 16384
-#define EPS_HOC 1.0e-7
+static constexpr int OFFSET = 16384;
+static constexpr double EPS_HOC = 1.0e-7;
 
 enum{REVERSE_RHO};
 enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
 /* ---------------------------------------------------------------------- */
 
@@ -302,7 +297,7 @@ double PPPMStagger::compute_qopt()
   // each proc calculates contributions from every Pth grid point
 
   bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
-  int nxy_pppm = nx_pppm * ny_pppm;
+  bigint nxy_pppm = (bigint) nx_pppm * ny_pppm;
 
   double qopt = 0.0;
 
@@ -398,7 +393,7 @@ double PPPMStagger::compute_qopt_ad()
   // each proc calculates contributions from every Pth grid point
 
   bigint ngridtotal = (bigint) nx_pppm * ny_pppm * nz_pppm;
-  int nxy_pppm = nx_pppm * ny_pppm;
+  bigint nxy_pppm = (bigint) nx_pppm * ny_pppm;
 
   double qopt = 0.0;
 
diff --git a/src/KSPACE/pppm_tip4p.cpp b/src/KSPACE/pppm_tip4p.cpp
index 730b604d7a..7237bc24f2 100644
--- a/src/KSPACE/pppm_tip4p.cpp
+++ b/src/KSPACE/pppm_tip4p.cpp
@@ -29,15 +29,9 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define OFFSET 16384
-
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
+static constexpr int OFFSET = 16384;
 
 /* ---------------------------------------------------------------------- */
 
@@ -483,7 +477,7 @@ void PPPMTIP4P::fieldforce_peratom()
   Fix handling of TIP4P dipole compared to PPPMDisp::slabcorr
 ------------------------------------------------------------------------- */
 
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 void PPPMTIP4P::slabcorr()
 {
diff --git a/src/LATBOLTZ/fix_lb_fluid.cpp b/src/LATBOLTZ/fix_lb_fluid.cpp
index f692d28084..f3d8f45142 100644
--- a/src/LATBOLTZ/fix_lb_fluid.cpp
+++ b/src/LATBOLTZ/fix_lb_fluid.cpp
@@ -4430,9 +4430,9 @@ void FixLbFluid::calc_MPT(double &totalmass, double totalmomentum[3], double &Ta
 ------------------------------------------------------------------------- */
 /* ---------------------------------------------------------------------- */
 
-int FixLbFluid::adjust_dof_fix() /* Based on same private method in compute class */
-{                                /* altered to return fix_dof */
-  int fix_dof = 0;
+bigint FixLbFluid::adjust_dof_fix() /* Based on same private method in compute class */
+{                                   /* altered to return fix_dof */
+  bigint fix_dof = 0;
   for (auto &ifix : modify->get_fix_list())
     if (ifix->dof_flag) fix_dof += ifix->dof(igroup);
   return fix_dof;
diff --git a/src/LATBOLTZ/fix_lb_fluid.h b/src/LATBOLTZ/fix_lb_fluid.h
index 19cd2c6dc3..f134b50901 100644
--- a/src/LATBOLTZ/fix_lb_fluid.h
+++ b/src/LATBOLTZ/fix_lb_fluid.h
@@ -182,7 +182,7 @@ class FixLbFluid : public Fix {
   void calc_fluidforceII(void);
   void calc_fluidforceweight(void);
 
-  int adjust_dof_fix();
+  bigint adjust_dof_fix();
   double dof_compute();
 
   /* nanopit parameters */
diff --git a/src/LEPTON/angle_lepton.cpp b/src/LEPTON/angle_lepton.cpp
index 59310f5637..9fe565f8ee 100644
--- a/src/LEPTON/angle_lepton.cpp
+++ b/src/LEPTON/angle_lepton.cpp
@@ -44,6 +44,7 @@ AngleLepton::AngleLepton(LAMMPS *_lmp) :
 {
   writedata = 1;
   reinitflag = 0;
+  auto_offset = 1;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -90,10 +91,21 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void AngleLepton::eval()
 {
   std::vector<Lepton::CompiledExpression> angleforce;
   std::vector<Lepton::CompiledExpression> anglepot;
-  for (const auto &expr : expressions) {
-    auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
-    angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression());
-    if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression());
+  std::vector<bool> has_ref;
+  try {
+    for (const auto &expr : expressions) {
+      auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
+      angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression());
+      has_ref.push_back(true);
+      try {
+        angleforce.back().getVariableReference("theta");
+      } catch (Lepton::Exception &) {
+        has_ref.back() = false;
+      }
+      if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression());
+    }
+  } catch (std::exception &e) {
+    error->all(FLERR, e.what());
   }
 
   const double *const *const x = atom->x;
@@ -142,8 +154,7 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void AngleLepton::eval()
 
     const double dtheta = acos(c) - theta0[type];
     const int idx = type2expression[type];
-    angleforce[idx].getVariableReference("theta") = dtheta;
-
+    if (has_ref[idx]) angleforce[idx].getVariableReference("theta") = dtheta;
     const double a = -angleforce[idx].evaluate() * s;
     const double a11 = a * c / rsq1;
     const double a12 = -a / (r1 * r2);
@@ -179,7 +190,11 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void AngleLepton::eval()
 
     double eangle = 0.0;
     if (EFLAG) {
-      anglepot[idx].getVariableReference("theta") = dtheta;
+      try {
+        anglepot[idx].getVariableReference("theta") = dtheta;
+      } catch (Lepton::Exception &) {
+        ;    // ignore -> constant force
+      }
       eangle = anglepot[idx].evaluate() - offset[type];
     }
     if (EVFLAG)
@@ -202,6 +217,24 @@ void AngleLepton::allocate()
   for (int i = 1; i < np1; i++) setflag[i] = 0;
 }
 
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void AngleLepton::settings(int narg, char **arg)
+{
+  auto_offset = 1;
+  if (narg > 0) {
+    if (strcmp(arg[0],"auto_offset") == 0) {
+      auto_offset = 1;
+    } else if (strcmp(arg[0],"no_offset") == 0) {
+      auto_offset = 0;
+    } else {
+      error->all(FLERR, "Unknown angle style lepton setting {}", arg[0]);
+    }
+  }
+}
+
 /* ----------------------------------------------------------------------
    set coeffs for one or more types
 ------------------------------------------------------------------------- */
@@ -224,9 +257,20 @@ void AngleLepton::coeff(int narg, char **arg)
     auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
     auto anglepot = parsed.createCompiledExpression();
     auto angleforce = parsed.differentiate("theta").createCompiledExpression();
-    anglepot.getVariableReference("theta") = 0.0;
-    angleforce.getVariableReference("theta") = 0.0;
-    offset_one = anglepot.evaluate();
+    try {
+      anglepot.getVariableReference("theta") = 0.0;
+    } catch (Lepton::Exception &) {
+      if (comm->me == 0)
+        error->warning(FLERR, "Lepton potential expression {} does not depend on 'theta'", exp_one);
+    }
+    try {
+      angleforce.getVariableReference("theta") = 0.0;
+    } catch (Lepton::Exception &) {
+      if (comm->me == 0)
+        error->warning(FLERR, "Force from Lepton expression {} does not depend on 'theta'",
+                       exp_one);
+    }
+    if (auto_offset) offset_one = anglepot.evaluate();
     angleforce.evaluate();
   } catch (std::exception &e) {
     error->all(FLERR, e.what());
@@ -284,6 +328,7 @@ void AngleLepton::write_restart(FILE *fp)
     fwrite(&n, sizeof(int), 1, fp);
     fwrite(exp.c_str(), sizeof(char), n, fp);
   }
+  fwrite(&auto_offset, sizeof(int), 1, fp);
 }
 
 /* ----------------------------------------------------------------------
@@ -323,6 +368,9 @@ void AngleLepton::read_restart(FILE *fp)
     expressions.emplace_back(buf);
   }
 
+  if (comm->me == 0) utils::sfread(FLERR, &auto_offset, sizeof(int), 1, fp, nullptr, error);
+  MPI_Bcast(&auto_offset, 1, MPI_INT, 0, world);
+
   delete[] buf;
 }
 
@@ -363,7 +411,11 @@ double AngleLepton::single(int type, int i1, int i2, int i3)
   const auto &expr = expressions[type2expression[type]];
   auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
   auto anglepot = parsed.createCompiledExpression();
-  anglepot.getVariableReference("theta") = dtheta;
+  try {
+    anglepot.getVariableReference("theta") = dtheta;
+  } catch (Lepton::Exception &) {
+    ;    // ignore -> constant potential
+  }
   return anglepot.evaluate() - offset[type];
 }
 
diff --git a/src/LEPTON/angle_lepton.h b/src/LEPTON/angle_lepton.h
index 67d2718fb6..4f0e5729ed 100644
--- a/src/LEPTON/angle_lepton.h
+++ b/src/LEPTON/angle_lepton.h
@@ -29,6 +29,7 @@ class AngleLepton : public Angle {
   AngleLepton(class LAMMPS *);
   ~AngleLepton() override;
   void compute(int, int) override;
+  void settings(int, char **) override;
   void coeff(int, char **) override;
   double equilibrium_angle(int) override;
   void write_restart(FILE *) override;
@@ -42,6 +43,7 @@ class AngleLepton : public Angle {
   double *theta0;
   int *type2expression;
   double *offset;
+  int auto_offset;
 
   virtual void allocate();
 
diff --git a/src/LEPTON/bond_lepton.cpp b/src/LEPTON/bond_lepton.cpp
index 773607782d..8679d0ed62 100644
--- a/src/LEPTON/bond_lepton.cpp
+++ b/src/LEPTON/bond_lepton.cpp
@@ -37,6 +37,7 @@ BondLepton::BondLepton(LAMMPS *_lmp) :
 {
   writedata = 1;
   reinitflag = 0;
+  auto_offset = 1;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -82,10 +83,17 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void BondLepton::eval()
 {
   std::vector<Lepton::CompiledExpression> bondforce;
   std::vector<Lepton::CompiledExpression> bondpot;
+  std::vector<bool> has_ref;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
       bondforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      has_ref.push_back(true);
+      try {
+        bondforce.back().getVariableReference("r");
+      } catch (Lepton::Exception &) {
+        has_ref.back() = false;
+      }
       if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression());
     }
   } catch (std::exception &e) {
@@ -116,7 +124,7 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void BondLepton::eval()
 
     double fbond = 0.0;
     if (r > 0.0) {
-      bondforce[idx].getVariableReference("r") = dr;
+      if (has_ref[idx]) bondforce[idx].getVariableReference("r") = dr;
       fbond = -bondforce[idx].evaluate() / r;
     }
 
@@ -136,7 +144,11 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void BondLepton::eval()
 
     double ebond = 0.0;
     if (EFLAG) {
-      bondpot[idx].getVariableReference("r") = dr;
+      try {
+        bondpot[idx].getVariableReference("r") = dr;
+      } catch (Lepton::Exception &) {
+        ;    // ignore -> constant potential
+      }
       ebond = bondpot[idx].evaluate() - offset[type];
     }
     if (EVFLAG) ev_tally(i1, i2, nlocal, NEWTON_BOND, ebond, fbond, delx, dely, delz);
@@ -157,6 +169,24 @@ void BondLepton::allocate()
   for (int i = 1; i < np1; i++) setflag[i] = 0;
 }
 
+/* ----------------------------------------------------------------------
+   global settings
+------------------------------------------------------------------------- */
+
+void BondLepton::settings(int narg, char **arg)
+{
+  auto_offset = 1;
+  if (narg > 0) {
+    if (strcmp(arg[0],"auto_offset") == 0) {
+      auto_offset = 1;
+    } else if (strcmp(arg[0],"no_offset") == 0) {
+      auto_offset = 0;
+    } else {
+      error->all(FLERR, "Unknown bond style lepton setting {}", arg[0]);
+    }
+  }
+}
+
 /* ----------------------------------------------------------------------
    set coeffs for one or more types
 ------------------------------------------------------------------------- */
@@ -179,9 +209,19 @@ void BondLepton::coeff(int narg, char **arg)
     auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
     auto bondpot = parsed.createCompiledExpression();
     auto bondforce = parsed.differentiate("r").createCompiledExpression();
-    bondpot.getVariableReference("r") = 0.0;
-    bondforce.getVariableReference("r") = 0.0;
-    offset_one = bondpot.evaluate();
+    try {
+      bondpot.getVariableReference("r") = 0.0;
+    } catch (Lepton::Exception &e) {
+      if (comm->me == 0)
+        error->warning(FLERR, "Lepton potential expression {} does not depend on 'r'", exp_one);
+    }
+    try {
+      bondforce.getVariableReference("r") = 0.0;
+    } catch (Lepton::Exception &e) {
+      if (comm->me == 0)
+        error->warning(FLERR, "Force from Lepton expression {} does not depend on 'r'", exp_one);
+    }
+    if (auto_offset) offset_one = bondpot.evaluate();
     bondforce.evaluate();
   } catch (std::exception &e) {
     error->all(FLERR, e.what());
@@ -239,6 +279,7 @@ void BondLepton::write_restart(FILE *fp)
     fwrite(&n, sizeof(int), 1, fp);
     fwrite(exp.c_str(), sizeof(char), n, fp);
   }
+  fwrite(&auto_offset, sizeof(int), 1, fp);
 }
 
 /* ----------------------------------------------------------------------
@@ -278,6 +319,9 @@ void BondLepton::read_restart(FILE *fp)
     expressions.emplace_back(buf);
   }
 
+  if (comm->me == 0) utils::sfread(FLERR, &auto_offset, sizeof(int), 1, fp, nullptr, error);
+  MPI_Bcast(&auto_offset, 1, MPI_INT, 0, world);
+
   delete[] buf;
 }
 
@@ -302,8 +346,12 @@ double BondLepton::single(int type, double rsq, int /*i*/, int /*j*/, double &ff
   auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
   auto bondpot = parsed.createCompiledExpression();
   auto bondforce = parsed.differentiate("r").createCompiledExpression();
-  bondforce.getVariableReference("r") = dr;
-  bondpot.getVariableReference("r") = dr;
+  try {
+    bondpot.getVariableReference("r") = dr;
+    bondforce.getVariableReference("r") = dr;
+  } catch (Lepton::Exception &) {
+    ;    // ignore -> constant potential or force
+  }
 
   // force and energy
 
diff --git a/src/LEPTON/bond_lepton.h b/src/LEPTON/bond_lepton.h
index 9e693298a7..e59648a3f0 100644
--- a/src/LEPTON/bond_lepton.h
+++ b/src/LEPTON/bond_lepton.h
@@ -29,6 +29,7 @@ class BondLepton : public Bond {
   BondLepton(class LAMMPS *);
   ~BondLepton() override;
   void compute(int, int) override;
+  void settings(int, char **) override;
   void coeff(int, char **) override;
   double equilibrium_distance(int) override;
   void write_restart(FILE *) override;
@@ -42,6 +43,7 @@ class BondLepton : public Bond {
   double *r0;
   int *type2expression;
   double *offset;
+  int auto_offset;
 
   virtual void allocate();
 
diff --git a/src/LEPTON/dihedral_lepton.cpp b/src/LEPTON/dihedral_lepton.cpp
index 6470e43033..069ff13d74 100644
--- a/src/LEPTON/dihedral_lepton.cpp
+++ b/src/LEPTON/dihedral_lepton.cpp
@@ -92,10 +92,17 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void DihedralLepton::eval()
 {
   std::vector<Lepton::CompiledExpression> dihedralforce;
   std::vector<Lepton::CompiledExpression> dihedralpot;
+  std::vector<bool> has_ref;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp));
       dihedralforce.emplace_back(parsed.differentiate("phi").createCompiledExpression());
+      has_ref.push_back(true);
+      try {
+        dihedralforce.back().getVariableReference("phi");
+      } catch (Lepton::Exception &) {
+        has_ref.back() = false;
+      }
       if (EFLAG) dihedralpot.emplace_back(parsed.createCompiledExpression());
     }
   } catch (std::exception &e) {
@@ -278,7 +285,7 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void DihedralLepton::eval()
     }
 
     const int idx = type2expression[type];
-    dihedralforce[idx].getVariableReference("phi") = phi;
+    if (has_ref[idx]) dihedralforce[idx].getVariableReference("phi") = phi;
     double m_du_dphi = -dihedralforce[idx].evaluate();
 
     // ----- Step 4: Calculate the force direction in real space -----
@@ -322,7 +329,11 @@ template <int EVFLAG, int EFLAG, int NEWTON_BOND> void DihedralLepton::eval()
 
     double edihedral = 0.0;
     if (EFLAG) {
-      dihedralpot[idx].getVariableReference("phi") = phi;
+      try {
+        dihedralpot[idx].getVariableReference("phi") = phi;
+      } catch (Lepton::Exception &) {
+        ;    // ignore -> constant potential
+      }
       edihedral = dihedralpot[idx].evaluate();
     }
     if (EVFLAG)
@@ -362,8 +373,18 @@ void DihedralLepton::coeff(int narg, char **arg)
     auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
     auto dihedralpot = parsed.createCompiledExpression();
     auto dihedralforce = parsed.differentiate("phi").createCompiledExpression();
-    dihedralpot.getVariableReference("phi") = 0.0;
-    dihedralforce.getVariableReference("phi") = 0.0;
+    try {
+      dihedralpot.getVariableReference("phi") = 0.0;
+    } catch (Lepton::Exception &) {
+      if (comm->me == 0)
+        error->warning(FLERR, "Lepton potential expression {} does not depend on 'phi'", exp_one);
+    }
+    try {
+      dihedralforce.getVariableReference("phi") = 0.0;
+    } catch (Lepton::Exception &) {
+      if (comm->me == 0)
+        error->warning(FLERR, "Force from Lepton expression {} does not depend on 'phi'", exp_one);
+    }
     dihedralforce.evaluate();
   } catch (std::exception &e) {
     error->all(FLERR, e.what());
diff --git a/src/LEPTON/fix_wall_lepton.cpp b/src/LEPTON/fix_wall_lepton.cpp
index a81d3c4edb..7530188c00 100644
--- a/src/LEPTON/fix_wall_lepton.cpp
+++ b/src/LEPTON/fix_wall_lepton.cpp
@@ -13,6 +13,7 @@
 
 #include "fix_wall_lepton.h"
 #include "atom.h"
+#include "comm.h"
 #include "error.h"
 
 #include "Lepton.h"
@@ -41,8 +42,18 @@ void FixWallLepton::post_constructor()
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp));
       auto wallpot = parsed.createCompiledExpression();
       auto wallforce = parsed.differentiate("r").createCompiledExpression();
-      wallpot.getVariableReference("r") = 0.0;
-      wallforce.getVariableReference("r") = 0.0;
+      try {
+        wallpot.getVariableReference("r") = 0.0;
+      } catch (Lepton::Exception &) {
+        if (comm->me == 0)
+          error->warning(FLERR, "Lepton potential expression {} does not depend on 'r'", exp_one);
+      }
+      try {
+        wallforce.getVariableReference("r") = 0.0;
+      } catch (Lepton::Exception &) {
+        if (comm->me == 0)
+          error->warning(FLERR, "Force from Lepton expression {} does not depend on 'r'", exp_one);
+      }
       wallpot.evaluate();
       wallforce.evaluate();
     } catch (std::exception &e) {
diff --git a/src/LEPTON/pair_lepton.cpp b/src/LEPTON/pair_lepton.cpp
index a8af0ce576..adc07cbfa8 100644
--- a/src/LEPTON/pair_lepton.cpp
+++ b/src/LEPTON/pair_lepton.cpp
@@ -27,6 +27,7 @@
 
 #include "Lepton.h"
 #include "lepton_utils.h"
+#include <array>
 #include <cmath>
 #include <map>
 
@@ -105,11 +106,17 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
 
   std::vector<Lepton::CompiledExpression> pairforce;
   std::vector<Lepton::CompiledExpression> pairpot;
+  std::vector<bool> has_ref;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp), functions);
       pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
-      pairforce.back().getVariableReference("r");
+      has_ref.push_back(true);
+      try {
+        pairforce.back().getVariableReference("r");
+      } catch (Lepton::Exception &) {
+        has_ref.back() = false;
+      }
       if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
     }
   } catch (std::exception &e) {
@@ -142,8 +149,7 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
       if (rsq < cutsq[itype][jtype]) {
         const double r = sqrt(rsq);
         const int idx = type2expression[itype][jtype];
-        double &r_for = pairforce[idx].getVariableReference("r");
-        r_for = r;
+        if (has_ref[idx]) pairforce[idx].getVariableReference("r") = r;
         const double fpair = -pairforce[idx].evaluate() / r * factor_lj;
 
         fxtmp += delx * fpair;
@@ -157,7 +163,11 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLepton::eval()
 
         double evdwl = 0.0;
         if (EFLAG) {
-          pairpot[idx].getVariableReference("r") = r;
+          try {
+            pairpot[idx].getVariableReference("r") = r;
+          } catch (Lepton::Exception &) {
+            ;    // ignore -> constant potential
+          }
           evdwl = pairpot[idx].evaluate() - offset[itype][jtype];
           evdwl *= factor_lj;
         }
@@ -229,8 +239,12 @@ void PairLepton::coeff(int narg, char **arg)
     auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(exp_one, lmp), functions);
     auto pairforce = parsed.differentiate("r").createCompiledExpression();
     auto pairpot = parsed.createCompiledExpression();
-    pairpot.getVariableReference("r") = 1.0;
-    pairforce.getVariableReference("r") = 1.0;
+    try {
+      pairpot.getVariableReference("r") = 1.0;
+      pairforce.getVariableReference("r") = 1.0;
+    } catch (Lepton::Exception &) {
+      ;    // ignore -> constant potential or force
+    }
     pairpot.evaluate();
     pairforce.evaluate();
   } catch (std::exception &e) {
@@ -270,7 +284,11 @@ double PairLepton::init_one(int i, int j)
     try {
       auto expr = LeptonUtils::substitute(expressions[type2expression[i][j]], lmp);
       auto pairpot = Lepton::Parser::parse(expr, functions).createCompiledExpression();
-      pairpot.getVariableReference("r") = cut[i][j];
+      try {
+        pairpot.getVariableReference("r") = cut[i][j];
+      } catch (Lepton::Exception &) {
+        ;    // ignore -> constant potential
+      }
       offset[i][j] = pairpot.evaluate();
     } catch (std::exception &) {
     }
@@ -429,9 +447,12 @@ double PairLepton::single(int /* i */, int /* j */, int itype, int jtype, double
   auto pairforce = parsed.differentiate("r").createCompiledExpression();
 
   const double r = sqrt(rsq);
-  pairpot.getVariableReference("r") = r;
-  pairforce.getVariableReference("r") = r;
-
+  try {
+    pairpot.getVariableReference("r") = r;
+    pairforce.getVariableReference("r") = r;
+  } catch (Lepton::Exception &) {
+    ;    // ignore -> constant potential or force
+  }
   fforce = -pairforce.evaluate() / r * factor_lj;
   return (pairpot.evaluate() - offset[itype][jtype]) * factor_lj;
 }
diff --git a/src/LEPTON/pair_lepton_coul.cpp b/src/LEPTON/pair_lepton_coul.cpp
index 841565e874..f7d2042874 100644
--- a/src/LEPTON/pair_lepton_coul.cpp
+++ b/src/LEPTON/pair_lepton_coul.cpp
@@ -28,6 +28,8 @@
 
 #include "Lepton.h"
 #include "lepton_utils.h"
+
+#include <array>
 #include <cmath>
 
 using namespace LAMMPS_NS;
@@ -79,25 +81,30 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLeptonCoul::eval()
 
   std::vector<Lepton::CompiledExpression> pairforce;
   std::vector<Lepton::CompiledExpression> pairpot;
-  std::vector<std::pair<bool, bool>> have_q;
+  std::vector<std::array<bool, 3>> has_ref;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp), functions);
       pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      has_ref.push_back({true, true, true});
+      try {
+        pairforce.back().getVariableReference("r");
+      } catch (Lepton::Exception &) {
+        has_ref.back()[0] = false;
+      }
       if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
-      pairforce.back().getVariableReference("r");
-      have_q.emplace_back(true, true);
 
       // check if there are references to charges
+
       try {
         pairforce.back().getVariableReference("qi");
-      } catch (std::exception &) {
-        have_q.back().first = false;
+      } catch (Lepton::Exception &) {
+        has_ref.back()[1] = false;
       }
       try {
         pairforce.back().getVariableReference("qj");
-      } catch (std::exception &) {
-        have_q.back().second = false;
+      } catch (Lepton::Exception &) {
+        has_ref.back()[2] = false;
       }
     }
   } catch (std::exception &e) {
@@ -130,9 +137,9 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLeptonCoul::eval()
       if (rsq < cutsq[itype][jtype]) {
         const double r = sqrt(rsq);
         const int idx = type2expression[itype][jtype];
-        pairforce[idx].getVariableReference("r") = r;
-        if (have_q[idx].first) pairforce[idx].getVariableReference("qi") = q2e * q[i];
-        if (have_q[idx].second) pairforce[idx].getVariableReference("qj") = q2e * q[j];
+        if (has_ref[idx][0]) pairforce[idx].getVariableReference("r") = r;
+        if (has_ref[idx][1]) pairforce[idx].getVariableReference("qi") = q2e * q[i];
+        if (has_ref[idx][2]) pairforce[idx].getVariableReference("qj") = q2e * q[j];
         const double fpair = -pairforce[idx].evaluate() / r * factor_coul;
 
         fxtmp += delx * fpair;
@@ -146,9 +153,14 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLeptonCoul::eval()
 
         double ecoul = 0.0;
         if (EFLAG) {
-          pairpot[idx].getVariableReference("r") = r;
-          if (have_q[idx].first) pairpot[idx].getVariableReference("qi") = q2e * q[i];
-          if (have_q[idx].second) pairpot[idx].getVariableReference("qj") = q2e * q[j];
+          try {
+            pairpot[idx].getVariableReference("r") = r;
+          } catch (Lepton::Exception &) {
+            ;    // ignore -> constant potential
+          }
+          if (has_ref[idx][1]) pairpot[idx].getVariableReference("qi") = q2e * q[i];
+          if (has_ref[idx][2]) pairpot[idx].getVariableReference("qj") = q2e * q[j];
+
           ecoul = pairpot[idx].evaluate();
           ecoul *= factor_coul;
         }
@@ -249,18 +261,22 @@ double PairLeptonCoul::single(int i, int j, int itype, int jtype, double rsq, do
 
   const double r = sqrt(rsq);
   const double q2e = sqrt(force->qqrd2e);
-  pairpot.getVariableReference("r") = r;
-  pairforce.getVariableReference("r") = r;
+  try {
+    pairpot.getVariableReference("r") = r;
+    pairforce.getVariableReference("r") = r;
+  } catch (Lepton::Exception &) {
+    ;    // ignore -> constant potential or force
+  }
   try {
     pairpot.getVariableReference("qi") = q2e * atom->q[i];
     pairforce.getVariableReference("qi") = q2e * atom->q[i];
-  } catch (std::exception &) {
+  } catch (Lepton::Exception &) {
     /* ignore */
   }
   try {
     pairpot.getVariableReference("qj") = q2e * atom->q[j];
     pairforce.getVariableReference("qj") = q2e * atom->q[j];
-  } catch (std::exception &) {
+  } catch (Lepton::Exception &) {
     /* ignore */
   }
 
diff --git a/src/LEPTON/pair_lepton_sphere.cpp b/src/LEPTON/pair_lepton_sphere.cpp
index 29514aed38..72d0e85d0b 100644
--- a/src/LEPTON/pair_lepton_sphere.cpp
+++ b/src/LEPTON/pair_lepton_sphere.cpp
@@ -28,6 +28,7 @@
 
 #include "Lepton.h"
 #include "lepton_utils.h"
+#include <array>
 #include <cmath>
 
 using namespace LAMMPS_NS;
@@ -77,25 +78,30 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLeptonSphere::eval()
 
   std::vector<Lepton::CompiledExpression> pairforce;
   std::vector<Lepton::CompiledExpression> pairpot;
-  std::vector<std::pair<bool, bool>> have_rad;
+  std::vector<std::array<bool, 3>> has_ref;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, lmp), functions);
       pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      has_ref.push_back({true, true, true});
+      try {
+        pairforce.back().getVariableReference("r");
+      } catch (Lepton::Exception &) {
+        has_ref.back()[0] = false;
+      }
       if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
-      pairforce.back().getVariableReference("r");
-      have_rad.emplace_back(true, true);
 
-      // check if there are references to charges
+      // check if there are references to radii
+
       try {
         pairforce.back().getVariableReference("radi");
-      } catch (std::exception &) {
-        have_rad.back().first = false;
+      } catch (Lepton::Exception &) {
+        has_ref.back()[1] = false;
       }
       try {
         pairforce.back().getVariableReference("radj");
-      } catch (std::exception &) {
-        have_rad.back().second = false;
+      } catch (Lepton::Exception &) {
+        has_ref.back()[2] = false;
       }
     }
   } catch (std::exception &e) {
@@ -128,9 +134,9 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLeptonSphere::eval()
       if (rsq < cutsq[itype][jtype]) {
         const double r = sqrt(rsq);
         const int idx = type2expression[itype][jtype];
-        pairforce[idx].getVariableReference("r") = r;
-        if (have_rad[idx].first) pairforce[idx].getVariableReference("radi") = radius[i];
-        if (have_rad[idx].second) pairforce[idx].getVariableReference("radj") = radius[j];
+        if (has_ref[idx][0]) pairforce[idx].getVariableReference("r") = r;
+        if (has_ref[idx][1]) pairforce[idx].getVariableReference("radi") = radius[i];
+        if (has_ref[idx][2]) pairforce[idx].getVariableReference("radj") = radius[j];
         const double fpair = -pairforce[idx].evaluate() / r * factor_lj;
 
         fxtmp += delx * fpair;
@@ -144,9 +150,14 @@ template <int EVFLAG, int EFLAG, int NEWTON_PAIR> void PairLeptonSphere::eval()
 
         double evdwl = 0.0;
         if (EFLAG) {
-          pairpot[idx].getVariableReference("r") = r;
-          if (have_rad[idx].first) pairpot[idx].getVariableReference("radi") = radius[i];
-          if (have_rad[idx].second) pairpot[idx].getVariableReference("radj") = radius[j];
+          try {
+            pairpot[idx].getVariableReference("r") = r;
+          } catch (Lepton::Exception &) {
+            ;    // ignore -> constant potential
+          }
+          if (has_ref[idx][1]) pairpot[idx].getVariableReference("radi") = radius[i];
+          if (has_ref[idx][2]) pairpot[idx].getVariableReference("radj") = radius[j];
+
           evdwl = pairpot[idx].evaluate();
           evdwl *= factor_lj;
         }
@@ -211,19 +222,23 @@ double PairLeptonSphere::single(int i, int j, int itype, int jtype, double rsq,
   auto pairforce = parsed.differentiate("r").createCompiledExpression();
 
   const double r = sqrt(rsq);
-  pairpot.getVariableReference("r") = r;
-  pairforce.getVariableReference("r") = r;
+  try {
+    pairpot.getVariableReference("r") = r;
+    pairforce.getVariableReference("r") = r;
+  } catch (Lepton::Exception &) {
+    ;    // ignore -> constant potential or force
+  }
   try {
     pairpot.getVariableReference("radi") = atom->radius[i];
     pairforce.getVariableReference("radi") = atom->radius[i];
-  } catch (std::exception &) {
-    /* ignore */
+  } catch (Lepton::Exception &) {
+    ;    // ignore
   }
   try {
     pairpot.getVariableReference("radj") = atom->radius[j];
     pairforce.getVariableReference("radj") = atom->radius[j];
-  } catch (std::exception &) {
-    /* ignore */
+  } catch (Lepton::Exception &) {
+    ;    // ignore
   }
 
   fforce = -pairforce.evaluate() / r * factor_lj;
diff --git a/src/MACHDYN/fix_smd_adjust_dt.cpp b/src/MACHDYN/fix_smd_adjust_dt.cpp
index d2728f1042..30006ec7cc 100644
--- a/src/MACHDYN/fix_smd_adjust_dt.cpp
+++ b/src/MACHDYN/fix_smd_adjust_dt.cpp
@@ -37,7 +37,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MACHDYN/fix_smd_tlsph_reference_configuration.cpp b/src/MACHDYN/fix_smd_tlsph_reference_configuration.cpp
index b236952e60..72ad76eccd 100644
--- a/src/MACHDYN/fix_smd_tlsph_reference_configuration.cpp
+++ b/src/MACHDYN/fix_smd_tlsph_reference_configuration.cpp
@@ -48,7 +48,7 @@ using namespace FixConst;
 using namespace SMD_Kernels;
 using namespace SMD_Math;
 
-#define DELTA 16384
+static constexpr int DELTA = 16384;
 
 #define INSERT_PREDEFINED_CRACKS false
 
diff --git a/src/MACHDYN/fix_smd_wall_surface.cpp b/src/MACHDYN/fix_smd_wall_surface.cpp
index facc03d471..3753b64f52 100644
--- a/src/MACHDYN/fix_smd_wall_surface.cpp
+++ b/src/MACHDYN/fix_smd_wall_surface.cpp
@@ -32,8 +32,8 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace Eigen;
 using namespace std;
-#define DELTA 16384
-#define EPSILON 1.0e-6
+static constexpr int DELTA = 16384;
+static constexpr double EPSILON = 1.0e-6;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MACHDYN/pair_smd_tlsph.cpp b/src/MACHDYN/pair_smd_tlsph.cpp
index 845fed3352..6b2320219f 100644
--- a/src/MACHDYN/pair_smd_tlsph.cpp
+++ b/src/MACHDYN/pair_smd_tlsph.cpp
@@ -54,8 +54,6 @@ using namespace SMD_Math;
 #define JAUMANN false
 #define DETF_MIN 0.2 // maximum compression deformation allow
 #define DETF_MAX 2.0 // maximum tension deformation allowed
-#define TLSPH_DEBUG 0
-#define PLASTIC_STRAIN_AVERAGE_WINDOW 100.0
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MAKE/MACHINES/Makefile.bgq b/src/MAKE/MACHINES/Makefile.bgq
deleted file mode 100644
index 4baecb9fc3..0000000000
--- a/src/MAKE/MACHINES/Makefile.bgq
+++ /dev/null
@@ -1,60 +0,0 @@
-# bgq = IBM Blue Gene/Q, multiple compiler options, native MPI, ALCF FFTW2
-
-SHELL = /bin/bash
-.SUFFIXES: .cpp .u 
-
-# ---------------------------------------------------------------------
-# build rules and dependencies
-# do not edit this section
-# select which compiler by editing Makefile.bgq.details
-
-include ../MAKE/MACHINES/bgq.make.details
-
-include Makefile.package.settings
-include Makefile.package
-
-EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
-EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
-EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) $(DYN_LIB)
-EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
-EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
-
-# Path to src files
-
-vpath %.cpp ..
-vpath %.h ..
-
-# Link target
-
-$(EXE): main.o $(LMPLIB) $(EXTRA_LINK_DEPENDS)
-	$(LINK) $(LINKFLAGS) main.o $(EXTRA_PATH) $(LMPLINK) $(EXTRA_LIB) $(LIB) -o $@
-	$(SIZE) $@
-
-# Library targets
-
-$(ARLIB): $(OBJ) $(EXTRA_LINK_DEPENDS)
-	@rm -f ../$(ARLIB)
-	$(ARCHIVE) $(ARFLAGS) ../$(ARLIB) $(OBJ)
-	@rm -f $(ARLIB)
-	@ln -s ../$(ARLIB) $(ARLIB)
-
-$(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS)
-	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o ../$(SHLIB) \
-		$(OBJ) $(EXTRA_LIB) $(LIB)
-	@rm -f $(SHLIB)
-	@ln -s ../$(SHLIB) $(SHLIB)
-
-# Compilation rules
-
-%.o:%.cpp
-	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
-
-# Individual dependencies
-
-depend : fastdep.exe $(SRC)
-	@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
-
-fastdep.exe: ../DEPEND/fastdep.c
-	cc -O -o $@ $<
-
-sinclude .depend
diff --git a/src/MAKE/MACHINES/Makefile.xe6 b/src/MAKE/MACHINES/Makefile.xe6
deleted file mode 100644
index 9dbe0ba73e..0000000000
--- a/src/MAKE/MACHINES/Makefile.xe6
+++ /dev/null
@@ -1,125 +0,0 @@
-# xe6 = Cray XE6, Cray CC, native MPI, FFTW
-
-SHELL = /bin/sh
-.SUFFIXES: .cpp .d
-
-# ---------------------------------------------------------------------
-# compiler/linker settings
-# specify flags and libraries needed for your compiler
-
-CC =		CC
-CCFLAGS =	-fastsse
-SHFLAGS =	-fPIC
-DEPFLAGS =	-M
-
-LINK =		CC
-LINKFLAGS =	-O
-LIB =           -lstdc++
-SIZE =		size
-
-ARCHIVE =	ar
-ARFLAGS =	-rc
-SHLIBFLAGS =	-shared
-
-# ---------------------------------------------------------------------
-# LAMMPS-specific settings, all OPTIONAL
-# specify settings for LAMMPS features you will use
-# if you change any -D setting, do full re-compile after "make clean"
-
-# LAMMPS ifdef settings
-# see possible settings in Section 3.5 of the manual
-
-LMP_INC =	-DLAMMPS_GZIP
-
-# MPI library
-# see discussion in Section 3.4 of the manual
-# MPI wrapper compiler/linker can provide this info
-# can point to dummy MPI library in src/STUBS as in Makefile.serial
-# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
-# INC = path for mpi.h, MPI compiler settings
-# PATH = path for MPI library
-# LIB = name of MPI library
-
-MPI_INC =       -DMPICH_SKIP_MPICXX 
-MPI_PATH = 
-MPI_LIB =	
-
-# FFT library
-# see discussion in Section 3.5.2 of manual
-# can be left blank to use provided KISS FFT library
-# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
-# PATH = path for FFT library
-# LIB = name of FFT library
-
-FFT_INC =
-FFT_PATH =
-FFT_LIB =
-
-# JPEG and/or PNG library
-# see discussion in Section 3.5.4 of manual
-# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
-# INC = path(s) for jpeglib.h and/or png.h
-# PATH = path(s) for JPEG library and/or PNG library
-# LIB = name(s) of JPEG library and/or PNG library
-
-JPG_INC =
-JPG_PATH =
-JPG_LIB =
-
-#  library for loading shared objects (defaults to -ldl, should be empty on Windows)
-# uncomment to change the default
-
-# override DYN_LIB =
-
-# ---------------------------------------------------------------------
-# build rules and dependencies
-# do not edit this section
-
-include Makefile.package.settings
-include Makefile.package
-
-EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
-EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
-EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB) $(DYN_LIB)
-EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
-EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
-
-# Path to src files
-
-vpath %.cpp ..
-vpath %.h ..
-
-# Link target
-
-$(EXE): main.o $(LMPLIB) $(EXTRA_LINK_DEPENDS)
-	$(LINK) $(LINKFLAGS) main.o $(EXTRA_PATH) $(LMPLINK) $(EXTRA_LIB) $(LIB) -o $@
-	$(SIZE) $@
-
-# Library targets
-
-$(ARLIB): $(OBJ) $(EXTRA_LINK_DEPENDS)
-	@rm -f ../$(ARLIB)
-	$(ARCHIVE) $(ARFLAGS) ../$(ARLIB) $(OBJ)
-	@rm -f $(ARLIB)
-	@ln -s ../$(ARLIB) $(ARLIB)
-
-$(SHLIB): $(OBJ) $(EXTRA_LINK_DEPENDS)
-	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o ../$(SHLIB) \
-		$(OBJ) $(EXTRA_LIB) $(LIB)
-	@rm -f $(SHLIB)
-	@ln -s ../$(SHLIB) $(SHLIB)
-
-# Compilation rules
-
-%.o:%.cpp
-	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
-
-# Individual dependencies
-
-depend : fastdep.exe $(SRC)
-	@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
-
-fastdep.exe: ../DEPEND/fastdep.c
-	cc -O -o $@ $<
-
-sinclude .depend
diff --git a/src/MAKE/MACHINES/bgq.make.details b/src/MAKE/MACHINES/bgq.make.details
deleted file mode 100644
index 0febd69d26..0000000000
--- a/src/MAKE/MACHINES/bgq.make.details
+++ /dev/null
@@ -1,125 +0,0 @@
-# multiple compiler options for BGQ
-
-# ---------------------------------------------------------------------
-# compiler/linker settings
-# specify flags and libraries needed for your compiler
-
-# uncomment one and only one of the following three lines 
-# to choose a compiler toolchain
-
-#COMPILER = GCC
-#COMPILER = LLVM
-COMPILER = XLC
-
-ifeq ($(COMPILER),XLC)
-CC       = /bgsys/drivers/ppcfloor/comm/xl.ndebug/bin/mpixlcxx_r
-CCFLAGS  = -O3 -qarch=qp -qtune=qp -qsmp=omp -qsimd=auto -qhot=level=2 -qprefetch -qunroll=yes
-FC       = /bgsys/drivers/ppcfloor/comm/xl.ndebug/bin/mpixlf90_r
-FFLAGS   = -O3 -qarch=qp -qtune=qp -qsimd=auto -qhot=level=2 -qprefetch -qunroll=yes -qsmp=omp -qextname -qnosave
-DEPFLAGS = -M -qmakedep=gcc
-endif
-
-ifeq ($(COMPILER),GCC)
-CC       = /bgsys/drivers/ppcfloor/comm/gcc.legacy/bin/mpicxx
-CCFLAGS  = -O3 -fopenmp
-FC       = /bgsys/drivers/ppcfloor/comm/gcc.legacy/bin/mpif90
-FFLAGS   = -O3 -fopenmp
-DEPFLAGS = -M
-endif
-
-ifeq ($(COMPILER),LLVM)
-#CC       = bgclang++
-CC       = /home/projects/llvm/mpi/bgclang/bin/mpiclang++
-CCFLAGS  = -O3 -fopenmp
-DEPFLAGS = -M
-FC	 = /bin/false
-FFLAGS   = LLVM does not have a Fortran front-end!
-endif
-
-LINK      = $(CC)
-LINKFLAGS = $(CCFLAGS)
-
-ifeq ($(COMPILER),XLC)
-  MASS_LIB    = ${IBM_MAIN_DIR}/xlmass/bg/7.3/bglib64 
-  XLF_LIB     = ${IBM_MAIN_DIR}/xlf/bg/14.1/bglib64
-  XLSMP_LIB   = ${IBM_MAIN_DIR}/xlsmp/bg/3.1/bglib64
-  LIB        += -L${MASS_LIB} -L${XLF_LIB} -L${XLSMP_LIB} 
-  LIB        += -lmassv -lmass 
-  LIB        += -lxlf90_r -lxlsmp -lxlopt -lxlfmath -lxl
-endif
-
-ifeq ($(COMPILER),GCC)
-# libm is definitely slower than libmass...
-  LIB += -lm -lgfortran
-endif
-
-ifeq ($(COMPILER),LLVM)
-    SLEEF_DIR = /home/projects/llvm/sleef
-    LIB += -L${SLEEF_DIR}/lib -lsleef
-endif
-
-SIZE       = size
-
-ARCHIVE    = ar
-ARFLAGS    = -rc
-
-# BGQ should not use shared libraries
-
-SHFLAGS    =
-SHLIBFLAGS = 
-
-# ---------------------------------------------------------------------
-# LAMMPS-specific settings, all OPTIONAL
-# specify settings for LAMMPS features you will use
-# if you change any -D setting, do full re-compile after "make clean"
-
-# LAMMPS ifdef settings
-# see possible settings in Section 3.5 of the manual
-
-LMP_INC = -DLAMMPS_GZIP
-
-# MPI library
-# see discussion in Section 3.4 of the manual
-# MPI wrapper compiler/linker can provide this info
-# can point to dummy MPI library in src/STUBS as in Makefile.serial
-# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
-# INC = path for mpi.h, MPI compiler settings
-# PATH = path for MPI library
-# LIB = name of MPI library
-
-MPI_INC    = 
-MPI_PATH   = 
-MPI_LIB    =
-
-MPI_INC += -DMPICH_SKIP_MPICXX 
-MPI_LIB += #/home/jhammond/OSPRI/branches/marpn/wrap/libmpiarbrpn.a
-
-# FFT library
-# see discussion in Section 3.5.2 of manual
-# can be left blank to use provided KISS FFT library
-# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
-# PATH = path for FFT library
-# LIB = name of FFT library
-
-FFT_INC  = -I/soft/libraries/alcf/current/xl/FFTW2/include -DFFT_FFTW2 -DFFTW_SIZE
-FFT_PATH = #/soft/libraries/alcf/current/xl/FFTW2
-FFT_LIB  = -L/soft/libraries/alcf/current/xl/FFTW2/lib -ldfftw
-
-# JPEG and/or PNG library
-# see discussion in Section 3.5.4 of manual
-# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
-# INC = path(s) for jpeglib.h and/or png.h
-# PATH = path(s) for JPEG library and/or PNG library
-# LIB = name(s) of JPEG library and/or PNG library
-
-JPG_INC =
-JPG_PATH =
-JPG_LIB =
-
-depend : fastdep.exe $(SRC)
-	@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
-
-fastdep.exe: ../DEPEND/fastdep.c
-	cc -O -o $@ $<
-
-sinclude .depend
diff --git a/src/MANIFOLD/fix_nve_manifold_rattle.cpp b/src/MANIFOLD/fix_nve_manifold_rattle.cpp
index b1efea951f..dc0492dbe9 100644
--- a/src/MANIFOLD/fix_nve_manifold_rattle.cpp
+++ b/src/MANIFOLD/fix_nve_manifold_rattle.cpp
@@ -287,21 +287,21 @@ void FixNVEManifoldRattle::update_var_params()
 
 /* -----------------------------------------------------------------------------
    ---------------------------------------------------------------------------*/
-int FixNVEManifoldRattle::dof(int /*igroup*/)
+bigint FixNVEManifoldRattle::dof(int /*igroup*/)
 {
   int *mask = atom->mask;
   int nlocal = atom->nlocal;
-  int natoms = 0;
+  bigint natoms = 0;
   for (int i = 0; i < nlocal; ++i) {
     if (mask[i] & groupbit) ++natoms;
   }
 
-  int dofs;
-  MPI_Allreduce( &natoms, &dofs, 1, MPI_INT, MPI_SUM, world );
+  bigint dofs;
+  MPI_Allreduce( &natoms, &dofs, 1, MPI_LMP_BIGINT, MPI_SUM, world );
 
   // Make sure that, if there is just no or one atom, no dofs are subtracted,
   // since for the first atom already 3 dofs are subtracted because of the
-  // centre of mass corrections:
+  // center of mass corrections:
   if (dofs <= 1) dofs = 0;
   stats.dofs_removed = dofs;
 
diff --git a/src/MANIFOLD/fix_nve_manifold_rattle.h b/src/MANIFOLD/fix_nve_manifold_rattle.h
index 3eae9c4bc3..7c9e302094 100644
--- a/src/MANIFOLD/fix_nve_manifold_rattle.h
+++ b/src/MANIFOLD/fix_nve_manifold_rattle.h
@@ -75,7 +75,7 @@ class FixNVEManifoldRattle : public Fix {
   void init() override;
   void reset_dt() override;
   void end_of_step() override;
-  int dof(int) override;
+  bigint dof(int) override;
   void setup(int) override {}    // Not needed for fixNVE but is for fixNVT
   double memory_usage() override;
 
diff --git a/src/MANYBODY/pair_airebo.cpp b/src/MANYBODY/pair_airebo.cpp
index 129b9d2218..13d2b97433 100644
--- a/src/MANYBODY/pair_airebo.cpp
+++ b/src/MANYBODY/pair_airebo.cpp
@@ -41,8 +41,8 @@
 using namespace LAMMPS_NS;
 using namespace MathSpecial;
 
-#define TOL 1.0e-9
-#define PGDELTA 1
+static constexpr double TOL = 1.0e-9;
+static constexpr int PGDELTA = 1;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_comb.cpp b/src/MANYBODY/pair_comb.cpp
index 126544ddc5..37e3bb404f 100644
--- a/src/MANYBODY/pair_comb.cpp
+++ b/src/MANYBODY/pair_comb.cpp
@@ -43,9 +43,9 @@ using namespace MathConst;
 using namespace MathExtra;
 using namespace MathSpecial;
 
-#define DELTA 4
-#define PGDELTA 1
-#define MAXNEIGH 24
+static constexpr int DELTA = 4;
+static constexpr int PGDELTA = 1;
+static constexpr int MAXNEIGH = 24;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_comb3.cpp b/src/MANYBODY/pair_comb3.cpp
index a6a6ed37fd..5ae599ea31 100644
--- a/src/MANYBODY/pair_comb3.cpp
+++ b/src/MANYBODY/pair_comb3.cpp
@@ -43,9 +43,9 @@ using namespace MathConst;
 using namespace MathExtra;
 using namespace MathSpecial;
 
-#define DELTA 4
-#define PGDELTA 1
-#define MAXNEIGH 24
+static constexpr int DELTA = 4;
+static constexpr int PGDELTA = 1;
+static constexpr int MAXNEIGH = 24;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_eam.cpp b/src/MANYBODY/pair_eam.cpp
index a3d4257cc2..a2fff8fb55 100644
--- a/src/MANYBODY/pair_eam.cpp
+++ b/src/MANYBODY/pair_eam.cpp
@@ -33,7 +33,7 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_eam_cd.cpp b/src/MANYBODY/pair_eam_cd.cpp
index 1746435fad..798292eb1d 100644
--- a/src/MANYBODY/pair_eam_cd.cpp
+++ b/src/MANYBODY/pair_eam_cd.cpp
@@ -32,7 +32,7 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024        // This sets the maximum line length in EAM input files.
+static constexpr int MAXLINE = 1024;        // This sets the maximum line length in EAM input files.
 
 PairEAMCD::PairEAMCD(LAMMPS *lmp, int _cdeamVersion)
   : PairEAM(lmp), PairEAMAlloy(lmp), cdeamVersion(_cdeamVersion)
diff --git a/src/MANYBODY/pair_edip.cpp b/src/MANYBODY/pair_edip.cpp
index 974dc9ab84..0098bb32e5 100644
--- a/src/MANYBODY/pair_edip.cpp
+++ b/src/MANYBODY/pair_edip.cpp
@@ -39,11 +39,11 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
-#define GRIDDENSITY 8000
-#define GRIDSTART 0.1
+static constexpr int GRIDDENSITY = 8000;
+static constexpr double GRIDSTART = 0.1;
 
 // max number of interaction per atom for f(Z) environment potential
 
diff --git a/src/MANYBODY/pair_edip_multi.cpp b/src/MANYBODY/pair_edip_multi.cpp
index 00be0ad829..2f8e9ca9c8 100644
--- a/src/MANYBODY/pair_edip_multi.cpp
+++ b/src/MANYBODY/pair_edip_multi.cpp
@@ -38,8 +38,8 @@
 using namespace LAMMPS_NS;
 using namespace MathExtra;
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 static const char cite_pair_edip[] =
   "pair edip/multi: doi:10.1103/PhysRevB.86.144118, doi:10.1088/0953-8984/22/3/035802\n\n"
diff --git a/src/MANYBODY/pair_extep.cpp b/src/MANYBODY/pair_extep.cpp
index 755b4d0132..bcebbd3e72 100644
--- a/src/MANYBODY/pair_extep.cpp
+++ b/src/MANYBODY/pair_extep.cpp
@@ -37,9 +37,9 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathExtra;
 
-#define MAXLINE 1024
-#define DELTA 4
-#define PGDELTA 1
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
+static constexpr int PGDELTA = 1;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_gw.cpp b/src/MANYBODY/pair_gw.cpp
index 1fd98b80f0..471896851a 100644
--- a/src/MANYBODY/pair_gw.cpp
+++ b/src/MANYBODY/pair_gw.cpp
@@ -37,7 +37,7 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathExtra;
 
-#define DELTA 4
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_gw_zbl.cpp b/src/MANYBODY/pair_gw_zbl.cpp
index 5e61778c68..da1ff557ad 100644
--- a/src/MANYBODY/pair_gw_zbl.cpp
+++ b/src/MANYBODY/pair_gw_zbl.cpp
@@ -32,8 +32,8 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_lcbop.cpp b/src/MANYBODY/pair_lcbop.cpp
index ed085c4b98..1e19363212 100644
--- a/src/MANYBODY/pair_lcbop.cpp
+++ b/src/MANYBODY/pair_lcbop.cpp
@@ -33,9 +33,9 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
-#define TOL 1.0e-9
-#define PGDELTA 1
+static constexpr int MAXLINE = 1024;
+static constexpr double TOL = 1.0e-9;
+static constexpr int PGDELTA = 1;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_local_density.cpp b/src/MANYBODY/pair_local_density.cpp
index 71a7e658d2..444f56f291 100644
--- a/src/MANYBODY/pair_local_density.cpp
+++ b/src/MANYBODY/pair_local_density.cpp
@@ -34,7 +34,7 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 static const char cite_pair_local_density[] =
   "pair_style local/density command: doi:10.1063/1.4958629, doi:10.1021/acs.jpcb.7b12446\n\n"
diff --git a/src/MANYBODY/pair_meam_spline.cpp b/src/MANYBODY/pair_meam_spline.cpp
index 1c17f434f2..b952330c04 100644
--- a/src/MANYBODY/pair_meam_spline.cpp
+++ b/src/MANYBODY/pair_meam_spline.cpp
@@ -440,7 +440,7 @@ void PairMEAMSpline::coeff(int narg, char **arg)
   }
 }
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 void PairMEAMSpline::read_file(const char* filename)
 {
diff --git a/src/MANYBODY/pair_meam_sw_spline.cpp b/src/MANYBODY/pair_meam_sw_spline.cpp
index 3e3e813c5b..e02625c598 100644
--- a/src/MANYBODY/pair_meam_sw_spline.cpp
+++ b/src/MANYBODY/pair_meam_sw_spline.cpp
@@ -384,7 +384,7 @@ void PairMEAMSWSpline::coeff(int narg, char **arg)
    set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 void PairMEAMSWSpline::read_file(const char* filename)
 {
diff --git a/src/MANYBODY/pair_nb3b_harmonic.cpp b/src/MANYBODY/pair_nb3b_harmonic.cpp
index 5bc930c186..51e554694c 100644
--- a/src/MANYBODY/pair_nb3b_harmonic.cpp
+++ b/src/MANYBODY/pair_nb3b_harmonic.cpp
@@ -34,8 +34,8 @@
 using namespace LAMMPS_NS;
 using MathConst::MY_PI;
 
-#define DELTA 4
-#define SMALL 0.001
+static constexpr int DELTA = 4;
+static constexpr double SMALL = 0.001;
 
 static const char *substyle[] = {"nb3n/harmonic", "nb3b/screened"};
 
diff --git a/src/MANYBODY/pair_nb3b_screened.cpp b/src/MANYBODY/pair_nb3b_screened.cpp
index d66945d563..9480ae9f15 100644
--- a/src/MANYBODY/pair_nb3b_screened.cpp
+++ b/src/MANYBODY/pair_nb3b_screened.cpp
@@ -20,7 +20,7 @@
 
 #include <cmath>
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 using namespace LAMMPS_NS;
 
diff --git a/src/MANYBODY/pair_polymorphic.cpp b/src/MANYBODY/pair_polymorphic.cpp
index 03ef6cb49f..cc0b225a78 100644
--- a/src/MANYBODY/pair_polymorphic.cpp
+++ b/src/MANYBODY/pair_polymorphic.cpp
@@ -38,8 +38,8 @@
 using namespace LAMMPS_NS;
 using namespace MathExtra;
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/MANYBODY/pair_sw.cpp b/src/MANYBODY/pair_sw.cpp
index 540fd8772c..18b642967c 100644
--- a/src/MANYBODY/pair_sw.cpp
+++ b/src/MANYBODY/pair_sw.cpp
@@ -33,7 +33,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 4
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_sw_angle_table.cpp b/src/MANYBODY/pair_sw_angle_table.cpp
index 21f1967c0d..8e605caebd 100644
--- a/src/MANYBODY/pair_sw_angle_table.cpp
+++ b/src/MANYBODY/pair_sw_angle_table.cpp
@@ -39,7 +39,7 @@ using MathConst::DEG2RAD;
 using MathConst::MY_PI;
 using MathConst::RAD2DEG;
 
-#define DELTA 4
+static constexpr int DELTA = 4;
 
 enum { LINEAR, SPLINE };
 
diff --git a/src/MANYBODY/pair_tersoff.cpp b/src/MANYBODY/pair_tersoff.cpp
index c7c45bc865..e3c8c83416 100644
--- a/src/MANYBODY/pair_tersoff.cpp
+++ b/src/MANYBODY/pair_tersoff.cpp
@@ -40,7 +40,7 @@ using namespace MathConst;
 using namespace MathSpecial;
 using namespace MathExtra;
 
-#define DELTA 4
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_tersoff_mod.cpp b/src/MANYBODY/pair_tersoff_mod.cpp
index 010ff8df87..f4ae623bae 100644
--- a/src/MANYBODY/pair_tersoff_mod.cpp
+++ b/src/MANYBODY/pair_tersoff_mod.cpp
@@ -35,7 +35,7 @@ using namespace MathConst;
 using namespace MathExtra;
 using namespace MathSpecial;
 
-#define DELTA 4
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_tersoff_mod_c.cpp b/src/MANYBODY/pair_tersoff_mod_c.cpp
index bdec854c41..e7fef3ccfa 100644
--- a/src/MANYBODY/pair_tersoff_mod_c.cpp
+++ b/src/MANYBODY/pair_tersoff_mod_c.cpp
@@ -28,7 +28,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 4
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_tersoff_table.cpp b/src/MANYBODY/pair_tersoff_table.cpp
index 325542f97e..34c3573d0c 100644
--- a/src/MANYBODY/pair_tersoff_table.cpp
+++ b/src/MANYBODY/pair_tersoff_table.cpp
@@ -39,10 +39,10 @@
 using namespace LAMMPS_NS;
 using MathConst::MY_PI;
 
-#define MAXLINE 1024
-#define DELTA 4
+static constexpr int MAXLINE = 1024;
+static constexpr int DELTA = 4;
 
-#define GRIDSTART 0.1
+static constexpr double GRIDSTART = 0.1;
 #define GRIDDENSITY_FCUTOFF 5000
 #define GRIDDENSITY_EXP 12000
 #define GRIDDENSITY_GTETA 12000
diff --git a/src/MANYBODY/pair_tersoff_zbl.cpp b/src/MANYBODY/pair_tersoff_zbl.cpp
index daf2718cb1..9c4dabc219 100644
--- a/src/MANYBODY/pair_tersoff_zbl.cpp
+++ b/src/MANYBODY/pair_tersoff_zbl.cpp
@@ -34,7 +34,7 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define DELTA 4
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_threebody_table.cpp b/src/MANYBODY/pair_threebody_table.cpp
index 57f7021816..20b26edbfa 100644
--- a/src/MANYBODY/pair_threebody_table.cpp
+++ b/src/MANYBODY/pair_threebody_table.cpp
@@ -35,7 +35,7 @@
 using namespace LAMMPS_NS;
 using MathConst::MY_PI;
 
-#define DELTA 4
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MANYBODY/pair_vashishta.cpp b/src/MANYBODY/pair_vashishta.cpp
index 531f15d263..79df1f36d7 100644
--- a/src/MANYBODY/pair_vashishta.cpp
+++ b/src/MANYBODY/pair_vashishta.cpp
@@ -33,7 +33,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 4
+static constexpr int DELTA = 4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MC/fix_bond_break.cpp b/src/MC/fix_bond_break.cpp
index 6589f93e23..bb3e725c96 100644
--- a/src/MC/fix_bond_break.cpp
+++ b/src/MC/fix_bond_break.cpp
@@ -32,7 +32,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define DELTA 16
+static constexpr int DELTA = 16;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MC/fix_bond_create.cpp b/src/MC/fix_bond_create.cpp
index adecc9f252..8922bde55f 100644
--- a/src/MC/fix_bond_create.cpp
+++ b/src/MC/fix_bond_create.cpp
@@ -34,8 +34,8 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
-#define BIG 1.0e20
-#define DELTA 16
+static constexpr double BIG = 1.0e20;
+static constexpr int DELTA = 16;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MC/fix_charge_regulation.cpp b/src/MC/fix_charge_regulation.cpp
index a828f276ea..2fade2be79 100644
--- a/src/MC/fix_charge_regulation.cpp
+++ b/src/MC/fix_charge_regulation.cpp
@@ -66,9 +66,9 @@ static const char cite_fix_charge_regulation[] =
 enum{CONSTANT,EQUAL}; // parsing input variables
 
 // large energy value used to signal overlap
-#define MAXENERGYSIGNAL 1.0e100
-#define MAXENERGYTEST 1.0e50
-#define SMALL 0.0000001
+static constexpr double MAXENERGYSIGNAL = 1.0e100;
+static constexpr double MAXENERGYTEST = 1.0e50;
+static constexpr double SMALL = 0.0000001;
 #define NA_RHO0 0.602214 // Avogadro's constant times reference concentration  (N_A * mol / liter)  [nm^-3]
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/MC/fix_gcmc.cpp b/src/MC/fix_gcmc.cpp
index ff8c5eb9c9..b3d000e8b1 100644
--- a/src/MC/fix_gcmc.cpp
+++ b/src/MC/fix_gcmc.cpp
@@ -52,14 +52,14 @@ using namespace MathConst;
 
 // large energy value used to signal overlap
 
-#define MAXENERGYSIGNAL 1.0e100
+static constexpr double MAXENERGYSIGNAL = 1.0e100;
 
 // this must be lower than MAXENERGYSIGNAL
 // by a large amount, so that it is still
 // less than total energy when negative
 // energy contributions are added to MAXENERGYSIGNAL
 
-#define MAXENERGYTEST 1.0e50
+static constexpr double MAXENERGYTEST = 1.0e50;
 
 enum { EXCHATOM, EXCHMOL };          // exchmode
 enum { NONE, MOVEATOM, MOVEMOL };    // movemode
@@ -89,6 +89,7 @@ FixGCMC::FixGCMC(LAMMPS *lmp, int narg, char **arg) :
 
   ngroups = 0;
   ngrouptypes = 0;
+  triclinic = domain->triclinic;
 
   // required args
 
@@ -123,8 +124,7 @@ FixGCMC::FixGCMC(LAMMPS *lmp, int narg, char **arg) :
 
   // error checks on region and its extent being inside simulation box
 
-  region_xlo = region_xhi = region_ylo = region_yhi =
-    region_zlo = region_zhi = 0.0;
+  region_xlo = region_xhi = region_ylo = region_yhi = region_zlo = region_zhi = 0.0;
   if (region) {
     if (region->bboxflag == 0)
       error->all(FLERR,"Fix gcmc region does not support a bounding box");
@@ -298,8 +298,7 @@ void FixGCMC::options(int narg, char **arg)
     } else if (strcmp(arg[iarg],"region") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix gcmc command");
       region = domain->get_region_by_id(arg[iarg+1]);
-      if (!region)
-        error->all(FLERR,"Region {} for fix gcmc does not exist",arg[iarg+1]);
+      if (!region) error->all(FLERR,"Region {} for fix gcmc does not exist",arg[iarg+1]);
       idregion = utils::strdup(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"maxangle") == 0) {
@@ -464,6 +463,8 @@ int FixGCMC::setmask()
 
 void FixGCMC::init()
 {
+  triclinic = domain->triclinic;
+
   // set index and check validity of region
 
   if (idregion) {
@@ -471,19 +472,31 @@ void FixGCMC::init()
     if (!region) error->all(FLERR, "Region {} for fix gcmc does not exist", idregion);
   }
 
-  triclinic = domain->triclinic;
+  if (region) {
+    if (region->bboxflag == 0)
+      error->all(FLERR,"Fix gcmc region does not support a bounding box");
+    if (region->dynamic_check())
+      error->all(FLERR,"Fix gcmc region cannot be dynamic");
 
-  if (triclinic) {
-    if ((region_xlo < domain->boxlo_bound[0]) || (region_xhi > domain->boxhi_bound[0]) ||
-        (region_ylo < domain->boxlo_bound[1]) || (region_yhi > domain->boxhi_bound[1]) ||
-        (region_zlo < domain->boxlo_bound[2]) || (region_zhi > domain->boxhi_bound[2])) {
-      error->all(FLERR,"Fix gcmc region extends outside simulation box");
+    region_xlo = region->extent_xlo;
+    region_xhi = region->extent_xhi;
+    region_ylo = region->extent_ylo;
+    region_yhi = region->extent_yhi;
+    region_zlo = region->extent_zlo;
+    region_zhi = region->extent_zhi;
+
+    if (triclinic) {
+      if ((region_xlo < domain->boxlo_bound[0]) || (region_xhi > domain->boxhi_bound[0]) ||
+          (region_ylo < domain->boxlo_bound[1]) || (region_yhi > domain->boxhi_bound[1]) ||
+          (region_zlo < domain->boxlo_bound[2]) || (region_zhi > domain->boxhi_bound[2])) {
+        error->all(FLERR,"Fix gcmc region extends outside simulation box");
+      }
+    } else {
+      if ((region_xlo < domain->boxlo[0]) || (region_xhi > domain->boxhi[0]) ||
+          (region_ylo < domain->boxlo[1]) || (region_yhi > domain->boxhi[1]) ||
+          (region_zlo < domain->boxlo[2]) || (region_zhi > domain->boxhi[2]))
+        error->all(FLERR,"Fix gcmc region extends outside simulation box");
     }
-  } else {
-    if ((region_xlo < domain->boxlo[0]) || (region_xhi > domain->boxhi[0]) ||
-        (region_ylo < domain->boxlo[1]) || (region_yhi > domain->boxhi[1]) ||
-        (region_zlo < domain->boxlo[2]) || (region_zhi > domain->boxhi[2]))
-      error->all(FLERR,"Fix gcmc region extends outside simulation box");
   }
 
   // set probabilities for MC moves
diff --git a/src/MC/fix_mol_swap.cpp b/src/MC/fix_mol_swap.cpp
index a47cf039d1..f496d4a9c5 100644
--- a/src/MC/fix_mol_swap.cpp
+++ b/src/MC/fix_mol_swap.cpp
@@ -38,7 +38,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MC/fix_widom.cpp b/src/MC/fix_widom.cpp
index cc2f1bc94d..c0a5501a22 100644
--- a/src/MC/fix_widom.cpp
+++ b/src/MC/fix_widom.cpp
@@ -50,7 +50,7 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using MathConst::MY_2PI;
 
-#define MAXENERGYTEST 1.0e50
+static constexpr double MAXENERGYTEST = 1.0e50;
 enum { EXCHATOM, EXCHMOL };    // exchmode
 
 /* ---------------------------------------------------------------------- */
@@ -73,6 +73,8 @@ FixWidom::FixWidom(LAMMPS *lmp, int narg, char **arg) :
   restart_global = 1;
   time_depend = 1;
 
+  triclinic = domain->triclinic;
+
   // required args
 
   nevery = utils::inumeric(FLERR,arg[3],false,lmp);
@@ -111,18 +113,6 @@ FixWidom::FixWidom(LAMMPS *lmp, int narg, char **arg) :
     region_zlo = region->extent_zlo;
     region_zhi = region->extent_zhi;
 
-    if (triclinic) {
-      if ((region_xlo < domain->boxlo_bound[0]) || (region_xhi > domain->boxhi_bound[0]) ||
-          (region_ylo < domain->boxlo_bound[1]) || (region_yhi > domain->boxhi_bound[1]) ||
-          (region_zlo < domain->boxlo_bound[2]) || (region_zhi > domain->boxhi_bound[2]))
-        error->all(FLERR,"Fix widom region {} extends outside simulation box", region->id);
-    } else {
-      if ((region_xlo < domain->boxlo[0]) || (region_xhi > domain->boxhi[0]) ||
-          (region_ylo < domain->boxlo[1]) || (region_yhi > domain->boxhi[1]) ||
-          (region_zlo < domain->boxlo[2]) || (region_zhi > domain->boxhi[2]))
-        error->all(FLERR,"Fix widom region {} extends outside simulation box", region->id);
-    }
-
     // estimate region volume using MC trials
 
     double coord[3];
@@ -216,8 +206,7 @@ void FixWidom::options(int narg, char **arg)
     } else if (strcmp(arg[iarg],"region") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix widom command");
       region = domain->get_region_by_id(arg[iarg+1]);
-      if (!region)
-        error->all(FLERR,"Region {} for fix widom does not exist",arg[iarg+1]);
+      if (!region) error->all(FLERR,"Region {} for fix widom does not exist",arg[iarg+1]);
       idregion = utils::strdup(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"charge") == 0) {
@@ -292,6 +281,7 @@ int FixWidom::setmask()
 
 void FixWidom::init()
 {
+  triclinic = domain->triclinic;
 
   // set index and check validity of region
 
@@ -300,7 +290,31 @@ void FixWidom::init()
     if (!region) error->all(FLERR, "Region {} for fix widom does not exist", idregion);
   }
 
-  triclinic = domain->triclinic;
+  if (region) {
+    if (region->bboxflag == 0)
+      error->all(FLERR,"Fix gcmc region does not support a bounding box");
+    if (region->dynamic_check())
+      error->all(FLERR,"Fix gcmc region cannot be dynamic");
+
+    region_xlo = region->extent_xlo;
+    region_xhi = region->extent_xhi;
+    region_ylo = region->extent_ylo;
+    region_yhi = region->extent_yhi;
+    region_zlo = region->extent_zlo;
+    region_zhi = region->extent_zhi;
+
+    if (triclinic) {
+      if ((region_xlo < domain->boxlo_bound[0]) || (region_xhi > domain->boxhi_bound[0]) ||
+          (region_ylo < domain->boxlo_bound[1]) || (region_yhi > domain->boxhi_bound[1]) ||
+          (region_zlo < domain->boxlo_bound[2]) || (region_zhi > domain->boxhi_bound[2]))
+        error->all(FLERR,"Fix widom region {} extends outside simulation box", region->id);
+    } else {
+      if ((region_xlo < domain->boxlo[0]) || (region_xhi > domain->boxhi[0]) ||
+          (region_ylo < domain->boxlo[1]) || (region_yhi > domain->boxhi[1]) ||
+          (region_zlo < domain->boxlo[2]) || (region_zhi > domain->boxhi[2]))
+        error->all(FLERR,"Fix widom region {} extends outside simulation box", region->id);
+    }
+  }
 
   ave_widom_chemical_potential = 0.0;
 
diff --git a/src/MDI/fix_mdi_qm.cpp b/src/MDI/fix_mdi_qm.cpp
index 3ff6b22764..86dc87fd35 100644
--- a/src/MDI/fix_mdi_qm.cpp
+++ b/src/MDI/fix_mdi_qm.cpp
@@ -27,7 +27,7 @@ using namespace FixConst;
 
 enum { NATIVE, REAL, METAL };    // LAMMPS units which MDI supports
 
-#define MAXELEMENT 118
+static constexpr int MAXELEMENT = 118;
 
 // prototype for non-class compare function for sorting QM IDs
 
@@ -48,6 +48,17 @@ FixMDIQM::FixMDIQM(LAMMPS *lmp, int narg, char **arg) :
   if (atom->map_style == Atom::MAP_NONE)
     error->all(FLERR, "Fix mdi/qm requires an atom map be defined");
 
+  // initialize class members
+
+  plugin = 0;
+  natoms_exists = 0;
+  celldispl_exists = 0;
+  elements_exists = 0;
+  types_exists = 0;
+  stress_exists = 0;
+  pe_exists = 0;
+  keelec_exists = 0;
+
   // confirm LAMMPS is being run as a driver
 
   int role;
diff --git a/src/MDI/fix_mdi_qmmm.cpp b/src/MDI/fix_mdi_qmmm.cpp
index 8e63a34472..c6df52333d 100644
--- a/src/MDI/fix_mdi_qmmm.cpp
+++ b/src/MDI/fix_mdi_qmmm.cpp
@@ -31,7 +31,7 @@ using namespace FixConst;
 enum { NATIVE, REAL, METAL };    // LAMMPS units which MDI supports
 enum { DIRECT, POTENTIAL };      // mode of QMMM coupling
 
-#define MAXELEMENT 118
+static constexpr int MAXELEMENT = 118;
 
 // prototype for non-class compare function for sorting QM IDs
 
@@ -53,6 +53,18 @@ FixMDIQMMM::FixMDIQMMM(LAMMPS *lmp, int narg, char **arg) :
   if (atom->map_style == Atom::MAP_NONE)
     error->all(FLERR, "Fix mdi/qmmm requires an atom map be defined");
 
+  // initialize class members
+
+  plugin = 0;
+  maxlocal = 0;
+  natoms_exists = 0;
+  celldispl_exists = 0;
+  elements_exists = 0;
+  types_exists = 0;
+  stress_exists = 0;
+  pe_exists = 0;
+  keelec_exists = 0;
+
   // confirm LAMMPS is being run as a driver
 
   int role;
diff --git a/src/MDI/mdi_engine.cpp b/src/MDI/mdi_engine.cpp
index f7025549cd..9c58de7560 100644
--- a/src/MDI/mdi_engine.cpp
+++ b/src/MDI/mdi_engine.cpp
@@ -54,7 +54,7 @@ enum { DEFAULT, MD, OPT };       // top-level MDI engine modes
 
 enum { TYPE, CHARGE, MASS, COORD, VELOCITY, FORCE, ADDFORCE };
 
-#define MAXELEMENT 118
+static constexpr int MAXELEMENT = 118;
 
 /* ----------------------------------------------------------------------
    trigger LAMMPS to start acting as an MDI engine
diff --git a/src/MDI/mdi_plugin.cpp b/src/MDI/mdi_plugin.cpp
index d7805d0cba..92b78b6afb 100644
--- a/src/MDI/mdi_plugin.cpp
+++ b/src/MDI/mdi_plugin.cpp
@@ -20,6 +20,7 @@
 
 #include "error.h"
 #include "input.h"
+#include "memory.h"
 #include "modify.h"
 
 #include <cstdlib>
@@ -67,12 +68,12 @@ MDIPlugin::MDIPlugin(LAMMPS *_lmp, int narg, char **arg) : Pointers(_lmp)
       // do variable substitution in multiple word extra_arg
 
       int ncopy = strlen(extra_arg) + 1;
-      char *copy = (char *) malloc(ncopy);
+      char *copy = (char *) memory->smalloc(ncopy,"mdi_plugin:copy");
       strncpy(copy, extra_arg, ncopy);
-      char *work = (char *) malloc(ncopy);
+      char *work = (char *) memory->smalloc(ncopy,"mdi_plugin:work");
       int nwork = ncopy;
       input->substitute(copy, work, ncopy, nwork, 0);
-      free(work);
+      memory->sfree(work);
       extra_arg = copy;
 
       iarg += 2;
@@ -83,12 +84,12 @@ MDIPlugin::MDIPlugin(LAMMPS *_lmp, int narg, char **arg) : Pointers(_lmp)
       // do variable substitution in multiple word lammps_command
 
       int ncopy = strlen(lammps_command) + 1;
-      char *copy = (char *) malloc(ncopy);
-      strncpy(copy, extra_arg, ncopy);
-      char *work = (char *) malloc(ncopy);
+      char *copy = (char *) memory->smalloc(ncopy,"mdi_plugin:work");
+      strncpy(copy, lammps_command, ncopy);
+      char *work = (char *) memory->smalloc(ncopy,"mdi_plugin:work");
       int nwork = ncopy;
       input->substitute(copy, work, ncopy, nwork, 0);
-      free(work);
+      memory->sfree(work);
       lammps_command = copy;
 
       iarg += 2;
@@ -128,8 +129,8 @@ MDIPlugin::MDIPlugin(LAMMPS *_lmp, int narg, char **arg) : Pointers(_lmp)
   MDI_Launch_plugin(plugin_name, plugin_args, &world, plugin_wrapper, (void *) this);
 
   delete[] plugin_args;
-  delete[] extra_arg;
-  delete[] lammps_command;
+  memory->sfree(extra_arg);
+  memory->sfree(lammps_command);
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/MEAM/pair_meam.cpp b/src/MEAM/pair_meam.cpp
index 2f095754af..c8932d9a31 100644
--- a/src/MEAM/pair_meam.cpp
+++ b/src/MEAM/pair_meam.cpp
@@ -34,7 +34,7 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 static const int nkeywords = 22;
 static const char *keywords[] = {
diff --git a/src/MESONT/pair_mesocnt.cpp b/src/MESONT/pair_mesocnt.cpp
index 521c8c5f60..eae638bde8 100644
--- a/src/MESONT/pair_mesocnt.cpp
+++ b/src/MESONT/pair_mesocnt.cpp
@@ -43,11 +43,11 @@ using namespace MathExtra;
 using MathConst::MY_2PI;
 using MathConst::MY_PI;
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 #define SELF_CUTOFF 3
-#define SMALL 1.0e-6
-#define SWITCH 1.0e-4
-#define RHOMIN 10.0
+static constexpr double SMALL = 1.0e-6;
+static constexpr double SWITCH = 1.0e-4;
+static constexpr double RHOMIN = 10.0;
 
 #define QUAD_FINF 129
 #define QUAD_FSEMI 10
diff --git a/src/MESONT/pair_mesocnt_viscous.cpp b/src/MESONT/pair_mesocnt_viscous.cpp
index be3715be25..df159db92a 100644
--- a/src/MESONT/pair_mesocnt_viscous.cpp
+++ b/src/MESONT/pair_mesocnt_viscous.cpp
@@ -36,7 +36,7 @@ using namespace MathExtra;
 using MathConst::MY_PI;
 
 #define SELF_CUTOFF 3
-#define RHOMIN 10.0
+static constexpr double RHOMIN = 10.0;
 
 #define QUAD_FINF 129
 #define QUAD_FSEMI 10
diff --git a/src/MISC/pair_agni.cpp b/src/MISC/pair_agni.cpp
index 8597e5db93..92b358316f 100644
--- a/src/MISC/pair_agni.cpp
+++ b/src/MISC/pair_agni.cpp
@@ -47,8 +47,8 @@ static const char cite_pair_agni[] =
   " year      = {2019},\n"
   "}\n\n";
 
-#define MAXLINE 10240
-#define MAXWORD 40
+static constexpr int MAXLINE = 10240;
+static constexpr int MAXWORD = 40;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MISC/pair_srp.cpp b/src/MISC/pair_srp.cpp
index ddcfb92e9c..31f5b85760 100644
--- a/src/MISC/pair_srp.cpp
+++ b/src/MISC/pair_srp.cpp
@@ -47,8 +47,8 @@ Please contact Timothy Sirk for questions (tim.sirk@us.army.mil).
 
 using namespace LAMMPS_NS;
 
-#define SMALL 1.0e-10
-#define BIG 1e10
+static constexpr double SMALL = 1.0e-10;
+static constexpr double BIG = 1e10;
 #define ONETWOBIT 0x40000000
 
 static const char cite_srp[] =
diff --git a/src/ML-IAP/mliap_descriptor_snap.cpp b/src/ML-IAP/mliap_descriptor_snap.cpp
index cec03fca76..892dc8a004 100644
--- a/src/ML-IAP/mliap_descriptor_snap.cpp
+++ b/src/ML-IAP/mliap_descriptor_snap.cpp
@@ -31,8 +31,8 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
-#define MAXWORD 3
+static constexpr int MAXLINE = 1024;
+static constexpr int MAXWORD = 3;
 
 /* ---------------------------------------------------------------------- */
 
@@ -380,7 +380,8 @@ void MLIAPDescriptorSNAP::read_paramfile(char *paramfilename)
                  utils::getsyserror());
   }
 
-  char line[MAXLINE], *ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
   int n;
 
diff --git a/src/ML-IAP/mliap_descriptor_so3.cpp b/src/ML-IAP/mliap_descriptor_so3.cpp
index 4f976f06be..2308961913 100644
--- a/src/ML-IAP/mliap_descriptor_so3.cpp
+++ b/src/ML-IAP/mliap_descriptor_so3.cpp
@@ -30,8 +30,8 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
-#define MAXWORD 3
+static constexpr int MAXLINE = 1024;
+static constexpr int MAXWORD = 3;
 
 /* ---------------------------------------------------------------------- */
 
@@ -90,7 +90,8 @@ void MLIAPDescriptorSO3::read_paramfile(char *paramfilename)
                  utils::getsyserror());
   }
 
-  char line[MAXLINE], *ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
   int n, nwords;
 
diff --git a/src/ML-IAP/mliap_model.cpp b/src/ML-IAP/mliap_model.cpp
index a93090d364..c6df037c04 100644
--- a/src/ML-IAP/mliap_model.cpp
+++ b/src/ML-IAP/mliap_model.cpp
@@ -26,8 +26,8 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
-#define MAXWORD 3
+static constexpr int MAXLINE = 1024;
+static constexpr int MAXWORD = 3;
 
 /* ---------------------------------------------------------------------- */
 
@@ -93,7 +93,8 @@ void MLIAPModelSimple::read_coeffs(char *coefffilename)
                  utils::getsyserror());
   }
 
-  char line[MAXLINE], *ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
 
   int n;
diff --git a/src/ML-IAP/mliap_model_nn.cpp b/src/ML-IAP/mliap_model_nn.cpp
index 6c039f9a07..6695109f91 100644
--- a/src/ML-IAP/mliap_model_nn.cpp
+++ b/src/ML-IAP/mliap_model_nn.cpp
@@ -28,7 +28,7 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 /* ---------------------------------------------------------------------- */
 
@@ -75,7 +75,8 @@ void MLIAPModelNN::read_coeffs(char *coefffilename)
                  utils::getsyserror());
   }
 
-  char line[MAXLINE], *ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int n, eof = 0, nwords = 0;
   while (nwords == 0) {
     if (comm->me == 0) {
diff --git a/src/ML-IAP/mliap_so3.cpp b/src/ML-IAP/mliap_so3.cpp
index 72ca466ab3..bfdde4b968 100644
--- a/src/ML-IAP/mliap_so3.cpp
+++ b/src/ML-IAP/mliap_so3.cpp
@@ -31,7 +31,7 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define SMALL 1.0e-8
+static constexpr double SMALL = 1.0e-8;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ML-PACE/compute_pace.cpp b/src/ML-PACE/compute_pace.cpp
new file mode 100644
index 0000000000..b96432cfe3
--- /dev/null
+++ b/src/ML-PACE/compute_pace.cpp
@@ -0,0 +1,479 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "compute_pace.h"
+#include "ace-evaluator/ace_evaluator.h"
+#include "ace-evaluator/ace_c_basis.h"
+#include "ace-evaluator/ace_abstract_basis.h"
+#include "ace-evaluator/ace_types.h"
+#include <cstring>
+#include <map>
+
+#include "atom.h"
+#include "update.h"
+#include "modify.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "force.h"
+#include "pair.h"
+#include "comm.h"
+#include "memory.h"
+#include "error.h"
+
+namespace LAMMPS_NS {
+struct ACECimpl {
+  ACECimpl() : basis_set(nullptr), ace(nullptr) {}
+  ~ACECimpl()
+  {
+    delete basis_set;
+    delete ace;
+  }
+  ACECTildeBasisSet *basis_set;
+  ACECTildeEvaluator *ace;
+};
+}
+
+using namespace LAMMPS_NS;
+
+enum { SCALAR, VECTOR, ARRAY };
+ComputePACE::ComputePACE(LAMMPS *lmp, int narg, char **arg) :
+  Compute(lmp, narg, arg), cutsq(nullptr), list(nullptr), pace(nullptr), paceall(nullptr),
+  pace_peratom(nullptr), map(nullptr), cg(nullptr), c_pe(nullptr), c_virial(nullptr)
+{
+  array_flag = 1;
+  extarray = 0;
+  bikflag = 0;
+  dgradflag = 0;
+
+  int ntypes = atom->ntypes;
+  int nargmin = 4;
+
+  acecimpl = new ACECimpl;
+  if (narg < nargmin) error->all(FLERR,"Illegal compute pace command");
+
+  bikflag = utils::inumeric(FLERR, arg[4], false, lmp);
+  dgradflag = utils::inumeric(FLERR, arg[5], false, lmp);
+  if (dgradflag && !bikflag)
+    error->all(FLERR,"Illegal compute pace command: dgradflag=1 requires bikflag=1");
+
+  memory->create(map,ntypes+1,"pace:map");
+
+  //read in file with CG coefficients or c_tilde coefficients
+
+  auto potential_file_name = utils::get_potential_file_path(arg[3]);
+  delete acecimpl->basis_set;
+  acecimpl->basis_set = new ACECTildeBasisSet(potential_file_name);
+  cutmax = acecimpl->basis_set->cutoffmax;
+
+  //# of rank 1, rank > 1 functions
+
+  int n_r1, n_rp = 0;
+  n_r1 = acecimpl->basis_set->total_basis_size_rank1[0];
+  n_rp = acecimpl->basis_set->total_basis_size[0];
+
+  int ncoeff = n_r1 + n_rp;
+  nvalues = ncoeff;
+
+  ndims_force = 3;
+  ndims_virial = 6;
+  bik_rows = 1;
+  yoffset = nvalues;
+  zoffset = 2*nvalues;
+  natoms = atom->natoms;
+  if (bikflag) bik_rows = natoms;
+    dgrad_rows = ndims_force*natoms;
+  size_array_rows = bik_rows+dgrad_rows + ndims_virial;
+  if (dgradflag) {
+    size_array_rows = bik_rows + 3*natoms*natoms + 1;
+    size_array_cols = nvalues + 3;
+    if (comm->me == 0)
+      error->warning(FLERR,"dgradflag=1 creates a N^2 array, beware of large systems.");
+  } else size_array_cols = nvalues*atom->ntypes + 1;
+  lastcol = size_array_cols-1;
+
+  ndims_peratom = ndims_force;
+  size_peratom = ndims_peratom*nvalues*atom->ntypes;
+
+  nmax = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+ComputePACE::~ComputePACE()
+{
+  delete acecimpl;
+  memory->destroy(pace);
+  memory->destroy(paceall);
+  memory->destroy(cutsq);
+  memory->destroy(pace_peratom);
+  memory->destroy(map);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputePACE::init()
+{
+  if (force->pair == nullptr)
+    error->all(FLERR,"Compute pace requires a pair style be defined");
+
+  if (cutmax > force->pair->cutforce)
+    error->all(FLERR,"Compute pace cutoff is longer than pairwise cutoff");
+
+  // need an occasional full neighbor list
+  neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_OCCASIONAL);
+
+  int count = 0;
+  for (int i = 0; i < modify->ncompute; i++)
+    if (strcmp(modify->compute[i]->style,"pace") == 0) count++;
+  if (count > 1 && comm->me == 0)
+    error->warning(FLERR,"More than one compute pace");
+
+  // allocate memory for global array
+  memory->create(pace,size_array_rows,size_array_cols, "pace:pace");
+  memory->create(paceall,size_array_rows,size_array_cols, "pace:paceall");
+  array = paceall;
+
+  // find compute for reference energy
+
+  std::string id_pe = std::string("thermo_pe");
+  int ipe = modify->find_compute(id_pe);
+  if (ipe == -1)
+    error->all(FLERR,"compute thermo_pe does not exist.");
+  c_pe = modify->compute[ipe];
+
+  // add compute for reference virial tensor
+
+  std::string id_virial = std::string("pace_press");
+  std::string pcmd = id_virial + " all pressure NULL virial";
+  modify->add_compute(pcmd);
+
+  int ivirial = modify->find_compute(id_virial);
+  if (ivirial == -1)
+    error->all(FLERR,"compute pace_press does not exist.");
+  c_virial = modify->compute[ivirial];
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputePACE::init_list(int /*id*/, NeighList *ptr)
+{
+  list = ptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputePACE::compute_array()
+{
+  int ntotal = atom->nlocal + atom->nghost;
+  invoked_array = update->ntimestep;
+
+  // grow pace_peratom array if necessary
+
+  if (atom->nmax > nmax) {
+    memory->destroy(pace_peratom);
+    nmax = atom->nmax;
+    memory->create(pace_peratom,nmax,size_peratom,"pace:pace_peratom");
+  }
+
+  // clear global array
+
+  for (int irow = 0; irow < size_array_rows; irow++){
+    for (int icoeff = 0; icoeff < size_array_cols; icoeff++){
+      pace[irow][icoeff] = 0.0;
+    }
+  }
+
+  // clear local peratom array
+
+  for (int i = 0; i < ntotal; i++){
+    for (int icoeff = 0; icoeff < size_peratom; icoeff++) {
+      pace_peratom[i][icoeff] = 0.0;
+    }
+  }
+
+  // invoke full neighbor list (will copy or build if necessary)
+
+  neighbor->build_one(list);
+
+  const int inum = list->inum;
+  const int* const ilist = list->ilist;
+  const int* const numneigh = list->numneigh;
+  int** const firstneigh = list->firstneigh;
+  int * const type = atom->type;
+
+  //determine the maximum number of neighbours
+  int max_jnum = -1;
+  int nei = 0;
+  int jtmp =0;
+  for (int iitmp = 0; iitmp < list->inum; iitmp++) {
+    int itmp = ilist[iitmp];
+    jtmp = numneigh[itmp];
+    nei = nei + jtmp;
+    if (jtmp > max_jnum){
+      max_jnum = jtmp;
+    }
+  }
+
+  // compute pace derivatives for each atom in group
+  // use full neighbor list to count atoms less than cutoff
+
+  const int* const mask = atom->mask;
+  const int ntypes = atom->ntypes;
+
+  for (int ii = 0; ii < inum; ii++) {
+    int irow = 0;
+    if (bikflag) irow = atom->tag[ilist[ii] & NEIGHMASK]-1;
+    const int i = ilist[ii];
+    if (mask[i] & groupbit) {
+      const int itype = type[i];
+      const int* const jlist = firstneigh[i];
+      const int jnum = numneigh[i];
+      const int typeoffset_local = ndims_peratom*nvalues*(itype-1);
+      const int typeoffset_global = nvalues*(itype-1);
+
+      delete acecimpl->ace;
+      acecimpl->ace = new ACECTildeEvaluator(*acecimpl->basis_set);
+      acecimpl->ace->compute_projections = true;
+      acecimpl->ace->compute_b_grad = true;
+      int n_r1, n_rp = 0;
+      n_r1 = acecimpl->basis_set->total_basis_size_rank1[0];
+      n_rp = acecimpl->basis_set->total_basis_size[0];
+
+      int ncoeff = n_r1 + n_rp;
+      acecimpl->ace->element_type_mapping.init(ntypes+1);
+      for (int ik = 1; ik <= ntypes; ik++) {
+        for(int mu = 0; mu < acecimpl->basis_set->nelements; mu++){
+          if (mu != -1) {
+            if (mu == ik - 1) {
+              map[ik] = mu;
+              acecimpl->ace->element_type_mapping(ik) = mu;
+            }
+          }
+        }
+      }
+
+      if (dgradflag) {
+
+        // dBi/dRi tags
+
+        pace[bik_rows + ((atom->tag[i]-1)*3*natoms) + 3*(atom->tag[i]-1) + 0][0] = atom->tag[i]-1;
+        pace[bik_rows + ((atom->tag[i]-1)*3*natoms) + 3*(atom->tag[i]-1) + 0][1] = atom->tag[i]-1;
+        pace[bik_rows + ((atom->tag[i]-1)*3*natoms) + 3*(atom->tag[i]-1) + 0][2] = 0;
+        pace[bik_rows + ((atom->tag[i]-1)*3*natoms) + 3*(atom->tag[i]-1) + 1][0] = atom->tag[i]-1;
+        pace[bik_rows + ((atom->tag[i]-1)*3*natoms) + 3*(atom->tag[i]-1) + 1][1] = atom->tag[i]-1;
+        pace[bik_rows + ((atom->tag[i]-1)*3*natoms) + 3*(atom->tag[i]-1) + 1][2] = 1;
+        pace[bik_rows + ((atom->tag[i]-1)*3*natoms) + 3*(atom->tag[i]-1) + 2][0] = atom->tag[i]-1;
+        pace[bik_rows + ((atom->tag[i]-1)*3*natoms) + 3*(atom->tag[i]-1) + 2][1] = atom->tag[i]-1;
+        pace[bik_rows + ((atom->tag[i]-1)*3*natoms) + 3*(atom->tag[i]-1) + 2][2] = 2;
+
+        // dBi/dRj tags
+
+        for (int j=0; j<natoms; j++) {
+          pace[bik_rows + ((j)*3*natoms) + 3*(atom->tag[i]-1) + 0][0] = atom->tag[i]-1;
+          pace[bik_rows + ((j)*3*natoms) + 3*(atom->tag[i]-1) + 0][1] = j;
+          pace[bik_rows + ((j)*3*natoms) + 3*(atom->tag[i]-1) + 0][2] = 0;
+          pace[bik_rows + ((j)*3*natoms) + 3*(atom->tag[i]-1) + 1][0] = atom->tag[i]-1;
+          pace[bik_rows + ((j)*3*natoms) + 3*(atom->tag[i]-1) + 1][1] = j;
+          pace[bik_rows + ((j)*3*natoms) + 3*(atom->tag[i]-1) + 1][2] = 1;
+          pace[bik_rows + ((j)*3*natoms) + 3*(atom->tag[i]-1) + 2][0] = atom->tag[i]-1;
+          pace[bik_rows + ((j)*3*natoms) + 3*(atom->tag[i]-1) + 2][1] = j;
+          pace[bik_rows + ((j)*3*natoms) + 3*(atom->tag[i]-1) + 2][2] = 2;
+        }
+      }
+
+      // resize the neighbor cache after setting the basis
+      acecimpl->ace->resize_neighbours_cache(max_jnum);
+      acecimpl->ace->compute_atom(i, atom->x, atom->type, list->numneigh[i], list->firstneigh[i]);
+      Array1D<DOUBLE_TYPE> Bs = acecimpl->ace->projections;
+
+      for (int jj = 0; jj < jnum; jj++) {
+        const int j = jlist[jj];
+        //replace mapping of jj to j
+        if (!dgradflag) {
+          double *pacedi = pace_peratom[i]+typeoffset_local;
+          double *pacedj = pace_peratom[j]+typeoffset_local;
+
+          //force array in (func_ind,neighbour_ind,xyz_ind) format
+          // dimension: (n_descriptors,max_jnum,3)
+          //example to access entries for neighbour jj after running compute_atom for atom i:
+          for (int func_ind =0; func_ind < n_r1 + n_rp; func_ind++){
+            DOUBLE_TYPE fx_dB = acecimpl->ace->neighbours_dB(func_ind,jj,0);
+            DOUBLE_TYPE fy_dB = acecimpl->ace->neighbours_dB(func_ind,jj,1);
+            DOUBLE_TYPE fz_dB = acecimpl->ace->neighbours_dB(func_ind,jj,2);
+            pacedi[func_ind] += fx_dB;
+            pacedi[func_ind+yoffset] += fy_dB;
+            pacedi[func_ind+zoffset] += fz_dB;
+            pacedj[func_ind] -= fx_dB;
+            pacedj[func_ind+yoffset] -= fy_dB;
+            pacedj[func_ind+zoffset] -= fz_dB;
+            }
+         } else {
+            for (int iicoeff = 0; iicoeff < ncoeff; iicoeff++) {
+
+              // add to pace array for this proc
+              // dBi/dRj
+              DOUBLE_TYPE fx_dB = acecimpl->ace->neighbours_dB(iicoeff,jj,0);
+              DOUBLE_TYPE fy_dB = acecimpl->ace->neighbours_dB(iicoeff,jj,1);
+              DOUBLE_TYPE fz_dB = acecimpl->ace->neighbours_dB(iicoeff,jj,2);
+              pace[bik_rows + ((atom->tag[j]-1)*3*natoms) + 3*(atom->tag[i]-1) + 0][iicoeff+3] -= fx_dB;
+              pace[bik_rows + ((atom->tag[j]-1)*3*natoms) + 3*(atom->tag[i]-1) + 1][iicoeff+3] -= fy_dB;
+              pace[bik_rows + ((atom->tag[j]-1)*3*natoms) + 3*(atom->tag[i]-1) + 2][iicoeff+3] -= fz_dB;
+
+              // dBi/dRi
+              pace[bik_rows + ((atom->tag[i]-1)*3*natoms) + 3*(atom->tag[i]-1) + 0][iicoeff+3] += fx_dB;
+              pace[bik_rows + ((atom->tag[i]-1)*3*natoms) + 3*(atom->tag[i]-1) + 1][iicoeff+3] += fy_dB;
+              pace[bik_rows + ((atom->tag[i]-1)*3*natoms) + 3*(atom->tag[i]-1) + 2][iicoeff+3] += fz_dB;
+            }
+          }
+        } // loop over jj inside
+      if (!dgradflag) {
+
+        int k = typeoffset_global;
+
+        for (int icoeff = 0; icoeff < ncoeff; icoeff++){
+          pace[irow][k++] += Bs(icoeff);
+        }
+      } else {
+        int k = 3;
+        for (int icoeff = 0; icoeff < ncoeff; icoeff++){
+          pace[irow][k++] += Bs(icoeff);
+        }
+      }
+    } //group bit
+  } // for ii loop
+  // accumulate force contributions to global array
+  if (!dgradflag){
+    for (int itype = 0; itype < atom->ntypes; itype++) {
+      const int typeoffset_local = ndims_peratom*nvalues*itype;
+      const int typeoffset_global = nvalues*itype;
+      for (int icoeff = 0; icoeff < nvalues; icoeff++) {
+        for (int i = 0; i < ntotal; i++) {
+          double *pacedi = pace_peratom[i]+typeoffset_local;
+          int iglobal = atom->tag[i];
+          int irow = 3*(iglobal-1)+1;
+          pace[irow++][icoeff+typeoffset_global] += pacedi[icoeff];
+          pace[irow++][icoeff+typeoffset_global] += pacedi[icoeff+yoffset];
+          pace[irow][icoeff+typeoffset_global] += pacedi[icoeff+zoffset];
+        }
+      }
+    }
+  }
+
+  if (!dgradflag) {
+    // accumulate forces to global array
+    for (int i = 0; i < atom->nlocal; i++) {
+      int iglobal = atom->tag[i];
+      int irow = 3*(iglobal-1)+1;
+      pace[irow++][lastcol] = atom->f[i][0];
+      pace[irow++][lastcol] = atom->f[i][1];
+      pace[irow][lastcol] = atom->f[i][2];
+    }
+  } else {
+
+    // for dgradflag=1, put forces at first 3 columns of bik rows
+
+    for (int i=0; i<atom->nlocal; i++) {
+      int iglobal = atom->tag[i];
+      pace[iglobal-1][0+0] = atom->f[i][0];
+      pace[iglobal-1][0+1] = atom->f[i][1];
+      pace[iglobal-1][0+2] = atom->f[i][2];
+    }
+  }
+
+  dbdotr_compute();
+
+  // sum up over all processes
+  MPI_Allreduce(&pace[0][0],&paceall[0][0],size_array_rows*size_array_cols,MPI_DOUBLE,MPI_SUM,world);
+
+  // assign energy to last column
+
+  if (!dgradflag) {
+    for (int i = 0; i < bik_rows; i++) paceall[i][lastcol] = 0;
+    int irow = 0;
+    double reference_energy = c_pe->compute_scalar();
+    paceall[irow][lastcol] = reference_energy;
+  } else {
+
+    // assign reference energy right after the dgrad rows, first column
+
+    int irow = bik_rows + 3*natoms*natoms;
+    double reference_energy = c_pe->compute_scalar();
+    paceall[irow][0] = reference_energy;
+  }
+
+  // assign virial stress to last column
+  // switch to Voigt notation
+
+  if (!dgradflag) {
+    c_virial->compute_vector();
+    int irow = 3*natoms+bik_rows;
+    paceall[irow++][lastcol] = c_virial->vector[0];
+    paceall[irow++][lastcol] = c_virial->vector[1];
+    paceall[irow++][lastcol] = c_virial->vector[2];
+    paceall[irow++][lastcol] = c_virial->vector[5];
+    paceall[irow++][lastcol] = c_virial->vector[4];
+    paceall[irow++][lastcol] = c_virial->vector[3];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   compute global virial contributions via summing r_i.dB^j/dr_i over
+   own & ghost atoms
+------------------------------------------------------------------------- */
+void ComputePACE::dbdotr_compute()
+{
+
+  if (dgradflag) return;
+
+  double **x = atom->x;
+  int irow0 = bik_rows+ndims_force*natoms;
+
+  // sum over ace contributions to forces
+  // on all particles including ghosts
+
+  int nall = atom->nlocal + atom->nghost;
+  for (int i = 0; i < nall; i++)
+    for (int itype = 0; itype < atom->ntypes; itype++) {
+      const int typeoffset_local = ndims_peratom*nvalues*itype;
+      const int typeoffset_global = nvalues*itype;
+      double *pacedi = pace_peratom[i]+typeoffset_local;
+      for (int icoeff = 0; icoeff < nvalues; icoeff++) {
+        double dbdx = pacedi[icoeff];
+        double dbdy = pacedi[icoeff+yoffset];
+        double dbdz = pacedi[icoeff+zoffset];
+        int irow = irow0;
+        pace[irow++][icoeff+typeoffset_global] += dbdx*x[i][0];
+        pace[irow++][icoeff+typeoffset_global] += dbdy*x[i][1];
+        pace[irow++][icoeff+typeoffset_global] += dbdz*x[i][2];
+        pace[irow++][icoeff+typeoffset_global] += dbdz*x[i][1];
+        pace[irow++][icoeff+typeoffset_global] += dbdz*x[i][0];
+        pace[irow++][icoeff+typeoffset_global] += dbdy*x[i][0];
+      }
+    }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage
+------------------------------------------------------------------------- */
+
+double ComputePACE::memory_usage()
+{
+
+  double bytes = (double)size_array_rows*size_array_cols*sizeof(double); // pace
+  bytes += (double)size_array_rows*size_array_cols*sizeof(double);       // paceall
+  bytes += (double)nmax*size_peratom * sizeof(double);                   // pace_peratom
+  int n = atom->ntypes+1;
+  bytes += (double)n*sizeof(int);        // map
+
+  return bytes;
+}
diff --git a/src/ML-PACE/compute_pace.h b/src/ML-PACE/compute_pace.h
new file mode 100644
index 0000000000..496c8a16d3
--- /dev/null
+++ b/src/ML-PACE/compute_pace.h
@@ -0,0 +1,58 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef COMPUTE_CLASS
+// clang-format off
+ComputeStyle(pace,ComputePACE);
+// clang-format on
+#else
+
+#ifndef LMP_COMPUTE_PACE_H
+#define LMP_COMPUTE_PACE_H
+
+#include "compute.h"
+
+namespace LAMMPS_NS {
+
+class ComputePACE : public Compute {
+ public:
+  ComputePACE(class LAMMPS *, int, char **);
+  ~ComputePACE();
+  void init();
+  void init_list(int, class NeighList *);
+  void compute_array();
+  double memory_usage();
+
+ private:
+  int natoms, nmax, size_peratom, lastcol;
+  int nvalues, yoffset, zoffset;
+  int ndims_peratom, ndims_force, ndims_virial;
+  double **cutsq;
+  class NeighList *list;
+  double **pace, **paceall;
+  double **pace_peratom;
+  int *map;    // map types to [0,nelements)
+  int bikflag, bik_rows, dgradflag, dgrad_rows;
+  double *cg;
+  double cutmax;
+  Compute *c_pe;
+  Compute *c_virial;
+
+  void dbdotr_compute();
+  struct ACECimpl *acecimpl;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/ML-PACE/pair_pace.cpp b/src/ML-PACE/pair_pace.cpp
index 57f12597d1..e9bd25f9d7 100644
--- a/src/ML-PACE/pair_pace.cpp
+++ b/src/ML-PACE/pair_pace.cpp
@@ -45,6 +45,7 @@ Copyright 2021 Yury Lysogorskiy^1, Cas van der Oord^2, Anton Bochkarev^1,
 #include "ace-evaluator/ace_evaluator.h"
 #include "ace-evaluator/ace_recursive.h"
 #include "ace-evaluator/ace_version.h"
+#include "ace/ace_b_basis.h"
 
 namespace LAMMPS_NS {
 struct ACEImpl {
@@ -87,6 +88,10 @@ PairPACE::PairPACE(LAMMPS *lmp) : Pair(lmp)
   one_coeff = 1;
   manybody_flag = 1;
 
+  nmax_corerep = 0;
+  flag_corerep_factor = 0;
+  corerep_factor = nullptr;
+
   aceimpl = new ACEImpl;
   recursive = false;
 
@@ -109,6 +114,7 @@ PairPACE::~PairPACE()
     memory->destroy(setflag);
     memory->destroy(cutsq);
     memory->destroy(scale);
+    memory->destroy(corerep_factor);
   }
 }
 
@@ -143,10 +149,18 @@ void PairPACE::compute(int eflag, int vflag)
   // the pointer to the list of neighbors of "i"
   firstneigh = list->firstneigh;
 
+  if (flag_corerep_factor && atom->nlocal > nmax_corerep) {
+    memory->destroy(corerep_factor);
+    nmax_corerep = atom->nlocal;
+    memory->create(corerep_factor, nmax_corerep, "pace/atom:corerep_factor");
+    //zeroify array
+    memset(corerep_factor, 0, nmax_corerep * sizeof(*corerep_factor));
+  }
+
   //determine the maximum number of neighbours
   int max_jnum = 0;
   int nei = 0;
-  for (ii = 0; ii < list->inum; ii++) {
+  for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     jnum = numneigh[i];
     nei = nei + jnum;
@@ -156,7 +170,7 @@ void PairPACE::compute(int eflag, int vflag)
   aceimpl->ace->resize_neighbours_cache(max_jnum);
 
   //loop over atoms
-  for (ii = 0; ii < list->inum; ii++) {
+  for (ii = 0; ii < inum; ii++) {
     i = list->ilist[ii];
     const int itype = type[i];
 
@@ -181,6 +195,9 @@ void PairPACE::compute(int eflag, int vflag)
       error->one(FLERR, e.what());
     }
 
+    if (flag_corerep_factor)
+      corerep_factor[i] = 1 - aceimpl->ace->ace_fcut;
+
     // 'compute_atom' will update the `aceimpl->ace->e_atom` and `aceimpl->ace->neighbours_forces(jj, alpha)` arrays
 
     for (jj = 0; jj < jnum; jj++) {
@@ -287,7 +304,14 @@ void PairPACE::coeff(int narg, char **arg)
   //load potential file
   delete aceimpl->basis_set;
   if (comm->me == 0) utils::logmesg(lmp, "Loading {}\n", potential_file_name);
-  aceimpl->basis_set = new ACECTildeBasisSet(potential_file_name);
+  // if potential is in ACEBBasisSet (YAML) format, then convert to ACECTildeBasisSet automatically
+  if (utils::strmatch(potential_file_name,".*\\.yaml$")) {
+    ACEBBasisSet bBasisSet = ACEBBasisSet(potential_file_name);
+    ACECTildeBasisSet cTildeBasisSet = bBasisSet.to_ACECTildeBasisSet();
+    aceimpl->basis_set = new ACECTildeBasisSet(cTildeBasisSet);
+  } else {
+      aceimpl->basis_set = new ACECTildeBasisSet(potential_file_name);
+  }
 
   if (comm->me == 0) {
     utils::logmesg(lmp, "Total number of basis functions\n");
@@ -374,7 +398,29 @@ double PairPACE::init_one(int i, int j)
  ---------------------------------------------------------------------- */
 void *PairPACE::extract(const char *str, int &dim)
 {
+  dim = 0;
+  //check if str=="corerep_flag" then compute extrapolation grades on this iteration
+  if (strcmp(str, "corerep_flag") == 0) return (void *) &flag_corerep_factor;
+
   dim = 2;
   if (strcmp(str, "scale") == 0) return (void *) scale;
   return nullptr;
 }
+
+/* ----------------------------------------------------------------------
+   peratom requests from FixPair
+   return ptr to requested data
+   also return ncol = # of quantites per atom
+     0 = per-atom vector
+     1 or more = # of columns in per-atom array
+   return NULL if str is not recognized
+---------------------------------------------------------------------- */
+void *PairPACE::extract_peratom(const char *str, int &ncol)
+{
+  if (strcmp(str, "corerep") == 0) {
+    ncol = 0;
+    return (void *) corerep_factor;
+  }
+
+  return nullptr;
+}
diff --git a/src/ML-PACE/pair_pace.h b/src/ML-PACE/pair_pace.h
index 94649ecaab..9b5d2c5480 100644
--- a/src/ML-PACE/pair_pace.h
+++ b/src/ML-PACE/pair_pace.h
@@ -48,11 +48,15 @@ class PairPACE : public Pair {
   double init_one(int, int) override;
 
   void *extract(const char *, int &) override;
+  void *extract_peratom(const char *, int &) override;
 
  protected:
   struct ACEImpl *aceimpl;
+  int nmax_corerep = 0;
 
   virtual void allocate();
+  double *corerep_factor;                    //per-atom core-rep factor (= 1 - fcut)
+  int flag_corerep_factor;
 
   double **scale;
   bool recursive;    // "recursive" option for ACERecursiveEvaluator
diff --git a/src/ML-PACE/pair_pace_extrapolation.cpp b/src/ML-PACE/pair_pace_extrapolation.cpp
index dc0fb1848b..d9b8d3588a 100644
--- a/src/ML-PACE/pair_pace_extrapolation.cpp
+++ b/src/ML-PACE/pair_pace_extrapolation.cpp
@@ -93,11 +93,14 @@ PairPACEExtrapolation::PairPACEExtrapolation(LAMMPS *lmp) : Pair(lmp)
   manybody_flag = 1;
 
   nmax = 0;
+  nmax_corerep = 0;
 
   aceimpl = new ACEALImpl;
   scale = nullptr;
   flag_compute_extrapolation_grade = 0;
   extrapolation_grade_gamma = nullptr;
+  flag_corerep_factor = 0;
+  corerep_factor = nullptr;
 
   chunksize = 4096;
 }
@@ -118,6 +121,7 @@ PairPACEExtrapolation::~PairPACEExtrapolation()
     memory->destroy(scale);
     memory->destroy(map);
     memory->destroy(extrapolation_grade_gamma);
+    memory->destroy(corerep_factor);
   }
 }
 
@@ -166,11 +170,18 @@ void PairPACEExtrapolation::compute(int eflag, int vflag)
     //zeroify array
     memset(extrapolation_grade_gamma, 0, nmax * sizeof(*extrapolation_grade_gamma));
   }
+  if (flag_corerep_factor && atom->nlocal > nmax_corerep) {
+    memory->destroy(corerep_factor);
+    nmax_corerep = atom->nlocal;
+    memory->create(corerep_factor, nmax_corerep, "pace/atom:corerep_factor");
+    //zeroify array
+    memset(corerep_factor, 0, nmax_corerep * sizeof(*corerep_factor));
+  }
 
   //determine the maximum number of neighbours
   int max_jnum = 0;
   int nei = 0;
-  for (ii = 0; ii < list->inum; ii++) {
+  for (ii = 0; ii < inum; ii++) {
     i = ilist[ii];
     jnum = numneigh[i];
     nei = nei + jnum;
@@ -183,7 +194,7 @@ void PairPACEExtrapolation::compute(int eflag, int vflag)
     aceimpl->rec_ace->resize_neighbours_cache(max_jnum);
 
   //loop over atoms
-  for (ii = 0; ii < list->inum; ii++) {
+  for (ii = 0; ii < inum; ii++) {
     i = list->ilist[ii];
     const int itype = type[i];
 
@@ -216,6 +227,11 @@ void PairPACEExtrapolation::compute(int eflag, int vflag)
     if (flag_compute_extrapolation_grade)
       extrapolation_grade_gamma[i] = aceimpl->ace->max_gamma_grade;
 
+    if (flag_corerep_factor) {
+      corerep_factor[i] = 1 - (flag_compute_extrapolation_grade ? aceimpl->ace->ace_fcut
+                              : aceimpl->rec_ace->ace_fcut);
+    }
+
     Array2D<DOUBLE_TYPE> &neighbours_forces =
         (flag_compute_extrapolation_grade ? aceimpl->ace->neighbours_forces
                                           : aceimpl->rec_ace->neighbours_forces);
@@ -437,9 +453,11 @@ double PairPACEExtrapolation::init_one(int i, int j)
  ---------------------------------------------------------------------- */
 void *PairPACEExtrapolation::extract(const char *str, int &dim)
 {
-  //check if str=="gamma_flag" then compute extrapolation grades on this iteration
   dim = 0;
+  //check if str=="gamma_flag" then compute extrapolation grades on this iteration
   if (strcmp(str, "gamma_flag") == 0) return (void *) &flag_compute_extrapolation_grade;
+  //check if str=="corerep_flag" then compute extrapolation grades on this iteration
+  if (strcmp(str, "corerep_flag") == 0) return (void *) &flag_corerep_factor;
 
   dim = 2;
   if (strcmp(str, "scale") == 0) return (void *) scale;
@@ -461,5 +479,10 @@ void *PairPACEExtrapolation::extract_peratom(const char *str, int &ncol)
     return (void *) extrapolation_grade_gamma;
   }
 
+  if (strcmp(str, "corerep") == 0) {
+    ncol = 0;
+    return (void *) corerep_factor;
+  }
+
   return nullptr;
 }
diff --git a/src/ML-PACE/pair_pace_extrapolation.h b/src/ML-PACE/pair_pace_extrapolation.h
index 6f7eeb279e..2dcec04d4b 100644
--- a/src/ML-PACE/pair_pace_extrapolation.h
+++ b/src/ML-PACE/pair_pace_extrapolation.h
@@ -47,13 +47,15 @@ class PairPACEExtrapolation : public Pair {
 
  protected:
   struct ACEALImpl *aceimpl;
-  int nmax;
+  int nmax = 0, nmax_corerep = 0;
 
   virtual void allocate();
   std::vector<std::string> element_names;    // list of elements (used by dump pace/extrapolation)
-  double *extrapolation_grade_gamma;         //per-atom gamma value
+  double *extrapolation_grade_gamma = nullptr;         //per-atom gamma value
+  double *corerep_factor = nullptr;                    //per-atom core-rep factor (= 1 - fcut)
 
-  int flag_compute_extrapolation_grade;
+  int flag_compute_extrapolation_grade = 0;
+  int flag_corerep_factor = 0;
 
   double **scale;
 
diff --git a/src/ML-POD/fitpod_command.cpp b/src/ML-POD/fitpod_command.cpp
index 87de65c3e2..ef39962e0b 100644
--- a/src/ML-POD/fitpod_command.cpp
+++ b/src/ML-POD/fitpod_command.cpp
@@ -33,8 +33,7 @@
 using namespace LAMMPS_NS;
 using MathSpecial::powint;
 
-#define MAXLINE 1024
-
+static constexpr int MAXLINE = 1024;
 static constexpr double SMALL = 1.0e-10;
 
 FitPOD::FitPOD(LAMMPS *_lmp) : Command(_lmp), podptr(nullptr)
@@ -151,7 +150,8 @@ int FitPOD::read_data_file(double *fitting_weights, std::string &file_format,
 
   // loop through lines of training data file and parse keywords
 
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
   while (true) {
     if (comm->me == 0) {
@@ -252,7 +252,8 @@ int FitPOD::get_number_atom_exyz(std::vector<int>& num_atom, int& num_atom_sum,
       error->one(FLERR,"Cannot open POD coefficient file {}: ", filename, utils::getsyserror());
   }
 
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
   int num_configs = 0;
   num_atom_sum = 0;
@@ -324,7 +325,8 @@ void FitPOD::read_exyz_file(double *lattice, double *stress, double *energy, dou
       error->one(FLERR,"Cannot open POD coefficient file {}: ", filename, utils::getsyserror());
   }
 
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
   int cfi = 0;
   int nat = 0;
diff --git a/src/ML-POD/mlpod.cpp b/src/ML-POD/mlpod.cpp
index f20498b50b..088b9abadc 100644
--- a/src/ML-POD/mlpod.cpp
+++ b/src/ML-POD/mlpod.cpp
@@ -35,7 +35,7 @@ using MathConst::MY_PI;
 using MathSpecial::cube;
 using MathSpecial::powint;
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 MLPOD::podstruct::podstruct() :
     twobody{4, 8, 6}, threebody{4, 8, 5, 4}, fourbody{0, 0, 0, 0}, pbc(nullptr),
@@ -302,7 +302,8 @@ void MLPOD::read_pod(const std::string &pod_file)
 
   // loop through lines of POD file and parse keywords
 
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
   while (true) {
     if (comm->me == 0) {
@@ -639,7 +640,8 @@ void MLPOD::read_coeff_file(const std::string &coeff_file)
 
   // check format for first line of file
 
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
   int nwords = 0;
   while (nwords == 0) {
diff --git a/src/ML-RANN/pair_rann.cpp b/src/ML-RANN/pair_rann.cpp
index f2948cbb43..4ec1f45703 100644
--- a/src/ML-RANN/pair_rann.cpp
+++ b/src/ML-RANN/pair_rann.cpp
@@ -55,7 +55,7 @@ DISTRIBUTION A. Approved for public release; distribution unlimited. OPSEC#4918
 #include "rann_fingerprint_radialscreenedspin.h"
 #include "rann_fingerprint_radialspin.h"
 
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 
 using namespace LAMMPS_NS;
 
@@ -616,7 +616,8 @@ void PairRANN::read_weight(std::vector<std::string> line,std::vector<std::string
 
 void PairRANN::read_bias(std::vector<std::string> line,std::vector<std::string> line1,FILE* fp,char *filename,int *linenum) {
   int i,j,l;
-  char linetemp[MAXLINE],*ptr;
+  char linetemp[MAXLINE] = {'\0'};
+  char *ptr;
   for (l=0;l<nelements;l++) {
     if (line[1].compare(elements[l])==0) {
       if (net[l].layers==0)error->one(filename,*linenum-1,"networklayers must be defined before biases.");
diff --git a/src/ML-SNAP/compute_sna_atom.cpp b/src/ML-SNAP/compute_sna_atom.cpp
index 2de25b09b6..b1b4a46482 100644
--- a/src/ML-SNAP/compute_sna_atom.cpp
+++ b/src/ML-SNAP/compute_sna_atom.cpp
@@ -56,7 +56,10 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) :
   wselfallflag = 0;
   switchinnerflag = 0;
   nelements = 1;
-
+  nnn = 12;
+  wmode = 0;
+  delta = 1.e-3;
+  nearest_neighbors_mode = false;
   // process required arguments
 
   memory->create(radelem, ntypes + 1, "sna/atom:radelem"); // offset by 1 to match up with types
@@ -114,6 +117,22 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) :
       if (iarg + 2 > narg) error->all(FLERR, "Illegal compute {} command", style);
       quadraticflag = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
       iarg += 2;
+    } else if (strcmp(arg[iarg],"nnn") == 0) {
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal compute {} command", style);
+      nnn = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
+      nearest_neighbors_mode = true;
+      if (nnn <= 0) error->all(FLERR, "Illegal compute compute {} command", style);
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"wmode") == 0) {
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal compute {} command", style);
+      wmode = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
+      if (wmode < 0) error->all(FLERR, "Illegal compute compute {} command", style);
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"delta") == 0) {
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal compute {} command", style);
+      delta = utils::numeric(FLERR, arg[iarg + 1], false, lmp);
+      if (delta < 1.0e-3) error->all(FLERR, "Illegal compute compute {} command", style);
+      iarg += 2;
     } else if (strcmp(arg[iarg], "chem") == 0) {
       if (iarg + 2 > narg) error->all(FLERR, "Illegal compute {} command", style);
       chemflag = 1;
@@ -183,6 +202,7 @@ ComputeSNAAtom::ComputeSNAAtom(LAMMPS *lmp, int narg, char **arg) :
 
   nmax = 0;
   sna = nullptr;
+
 }
 
 /* ---------------------------------------------------------------------- */
@@ -209,7 +229,7 @@ void ComputeSNAAtom::init()
 {
   if (force->pair == nullptr)
     error->all(FLERR,"Compute sna/atom requires a pair style be defined");
-
+  rcutsq = force->pair->cutforce * force->pair->cutforce;
   if (cutmax > force->pair->cutforce)
     error->all(FLERR,"Compute sna/atom cutoff is longer than pairwise cutoff");
 
@@ -275,63 +295,163 @@ void ComputeSNAAtom::compute_peratom()
       const int* const jlist = firstneigh[i];
       const int jnum = numneigh[i];
 
-      // ensure rij, inside, and typej  are of size jnum
 
-      snaptr->grow_rij(jnum);
+      // ############################################################################## //
+      // ##### Start of section for computing bispectrum on nnn nearest neighbors ##### //
+      // ############################################################################## //
+      if (nearest_neighbors_mode) {
+        // ##### 1) : consider full neighbor list in rlist
+        memory->create(distsq, jnum, "snann/atom:distsq");
+        memory->create(rlist, jnum, 3, "snann/atom:rlist");
 
-      // rij[][3] = displacements between atom I and those neighbors
-      // inside = indices of neighbors of I within cutoff
-      // typej = types of neighbors of I within cutoff
+        int ncount = 0;
+        for (int jj = 0; jj < jnum; jj++) {
+          int j = jlist[jj];
+          j &= NEIGHMASK;
 
-      int ninside = 0;
-      for (int jj = 0; jj < jnum; jj++) {
-        int j = jlist[jj];
-        j &= NEIGHMASK;
+          const double delx = xtmp - x[j][0];
+          const double dely = ytmp - x[j][1];
+          const double delz = ztmp - x[j][2];
+          const double rsq = delx * delx + dely * dely + delz * delz;
 
-        const double delx = xtmp - x[j][0];
-        const double dely = ytmp - x[j][1];
-        const double delz = ztmp - x[j][2];
-        const double rsq = delx*delx + dely*dely + delz*delz;
-        int jtype = type[j];
-        int jelem = 0;
-        if (chemflag)
-          jelem = map[jtype];
-        if (rsq < cutsq[itype][jtype] && rsq>1e-20) {
-          snaptr->rij[ninside][0] = delx;
-          snaptr->rij[ninside][1] = dely;
-          snaptr->rij[ninside][2] = delz;
-          snaptr->inside[ninside] = j;
-          snaptr->wj[ninside] = wjelem[jtype];
-          snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
-          if (switchinnerflag) {
-            snaptr->sinnerij[ninside] = 0.5*(sinnerelem[itype]+sinnerelem[jtype]);
-            snaptr->dinnerij[ninside] = 0.5*(dinnerelem[itype]+dinnerelem[jtype]);
+          if (rsq < rcutsq) {
+            distsq[ncount] = rsq;
+            rlist[ncount][0] = delx;
+            rlist[ncount][1] = dely;
+            rlist[ncount][2] = delz;
+            ncount++;
           }
-          if (chemflag) snaptr->element[ninside] = jelem;
-          ninside++;
         }
+
+        // ##### 2) : compute optimal cutoff such that sum weights S_target = nnn
+        double S_target=1.*nnn;
+        double rc_start=0.1;
+        double rc_max=sqrt(rcutsq);
+        double tol=1.e-8;
+        double * sol_dich = dichotomie(S_target, rc_start, rc_max, tol, distsq, ncount, wmode, delta);
+        memory->destroy(distsq);
+
+        // ##### 3) : assign that optimal cutoff radius to bispectrum context using rcsol
+        double rcsol = (sol_dich[0]+sol_dich[1])/2.;
+        memory->destroy(sol_dich);
+        snaptr->grow_rij(ncount);
+
+        int ninside = 0;
+        for (int jj = 0; jj < ncount; jj++) {
+          int j = jlist[jj];
+          j &= NEIGHMASK;
+
+          const double rsq = rlist[jj][0]*rlist[jj][0]+rlist[jj][1]*rlist[jj][1]+rlist[jj][2]*rlist[jj][2];
+          int jtype = type[j];
+          int jelem = 0;
+          if (chemflag)
+            jelem = map[jtype];
+
+          if (rsq < rcsol*rcsol) {
+            snaptr->rij[ninside][0] = rlist[jj][0];//rijmax;
+            snaptr->rij[ninside][1] = rlist[jj][1];//rijmax;
+            snaptr->rij[ninside][2] = rlist[jj][2];//rijmax;
+            snaptr->inside[ninside] = j;
+            snaptr->wj[ninside] = 1.;
+            snaptr->rcutij[ninside] = rcsol;
+
+            if (switchinnerflag) {
+              snaptr->sinnerij[ninside] = 0.5*(sinnerelem[itype]+sinnerelem[jtype]);
+              snaptr->dinnerij[ninside] = 0.5*(dinnerelem[itype]+dinnerelem[jtype]);
+            }
+            if (chemflag) snaptr->element[ninside] = jelem;
+            ninside++;
+          }
+        }
+
+        memory->destroy(rlist);
+
+        // ############################################################################ //
+        // ##### End of section for computing bispectrum on nnn nearest neighbors ##### //
+        // ############################################################################ //
+        snaptr->compute_ui(ninside, ielem);
+        snaptr->compute_zi();
+        snaptr->compute_bi(ielem);
+
+        for (int icoeff = 0; icoeff < ncoeff; icoeff++)
+          sna[i][icoeff] = snaptr->blist[icoeff];
+        if (quadraticflag) {
+          int ncount = ncoeff;
+          for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
+            double bi = snaptr->blist[icoeff];
+
+            // diagonal element of quadratic matrix
+
+            sna[i][ncount++] = 0.5*bi*bi;
+
+            // upper-triangular elements of quadratic matrix
+
+            for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++)
+              sna[i][ncount++] = bi*snaptr->blist[jcoeff];
+          }
+        }
+
+      } else {
+        // ensure rij, inside, and typej  are of size jnum
+
+        snaptr->grow_rij(jnum);
+
+        // rij[][3] = displacements between atom I and those neighbors
+        // inside = indices of neighbors of I within cutoff
+        // typej = types of neighbors of I within cutoff
+
+        int ninside = 0;
+        for (int jj = 0; jj < jnum; jj++) {
+          int j = jlist[jj];
+          j &= NEIGHMASK;
+
+          const double delx = xtmp - x[j][0];
+          const double dely = ytmp - x[j][1];
+          const double delz = ztmp - x[j][2];
+          const double rsq = delx*delx + dely*dely + delz*delz;
+          int jtype = type[j];
+          int jelem = 0;
+          if (chemflag)
+            jelem = map[jtype];
+          if (rsq < cutsq[itype][jtype] && rsq>1e-20) {
+            snaptr->rij[ninside][0] = delx;
+            snaptr->rij[ninside][1] = dely;
+            snaptr->rij[ninside][2] = delz;
+            snaptr->inside[ninside] = j;
+            snaptr->wj[ninside] = wjelem[jtype];
+            snaptr->rcutij[ninside] = (radi+radelem[jtype])*rcutfac;
+            if (switchinnerflag) {
+              snaptr->sinnerij[ninside] = 0.5*(sinnerelem[itype]+sinnerelem[jtype]);
+              snaptr->dinnerij[ninside] = 0.5*(dinnerelem[itype]+dinnerelem[jtype]);
+            }
+            if (chemflag) snaptr->element[ninside] = jelem;
+            ninside++;
+          }
+        }
+        snaptr->compute_ui(ninside, ielem);
+        snaptr->compute_zi();
+        snaptr->compute_bi(ielem);
+
+        for (int icoeff = 0; icoeff < ncoeff; icoeff++)
+          sna[i][icoeff] = snaptr->blist[icoeff];
+        if (quadraticflag) {
+          int ncount = ncoeff;
+          for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
+            double bi = snaptr->blist[icoeff];
+
+            // diagonal element of quadratic matrix
+
+            sna[i][ncount++] = 0.5*bi*bi;
+
+            // upper-triangular elements of quadratic matrix
+
+            for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++)
+              sna[i][ncount++] = bi*snaptr->blist[jcoeff];
+          }
+        }
+
       }
 
-      snaptr->compute_ui(ninside, ielem);
-      snaptr->compute_zi();
-      snaptr->compute_bi(ielem);
-      for (int icoeff = 0; icoeff < ncoeff; icoeff++)
-        sna[i][icoeff] = snaptr->blist[icoeff];
-      if (quadraticflag) {
-        int ncount = ncoeff;
-        for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
-          double bi = snaptr->blist[icoeff];
-
-          // diagonal element of quadratic matrix
-
-          sna[i][ncount++] = 0.5*bi*bi;
-
-          // upper-triangular elements of quadratic matrix
-
-          for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++)
-            sna[i][ncount++] = bi*snaptr->blist[jcoeff];
-        }
-      }
     } else {
       for (int icoeff = 0; icoeff < size_peratom_cols; icoeff++)
         sna[i][icoeff] = 0.0;
@@ -352,3 +472,197 @@ double ComputeSNAAtom::memory_usage()
   return bytes;
 }
 
+/* ----------------------------------------------------------------------
+   select3 routine from Numerical Recipes (slightly modified)
+   find k smallest values in array of length n
+   sort auxiliary arrays at same time
+------------------------------------------------------------------------- */
+
+// Use no-op do while to create single statement
+
+#define SWAP(a, b) \
+  do {             \
+    tmp = a;       \
+    (a) = b;       \
+    (b) = tmp;     \
+  } while (0)
+
+#define ISWAP(a, b) \
+  do {              \
+    itmp = a;       \
+    (a) = b;        \
+    (b) = itmp;     \
+  } while (0)
+
+#define SWAP3(a, b)  \
+  do {               \
+    tmp = (a)[0];    \
+    (a)[0] = (b)[0]; \
+    (b)[0] = tmp;    \
+    tmp = (a)[1];    \
+    (a)[1] = (b)[1]; \
+    (b)[1] = tmp;    \
+    tmp = (a)[2];    \
+    (a)[2] = (b)[2]; \
+    (b)[2] = tmp;    \
+  } while (0)
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeSNAAtom::select3(int k, int n, double *arr, int *iarr, double **arr3)
+{
+  int i, ir, j, l, mid, ia, itmp;
+  double a, tmp, a3[3];
+
+  arr--;
+  iarr--;
+  arr3--;
+  l = 1;
+  ir = n;
+  for (;;) {
+    if (ir <= l + 1) {
+      if (ir == l + 1 && arr[ir] < arr[l]) {
+        SWAP(arr[l], arr[ir]);
+        ISWAP(iarr[l], iarr[ir]);
+        SWAP3(arr3[l], arr3[ir]);
+      }
+      return;
+    } else {
+      mid = (l + ir) >> 1;
+      SWAP(arr[mid], arr[l + 1]);
+      ISWAP(iarr[mid], iarr[l + 1]);
+      SWAP3(arr3[mid], arr3[l + 1]);
+      if (arr[l] > arr[ir]) {
+        SWAP(arr[l], arr[ir]);
+        ISWAP(iarr[l], iarr[ir]);
+        SWAP3(arr3[l], arr3[ir]);
+      }
+      if (arr[l + 1] > arr[ir]) {
+        SWAP(arr[l + 1], arr[ir]);
+        ISWAP(iarr[l + 1], iarr[ir]);
+        SWAP3(arr3[l + 1], arr3[ir]);
+      }
+      if (arr[l] > arr[l + 1]) {
+        SWAP(arr[l], arr[l + 1]);
+        ISWAP(iarr[l], iarr[l + 1]);
+        SWAP3(arr3[l], arr3[l + 1]);
+      }
+      i = l + 1;
+      j = ir;
+      a = arr[l + 1];
+      ia = iarr[l + 1];
+      a3[0] = arr3[l + 1][0];
+      a3[1] = arr3[l + 1][1];
+      a3[2] = arr3[l + 1][2];
+      for (;;) {
+        do i++;
+        while (arr[i] < a);
+        do j--;
+        while (arr[j] > a);
+        if (j < i) break;
+        SWAP(arr[i], arr[j]);
+        ISWAP(iarr[i], iarr[j]);
+        SWAP3(arr3[i], arr3[j]);
+      }
+      arr[l + 1] = arr[j];
+      arr[j] = a;
+      iarr[l + 1] = iarr[j];
+      iarr[j] = ia;
+      arr3[l + 1][0] = arr3[j][0];
+      arr3[l + 1][1] = arr3[j][1];
+      arr3[l + 1][2] = arr3[j][2];
+      arr3[j][0] = a3[0];
+      arr3[j][1] = a3[1];
+      arr3[j][2] = a3[2];
+      if (j >= k) ir = j - 1;
+      if (j <= k) l = i;
+    }
+  }
+}
+
+double *ComputeSNAAtom::weights(double *rsq, double rcut, int ncounts)
+{
+  double *w=nullptr;
+  memory->destroy(w);
+  memory->create(w, ncounts, "snann:gauss_weights");
+  double rloc=0.;
+  for (int i=0; i<ncounts; i++) {
+    rloc = sqrt(rsq[i]);
+    if (rloc > rcut){
+      w[i]=0.;
+    } else {
+      w[i]=1.;
+    }
+  }
+  return w;
+}
+
+double *ComputeSNAAtom::tanh_weights(double *rsq, double rcut, double delta, int ncounts)
+{
+  double *w=nullptr;
+  memory->destroy(w);
+  memory->create(w, ncounts, "snann:gauss_weights");
+  double rloc=0.;
+
+  for (int i=0; i<ncounts; i++) {
+    rloc = sqrt(rsq[i]);
+    w[i] = 0.5*(1.-tanh((rloc-rcut)/delta));
+  }
+  return w;
+}
+
+double ComputeSNAAtom::sum_weights(double * /*rsq*/, double *w, int ncounts)
+{
+  double S=0.0;
+  for (int i=0; i<ncounts; i++) {
+    S += w[i];
+  }
+  return S;
+}
+
+double ComputeSNAAtom::get_target_rcut(double S_target, double *rsq, double rcut, int ncounts,
+                                       int weightmode, double delta)
+{
+  double S_sol = 0.0;
+  if (weightmode == 0) {
+    double *www = weights(rsq, rcut, ncounts);
+    S_sol = sum_weights(rsq, www, ncounts);
+    memory->destroy(www);
+  } else if (weightmode == 1) {
+    double *www = tanh_weights(rsq, rcut, delta, ncounts);
+    S_sol = sum_weights(rsq, www, ncounts);
+    memory->destroy(www);
+  }
+  double err = S_sol - S_target;
+  return err;
+}
+
+double *ComputeSNAAtom::dichotomie(double S_target, double a, double b, double e, double *rsq,
+                                   int ncounts, int weightmode, double delta)
+{
+
+  double d=b-a;
+  double *sol = nullptr;
+  memory->destroy(sol);
+  memory->create(sol, 2, "snann:sol");
+  double m=0.0;
+
+  do {
+    m = (a + b) / 2.0;
+    d = fabs(b - a);
+    double f_ra = get_target_rcut(S_target, rsq, a, ncounts, weightmode, delta);
+    double f_rm = get_target_rcut(S_target, rsq, m, ncounts, weightmode, delta);
+    if (f_rm == 0.0) {
+      sol[0]=m;
+      sol[1]=m;
+      return sol;
+    } else if (f_rm*f_ra > 0.0) {
+      a = m;
+    } else {
+      b = m;
+    }
+  } while (d > e);
+  sol[0]=a;
+  sol[1]=b;
+  return sol;
+}
diff --git a/src/ML-SNAP/compute_sna_atom.h b/src/ML-SNAP/compute_sna_atom.h
index 29a84c8dcf..2283865431 100644
--- a/src/ML-SNAP/compute_sna_atom.h
+++ b/src/ML-SNAP/compute_sna_atom.h
@@ -11,6 +11,10 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
+/* ----------------------------------------------------------------------
+   Contributing author: Paul Lafourcade (CEA-DAM-DIF, Arpajon, France)
+------------------------------------------------------------------------- */
+
 #ifdef COMPUTE_CLASS
 // clang-format off
 ComputeStyle(sna/atom,ComputeSNAAtom);
@@ -32,10 +36,25 @@ class ComputeSNAAtom : public Compute {
   void init_list(int, class NeighList *) override;
   void compute_peratom() override;
   double memory_usage() override;
+  double rcutsq;
+
+  void select3(int, int, double *, int *, double **);
+  double * weights(double *, double, int);
+  double * tanh_weights(double *, double, double, int);
+  double sum_weights(double *, double *, int);
+  double get_target_rcut(double, double *, double, int, int, double);
+  double * dichotomie(double, double, double, double, double *, int, int, double);
 
  private:
   int nmax;
   int ncoeff;
+  int nnn;
+  int wmode;
+  double delta;
+  bool nearest_neighbors_mode;
+  double *distsq;
+  double **rlist;
+  int *nearest;
   double **cutsq;
   class NeighList *list;
   double **sna;
diff --git a/src/ML-SNAP/pair_snap.cpp b/src/ML-SNAP/pair_snap.cpp
index 3029e9212d..f0fcd5c4e7 100644
--- a/src/ML-SNAP/pair_snap.cpp
+++ b/src/ML-SNAP/pair_snap.cpp
@@ -29,8 +29,8 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024
-#define MAXWORD 3
+static constexpr int MAXLINE = 1024;
+static constexpr int MAXWORD = 3;
 
 /* ---------------------------------------------------------------------- */
 
@@ -475,7 +475,8 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename)
                                    coefffilename, utils::getsyserror());
   }
 
-  char line[MAXLINE],*ptr;
+  char line[MAXLINE] = {'\0'};
+  char *ptr;
   int eof = 0;
   int nwords = 0;
   while (nwords == 0) {
diff --git a/src/MOFFF/angle_class2_p6.cpp b/src/MOFFF/angle_class2_p6.cpp
index bfa6a068f5..39dec0d9d6 100644
--- a/src/MOFFF/angle_class2_p6.cpp
+++ b/src/MOFFF/angle_class2_p6.cpp
@@ -34,7 +34,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MOFFF/angle_cosine_buck6d.cpp b/src/MOFFF/angle_cosine_buck6d.cpp
index dbdf39292e..0ab9cbbf1f 100644
--- a/src/MOFFF/angle_cosine_buck6d.cpp
+++ b/src/MOFFF/angle_cosine_buck6d.cpp
@@ -34,7 +34,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MOFFF/improper_inversion_harmonic.cpp b/src/MOFFF/improper_inversion_harmonic.cpp
index 817b35332a..cc9bd8ad94 100644
--- a/src/MOFFF/improper_inversion_harmonic.cpp
+++ b/src/MOFFF/improper_inversion_harmonic.cpp
@@ -35,8 +35,8 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MOLECULE/angle_table.cpp b/src/MOLECULE/angle_table.cpp
index c5e65be402..b1984a6918 100644
--- a/src/MOLECULE/angle_table.cpp
+++ b/src/MOLECULE/angle_table.cpp
@@ -38,8 +38,8 @@ using MathConst::RAD2DEG;
 
 enum { LINEAR, SPLINE };
 
-#define SMALL 0.001
-#define TINY 1.E-10
+static constexpr double SMALL = 0.001;
+static constexpr double TINY = 1.E-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MOLECULE/bond_table.cpp b/src/MOLECULE/bond_table.cpp
index 4068e98e8d..67cd3769ca 100644
--- a/src/MOLECULE/bond_table.cpp
+++ b/src/MOLECULE/bond_table.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 
 enum { NONE, LINEAR, SPLINE };
 
-#define BIGNUM 1.0e300
+static constexpr double BIGNUM = 1.0e300;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MOLECULE/fix_cmap.cpp b/src/MOLECULE/fix_cmap.cpp
index f74c05ef06..cb4cb8cadc 100644
--- a/src/MOLECULE/fix_cmap.cpp
+++ b/src/MOLECULE/fix_cmap.cpp
@@ -39,6 +39,7 @@
 #include "memory.h"
 #include "potential_file_reader.h"
 #include "respa.h"
+#include "text_file_reader.h"
 #include "update.h"
 
 #include <cmath>
@@ -49,15 +50,14 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
-#define MAXLINE 256
-#define LISTDELTA 10000
-#define LB_FACTOR 1.5
+static constexpr int LISTDELTA = 10000;
+static constexpr double LB_FACTOR = 1.5;
 
-#define CMAPMAX 6   // max # of CMAP terms stored by one atom
-#define CMAPDIM 24  // grid map dimension is 24 x 24
-#define CMAPXMIN -360.0
-#define CMAPXMIN2 -180.0
-#define CMAPDX 15.0 // 360/CMAPDIM
+static constexpr int CMAPMAX = 6;   // max # of CMAP terms stored by one atom
+static constexpr int CMAPDIM = 24;  // grid map dimension is 24 x 24
+static constexpr double CMAPXMIN = -360.0;
+static constexpr double CMAPXMIN2 = -180.0;
+static constexpr double CMAPDX = 15.0; // 360/CMAPDIM
 
 /* ---------------------------------------------------------------------- */
 
@@ -86,17 +86,15 @@ FixCMAP::FixCMAP(LAMMPS *lmp, int narg, char **arg) :
   wd_section = 1;
   respa_level_support = 1;
   ilevel_respa = 0;
-
-  MPI_Comm_rank(world,&me);
-  MPI_Comm_size(world,&nprocs);
+  eflag_caller = 1;
 
   // allocate memory for CMAP data
 
   memory->create(g_axis,CMAPDIM,"cmap:g_axis");
-  memory->create(cmapgrid,6,CMAPDIM,CMAPDIM,"cmap:grid");
-  memory->create(d1cmapgrid,6,CMAPDIM,CMAPDIM,"cmap:d1grid");
-  memory->create(d2cmapgrid,6,CMAPDIM,CMAPDIM,"cmap:d2grid");
-  memory->create(d12cmapgrid,6,CMAPDIM,CMAPDIM,"cmap:d12grid");
+  memory->create(cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:grid");
+  memory->create(d1cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:d1grid");
+  memory->create(d2cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:d2grid");
+  memory->create(d12cmapgrid,CMAPMAX,CMAPDIM,CMAPDIM,"cmap:d12grid");
 
   // read and setup CMAP data
 
@@ -184,10 +182,6 @@ void FixCMAP::init()
   for (i = 0; i < 6; i++)
     set_map_derivatives(cmapgrid[i],d1cmapgrid[i],d2cmapgrid[i],d12cmapgrid[i]);
 
-  // define newton_bond here in case restart file was read (not data file)
-
-  newton_bond = force->newton_bond;
-
   if (utils::strmatch(update->integrate_style,"^respa")) {
     ilevel_respa = (dynamic_cast<Respa *>(update->integrate))->nlevels-1;
     if (respa_level >= 0) ilevel_respa = MIN(respa_level,ilevel_respa);
@@ -238,6 +232,8 @@ void FixCMAP::min_setup(int vflag)
 void FixCMAP::pre_neighbor()
 {
   int i,m,atom1,atom2,atom3,atom4,atom5;
+  const int me = comm->me;
+  const int nprocs = comm->nprocs;
 
   // guesstimate initial length of local crossterm list
   // if ncmap was not set (due to read_restart, no read_data),
@@ -637,15 +633,22 @@ void FixCMAP::read_grid_map(char *cmapfile)
 {
   if (comm->me == 0) {
     try {
-      memset(&cmapgrid[0][0][0], 0, 6*CMAPDIM*CMAPDIM*sizeof(double));
+      ncrosstermtypes = 0;
+      memset(&cmapgrid[0][0][0], 0, CMAPMAX*CMAPDIM*CMAPDIM*sizeof(double));
+      utils::logmesg(lmp, "Reading CMAP parameters from: {}\n", cmapfile);
       PotentialFileReader reader(lmp, cmapfile, "cmap grid");
 
-      // there are six maps in this order.
+      // there may be up to six maps.
+      // the charmm36.cmap file has in this order.
       // alanine, alanine-proline, proline, proline-proline, glycine, glycine-proline.
-      // read as one big blob of numbers while ignoring comments
-
-      reader.next_dvector(&cmapgrid[0][0][0],6*CMAPDIM*CMAPDIM);
+      // custom CMAP files created by charmm-gui may have fewer entries
+      // read one map at a time as a blob of numbers while ignoring comments
+      // and stop reading when whe have reached EOF.
+      for (ncrosstermtypes = 0; ncrosstermtypes < CMAPMAX; ++ncrosstermtypes)
+        reader.next_dvector(&cmapgrid[ncrosstermtypes][0][0],CMAPDIM*CMAPDIM);
 
+    } catch (EOFException &) {
+      utils::logmesg(lmp, "  Read in CMAP data for {} crossterm types\n", ncrosstermtypes);
     } catch (std::exception &e) {
       error->one(FLERR,"Error reading CMAP potential file: {}", e.what());
     }
@@ -934,10 +937,6 @@ void FixCMAP::read_data_header(char *line)
   } catch (std::exception &e) {
     error->all(FLERR,"Invalid read data header line for fix cmap: {}", e.what());
   }
-
-  // not set in constructor because this fix could be defined before newton command
-
-  newton_bond = force->newton_bond;
 }
 
 /* ----------------------------------------------------------------------
@@ -957,10 +956,10 @@ void FixCMAP::read_data_section(char * /*keyword*/, int /*n*/, char *buf,
 
   // loop over lines of CMAP crossterms
   // tokenize the line into values
-  // add crossterm to one of my atoms, depending on newton_bond
+  // add crossterm to one of my atoms
 
   for (const auto &line : lines) {
-    ValueTokenizer values(line);
+    ValueTokenizer values(utils::trim_comment(line));
     try {
       values.skip();
       itype = values.next_int();
diff --git a/src/MOLECULE/fix_cmap.h b/src/MOLECULE/fix_cmap.h
index fce76aa540..1c6aba95e0 100644
--- a/src/MOLECULE/fix_cmap.h
+++ b/src/MOLECULE/fix_cmap.h
@@ -65,8 +65,7 @@ class FixCMAP : public Fix {
   double memory_usage() override;
 
  private:
-  int nprocs, me;
-  int newton_bond, eflag_caller;
+  int eflag_caller;
   int ctype, ilevel_respa;
   int ncrosstermtypes, crossterm_per_atom, maxcrossterm;
   int ncrosstermlist;
diff --git a/src/MOLECULE/pair_hbond_dreiding_lj.cpp b/src/MOLECULE/pair_hbond_dreiding_lj.cpp
index 496e368452..dbd7db7780 100644
--- a/src/MOLECULE/pair_hbond_dreiding_lj.cpp
+++ b/src/MOLECULE/pair_hbond_dreiding_lj.cpp
@@ -37,8 +37,8 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define SMALL 0.001
-#define CHUNK 8
+static constexpr double SMALL = 0.001;
+static constexpr int CHUNK = 8;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MOLECULE/pair_hbond_dreiding_morse.cpp b/src/MOLECULE/pair_hbond_dreiding_morse.cpp
index 7ec2db073d..5cc45ea234 100644
--- a/src/MOLECULE/pair_hbond_dreiding_morse.cpp
+++ b/src/MOLECULE/pair_hbond_dreiding_morse.cpp
@@ -37,8 +37,8 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define SMALL 0.001
-#define CHUNK 8
+static constexpr double SMALL = 0.001;
+static constexpr int CHUNK = 8;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/MOLFILE/reader_molfile.cpp b/src/MOLFILE/reader_molfile.cpp
index 441a152ad9..43154d658b 100644
--- a/src/MOLFILE/reader_molfile.cpp
+++ b/src/MOLFILE/reader_molfile.cpp
@@ -29,7 +29,7 @@ using namespace LAMMPS_NS;
 typedef MolfileInterface MFI;
 using namespace MathConst;
 
-#define SMALL 1.0e-6
+static constexpr double SMALL = 1.0e-6;
 
 // true if the difference between two floats is "small".
 // cannot use fabsf() since it is not fully portable.
diff --git a/src/Makefile b/src/Makefile
index 196641d67d..b9f1bcbdef 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -474,7 +474,7 @@ tar:
 	@cd STUBS; $(MAKE)
 	@echo "Created $(ROOT)_src.tar.gz"
 
-check: check-whitespace check-permissions check-homepage check-errordocs check-version
+check: check-whitespace check-permissions check-homepage check-errordocs check-docs check-version
 
 check-whitespace:
 	$(PYTHON) ../tools/coding_standard/whitespace.py ..
@@ -500,8 +500,12 @@ check-errordocs:
 fix-errordocs:
 	$(PYTHON) ../tools/coding_standard/errordocs.py .. -f
 
+check-docs:
+	$(MAKE) $(MFLAGS) -C ../doc anchor_check style_check package_check role_check
+
 check-version:
-	$(PYTHON) ../tools/coding_standard/versiontags.py ..
+	$(PYTHON) ../tools/coding_standard/versiontags.py .. || echo
+
 
 format-src:
 	clang-format -i --verbose --style=file *.cpp *.h */*.cpp */*.h
diff --git a/src/OPENMP/angle_charmm_omp.cpp b/src/OPENMP/angle_charmm_omp.cpp
index 9eb91bcb27..bb5dcca0da 100644
--- a/src/OPENMP/angle_charmm_omp.cpp
+++ b/src/OPENMP/angle_charmm_omp.cpp
@@ -28,7 +28,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_class2_omp.cpp b/src/OPENMP/angle_class2_omp.cpp
index 300f8f2b3c..9480d2ebfb 100644
--- a/src/OPENMP/angle_class2_omp.cpp
+++ b/src/OPENMP/angle_class2_omp.cpp
@@ -28,7 +28,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_cosine_delta_omp.cpp b/src/OPENMP/angle_cosine_delta_omp.cpp
index 98b6a7ac56..cc8370d053 100644
--- a/src/OPENMP/angle_cosine_delta_omp.cpp
+++ b/src/OPENMP/angle_cosine_delta_omp.cpp
@@ -28,7 +28,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_cosine_omp.cpp b/src/OPENMP/angle_cosine_omp.cpp
index 1985633c38..e4e32bd769 100644
--- a/src/OPENMP/angle_cosine_omp.cpp
+++ b/src/OPENMP/angle_cosine_omp.cpp
@@ -28,7 +28,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_cosine_periodic_omp.cpp b/src/OPENMP/angle_cosine_periodic_omp.cpp
index 43b3a54a47..4abe7bd692 100644
--- a/src/OPENMP/angle_cosine_periodic_omp.cpp
+++ b/src/OPENMP/angle_cosine_periodic_omp.cpp
@@ -30,7 +30,7 @@
 using namespace LAMMPS_NS;
 using namespace MathSpecial;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
@@ -140,7 +140,7 @@ void AngleCosinePeriodicOMP::eval(int nfrom, int nto, ThrData * const thr)
     tn = 1.0;
     tn_1 = 1.0;
     tn_2 = 0.0;
-    un = 1.0;
+    un = (m==1) ? 2.0 : 1.0;
     un_1 = 2.0;
     un_2 = 0.0;
 
diff --git a/src/OPENMP/angle_cosine_shift_exp_omp.cpp b/src/OPENMP/angle_cosine_shift_exp_omp.cpp
index 5831f59fab..c131efd839 100644
--- a/src/OPENMP/angle_cosine_shift_exp_omp.cpp
+++ b/src/OPENMP/angle_cosine_shift_exp_omp.cpp
@@ -28,7 +28,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_cosine_shift_omp.cpp b/src/OPENMP/angle_cosine_shift_omp.cpp
index 020f7583ff..47fed634aa 100644
--- a/src/OPENMP/angle_cosine_shift_omp.cpp
+++ b/src/OPENMP/angle_cosine_shift_omp.cpp
@@ -28,7 +28,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_cosine_squared_omp.cpp b/src/OPENMP/angle_cosine_squared_omp.cpp
index 9d98455fe4..f14af0b3e4 100644
--- a/src/OPENMP/angle_cosine_squared_omp.cpp
+++ b/src/OPENMP/angle_cosine_squared_omp.cpp
@@ -28,7 +28,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_dipole_omp.cpp b/src/OPENMP/angle_dipole_omp.cpp
index 3a8099ac73..338c63e42e 100644
--- a/src/OPENMP/angle_dipole_omp.cpp
+++ b/src/OPENMP/angle_dipole_omp.cpp
@@ -29,7 +29,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_fourier_omp.cpp b/src/OPENMP/angle_fourier_omp.cpp
index 0c85264342..27d5f62595 100644
--- a/src/OPENMP/angle_fourier_omp.cpp
+++ b/src/OPENMP/angle_fourier_omp.cpp
@@ -28,7 +28,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_fourier_simple_omp.cpp b/src/OPENMP/angle_fourier_simple_omp.cpp
index a8f234918b..ad63fa0862 100644
--- a/src/OPENMP/angle_fourier_simple_omp.cpp
+++ b/src/OPENMP/angle_fourier_simple_omp.cpp
@@ -28,7 +28,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.0001
+static constexpr double SMALL = 0.0001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_harmonic_omp.cpp b/src/OPENMP/angle_harmonic_omp.cpp
index e83146a3ad..5d779fbe21 100644
--- a/src/OPENMP/angle_harmonic_omp.cpp
+++ b/src/OPENMP/angle_harmonic_omp.cpp
@@ -28,7 +28,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_lepton_omp.cpp b/src/OPENMP/angle_lepton_omp.cpp
index 7e86a9e9bb..f57cf916a2 100644
--- a/src/OPENMP/angle_lepton_omp.cpp
+++ b/src/OPENMP/angle_lepton_omp.cpp
@@ -91,10 +91,17 @@ void AngleLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 {
   std::vector<Lepton::CompiledExpression> angleforce;
   std::vector<Lepton::CompiledExpression> anglepot;
+  std::vector<bool> has_ref;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
       angleforce.emplace_back(parsed.differentiate("theta").createCompiledExpression());
+      has_ref.push_back(true);
+      try {
+        angleforce.back().getVariableReference("theta");
+      } catch (Lepton::Exception &) {
+        has_ref.back() = false;
+      }
       if (EFLAG) anglepot.emplace_back(parsed.createCompiledExpression());
     }
   } catch (std::exception &e) {
@@ -146,8 +153,7 @@ void AngleLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 
     const double dtheta = acos(c) - theta0[type];
     const int idx = type2expression[type];
-    angleforce[idx].getVariableReference("theta") = dtheta;
-
+    if (has_ref[idx]) angleforce[idx].getVariableReference("theta") = dtheta;
     const double a = -angleforce[idx].evaluate() * s;
     const double a11 = a * c / rsq1;
     const double a12 = -a / (r1 * r2);
@@ -183,7 +189,11 @@ void AngleLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 
     double eangle = 0.0;
     if (EFLAG) {
-      anglepot[idx].getVariableReference("theta") = dtheta;
+      try {
+        anglepot[idx].getVariableReference("theta") = dtheta;
+      } catch (Lepton::Exception &) {
+        ;    // ignore -> constant force
+      }
       eangle = anglepot[idx].evaluate() - offset[type];
     }
     if (EVFLAG)
diff --git a/src/OPENMP/angle_quartic_omp.cpp b/src/OPENMP/angle_quartic_omp.cpp
index bb9a9837d2..7c1d1b4a17 100644
--- a/src/OPENMP/angle_quartic_omp.cpp
+++ b/src/OPENMP/angle_quartic_omp.cpp
@@ -28,7 +28,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_spica_omp.cpp b/src/OPENMP/angle_spica_omp.cpp
index 728e2ff435..f1dd7d40b5 100644
--- a/src/OPENMP/angle_spica_omp.cpp
+++ b/src/OPENMP/angle_spica_omp.cpp
@@ -30,7 +30,7 @@
 using namespace LAMMPS_NS;
 using namespace LJSPICAParms;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/angle_table_omp.cpp b/src/OPENMP/angle_table_omp.cpp
index d75bd8a694..11436723b1 100644
--- a/src/OPENMP/angle_table_omp.cpp
+++ b/src/OPENMP/angle_table_omp.cpp
@@ -29,7 +29,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/bond_lepton_omp.cpp b/src/OPENMP/bond_lepton_omp.cpp
index 0029062366..d9982b08f8 100644
--- a/src/OPENMP/bond_lepton_omp.cpp
+++ b/src/OPENMP/bond_lepton_omp.cpp
@@ -89,10 +89,17 @@ void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 {
   std::vector<Lepton::CompiledExpression> bondforce;
   std::vector<Lepton::CompiledExpression> bondpot;
+  std::vector<bool> has_ref;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
       bondforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      has_ref.push_back(true);
+      try {
+        bondforce.back().getVariableReference("r");
+      } catch (Lepton::Exception &) {
+        has_ref.back() = false;
+      }
       if (EFLAG) bondpot.emplace_back(parsed.createCompiledExpression());
     }
   } catch (std::exception &e) {
@@ -122,7 +129,7 @@ void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 
     double fbond = 0.0;
     if (r > 0.0) {
-      bondforce[idx].getVariableReference("r") = dr;
+      if (has_ref[idx]) bondforce[idx].getVariableReference("r") = dr;
       fbond = -bondforce[idx].evaluate() / r;
     }
 
@@ -142,7 +149,11 @@ void BondLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 
     double ebond = 0.0;
     if (EFLAG) {
-      bondpot[idx].getVariableReference("r") = dr;
+      try {
+        bondpot[idx].getVariableReference("r") = dr;
+      } catch (Lepton::Exception &) {
+        ;    // ignore -> constant potential
+      }
       ebond = bondpot[idx].evaluate() - offset[type];
     }
     if (EVFLAG)
diff --git a/src/OPENMP/dihedral_charmm_omp.cpp b/src/OPENMP/dihedral_charmm_omp.cpp
index e78a3e8919..42fbea419f 100644
--- a/src/OPENMP/dihedral_charmm_omp.cpp
+++ b/src/OPENMP/dihedral_charmm_omp.cpp
@@ -30,8 +30,8 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/dihedral_class2_omp.cpp b/src/OPENMP/dihedral_class2_omp.cpp
index 8ec39ceee8..ffcdf4fa1a 100644
--- a/src/OPENMP/dihedral_class2_omp.cpp
+++ b/src/OPENMP/dihedral_class2_omp.cpp
@@ -29,8 +29,8 @@
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.0000001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.0000001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/dihedral_cosine_shift_exp_omp.cpp b/src/OPENMP/dihedral_cosine_shift_exp_omp.cpp
index 106db995c4..4032f9a13d 100644
--- a/src/OPENMP/dihedral_cosine_shift_exp_omp.cpp
+++ b/src/OPENMP/dihedral_cosine_shift_exp_omp.cpp
@@ -29,8 +29,8 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/dihedral_fourier_omp.cpp b/src/OPENMP/dihedral_fourier_omp.cpp
index aae1eec559..34a145a06d 100644
--- a/src/OPENMP/dihedral_fourier_omp.cpp
+++ b/src/OPENMP/dihedral_fourier_omp.cpp
@@ -29,7 +29,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
+static constexpr double TOLERANCE = 0.05;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/dihedral_harmonic_omp.cpp b/src/OPENMP/dihedral_harmonic_omp.cpp
index 8737c61cdd..f8df7dd41e 100644
--- a/src/OPENMP/dihedral_harmonic_omp.cpp
+++ b/src/OPENMP/dihedral_harmonic_omp.cpp
@@ -29,8 +29,8 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/dihedral_helix_omp.cpp b/src/OPENMP/dihedral_helix_omp.cpp
index d778285974..64ba0b9af5 100644
--- a/src/OPENMP/dihedral_helix_omp.cpp
+++ b/src/OPENMP/dihedral_helix_omp.cpp
@@ -31,9 +31,9 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
-#define SMALLER   0.00001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
+static constexpr double SMALLER =   0.00001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/dihedral_lepton_omp.cpp b/src/OPENMP/dihedral_lepton_omp.cpp
index 13a1328058..37748ce9d5 100644
--- a/src/OPENMP/dihedral_lepton_omp.cpp
+++ b/src/OPENMP/dihedral_lepton_omp.cpp
@@ -19,9 +19,9 @@
 #include "atom.h"
 #include "comm.h"
 #include "force.h"
+#include "math_extra.h"
 #include "neighbor.h"
 #include "suffix.h"
-#include "math_extra.h"
 
 #include <cmath>
 
@@ -94,10 +94,17 @@ void DihedralLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 {
   std::vector<Lepton::CompiledExpression> dihedralforce;
   std::vector<Lepton::CompiledExpression> dihedralpot;
+  std::vector<bool> has_ref;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp));
       dihedralforce.emplace_back(parsed.differentiate("phi").createCompiledExpression());
+      has_ref.push_back(true);
+      try {
+        dihedralforce.back().getVariableReference("phi");
+      } catch (Lepton::Exception &) {
+        has_ref.back() = false;
+      }
       if (EFLAG) dihedralpot.emplace_back(parsed.createCompiledExpression());
     }
   } catch (std::exception &e) {
@@ -106,7 +113,7 @@ void DihedralLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 
   const double *const *const x = atom->x;
   auto *_noalias const f = (dbl3_t *) thr->get_f()[0];
-  const int * const * const dihedrallist = neighbor->dihedrallist;
+  const int *const *const dihedrallist = neighbor->dihedrallist;
   const int nlocal = atom->nlocal;
 
   // The dihedral angle "phi" is the angle between n123 and n234
@@ -279,7 +286,7 @@ void DihedralLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
     }
 
     const int idx = type2expression[type];
-    dihedralforce[idx].getVariableReference("phi") = phi;
+    if (has_ref[idx]) dihedralforce[idx].getVariableReference("phi") = phi;
     double m_du_dphi = -dihedralforce[idx].evaluate();
 
     // ----- Step 4: Calculate the force direction in real space -----
@@ -323,7 +330,11 @@ void DihedralLeptonOMP::eval(int nfrom, int nto, ThrData *const thr)
 
     double edihedral = 0.0;
     if (EFLAG) {
-      dihedralpot[idx].getVariableReference("phi") = phi;
+      try {
+        dihedralpot[idx].getVariableReference("phi") = phi;
+      } catch (Lepton::Exception &) {
+        ;    // ignore -> constant potential
+      }
       edihedral = dihedralpot[idx].evaluate();
     }
     if (EVFLAG)
diff --git a/src/OPENMP/dihedral_multi_harmonic_omp.cpp b/src/OPENMP/dihedral_multi_harmonic_omp.cpp
index d3f4447d08..57c4b77565 100644
--- a/src/OPENMP/dihedral_multi_harmonic_omp.cpp
+++ b/src/OPENMP/dihedral_multi_harmonic_omp.cpp
@@ -29,8 +29,8 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/dihedral_nharmonic_omp.cpp b/src/OPENMP/dihedral_nharmonic_omp.cpp
index f9a3f1328d..34e54f6c7c 100644
--- a/src/OPENMP/dihedral_nharmonic_omp.cpp
+++ b/src/OPENMP/dihedral_nharmonic_omp.cpp
@@ -29,8 +29,8 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/dihedral_opls_omp.cpp b/src/OPENMP/dihedral_opls_omp.cpp
index fbdc408c4d..ccfd2ea42c 100644
--- a/src/OPENMP/dihedral_opls_omp.cpp
+++ b/src/OPENMP/dihedral_opls_omp.cpp
@@ -29,9 +29,9 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
-#define SMALLER   0.00001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
+static constexpr double SMALLER =   0.00001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/dihedral_quadratic_omp.cpp b/src/OPENMP/dihedral_quadratic_omp.cpp
index 43d97da5f2..52b7c331fd 100644
--- a/src/OPENMP/dihedral_quadratic_omp.cpp
+++ b/src/OPENMP/dihedral_quadratic_omp.cpp
@@ -31,9 +31,9 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
-#define SMALLER   0.00001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
+static constexpr double SMALLER =   0.00001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/dihedral_table_omp.cpp b/src/OPENMP/dihedral_table_omp.cpp
index 7405ae4a13..db90e32e6b 100644
--- a/src/OPENMP/dihedral_table_omp.cpp
+++ b/src/OPENMP/dihedral_table_omp.cpp
@@ -34,8 +34,8 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathExtra;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 // --------------------------------------------
 // ------- Calculate the dihedral angle -------
diff --git a/src/OPENMP/ewald_omp.cpp b/src/OPENMP/ewald_omp.cpp
index ee2963659a..fc32efd3c0 100644
--- a/src/OPENMP/ewald_omp.cpp
+++ b/src/OPENMP/ewald_omp.cpp
@@ -31,7 +31,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/fix_nh_omp.cpp b/src/OPENMP/fix_nh_omp.cpp
index a7fe2738ca..d3565c4994 100644
--- a/src/OPENMP/fix_nh_omp.cpp
+++ b/src/OPENMP/fix_nh_omp.cpp
@@ -33,7 +33,7 @@ using namespace FixConst;
 enum{NOBIAS,BIAS};
 enum{ISO,ANISO,TRICLINIC};
 
-#define TILTMAX 1.5
+static constexpr double TILTMAX = 1.5;
 
 typedef struct { double x,y,z; } dbl3_t;
 
diff --git a/src/OPENMP/fix_nh_sphere_omp.cpp b/src/OPENMP/fix_nh_sphere_omp.cpp
index 93a674cbce..beaa4fd1cb 100644
--- a/src/OPENMP/fix_nh_sphere_omp.cpp
+++ b/src/OPENMP/fix_nh_sphere_omp.cpp
@@ -28,7 +28,7 @@ using namespace FixConst;
 
 enum{NOBIAS,BIAS};
 
-#define INERTIA 0.4          // moment of inertia prefactor for sphere
+static constexpr double INERTIA = 0.4;          // moment of inertia prefactor for sphere
 
 typedef struct { double x,y,z; } dbl3_t;
 
@@ -37,8 +37,8 @@ typedef struct { double x,y,z; } dbl3_t;
 FixNHSphereOMP::FixNHSphereOMP(LAMMPS *lmp, int narg, char **arg) :
   FixNHOMP(lmp, narg, arg)
 {
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Fix nvt/nph/npt sphere requires atom style sphere");
+  if (!atom->omega_flag) error->all(FLERR,"Fix {} requires atom attribute omega", style);
+  if (!atom->radius_flag) error->all(FLERR,"Fix {} requires atom attribute radius", style);
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/OPENMP/fix_nve_sphere_omp.cpp b/src/OPENMP/fix_nve_sphere_omp.cpp
index be3fc8d147..8c65a0df6a 100644
--- a/src/OPENMP/fix_nve_sphere_omp.cpp
+++ b/src/OPENMP/fix_nve_sphere_omp.cpp
@@ -25,7 +25,7 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathExtra;
 
-#define INERTIA 0.4          // moment of inertia prefactor for sphere
+static constexpr double INERTIA = 0.4;          // moment of inertia prefactor for sphere
 
 enum{NONE,DIPOLE};
 enum{NODLM,DLM};
diff --git a/src/OPENMP/improper_class2_omp.cpp b/src/OPENMP/improper_class2_omp.cpp
index b7387ea75a..38a2f12f5e 100644
--- a/src/OPENMP/improper_class2_omp.cpp
+++ b/src/OPENMP/improper_class2_omp.cpp
@@ -29,8 +29,8 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/improper_cossq_omp.cpp b/src/OPENMP/improper_cossq_omp.cpp
index 3bfc86bcab..4ccc0d730a 100644
--- a/src/OPENMP/improper_cossq_omp.cpp
+++ b/src/OPENMP/improper_cossq_omp.cpp
@@ -29,8 +29,8 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/improper_cvff_omp.cpp b/src/OPENMP/improper_cvff_omp.cpp
index 310806d872..b7fe9ffb03 100644
--- a/src/OPENMP/improper_cvff_omp.cpp
+++ b/src/OPENMP/improper_cvff_omp.cpp
@@ -29,8 +29,8 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/improper_fourier_omp.cpp b/src/OPENMP/improper_fourier_omp.cpp
index b70e3fb0ac..000bc02066 100644
--- a/src/OPENMP/improper_fourier_omp.cpp
+++ b/src/OPENMP/improper_fourier_omp.cpp
@@ -29,8 +29,8 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/improper_harmonic_omp.cpp b/src/OPENMP/improper_harmonic_omp.cpp
index d17fe9407d..12c2699663 100644
--- a/src/OPENMP/improper_harmonic_omp.cpp
+++ b/src/OPENMP/improper_harmonic_omp.cpp
@@ -29,8 +29,8 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/improper_ring_omp.cpp b/src/OPENMP/improper_ring_omp.cpp
index 899727c685..366d03e02d 100644
--- a/src/OPENMP/improper_ring_omp.cpp
+++ b/src/OPENMP/improper_ring_omp.cpp
@@ -31,8 +31,8 @@
 using namespace LAMMPS_NS;
 using namespace MathSpecial;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/improper_umbrella_omp.cpp b/src/OPENMP/improper_umbrella_omp.cpp
index e5d03863df..50c1ce4676 100644
--- a/src/OPENMP/improper_umbrella_omp.cpp
+++ b/src/OPENMP/improper_umbrella_omp.cpp
@@ -29,8 +29,8 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/msm_cg_omp.cpp b/src/OPENMP/msm_cg_omp.cpp
index 6904830fed..00230c9ece 100644
--- a/src/OPENMP/msm_cg_omp.cpp
+++ b/src/OPENMP/msm_cg_omp.cpp
@@ -38,8 +38,8 @@
 
 using namespace LAMMPS_NS;
 
-#define OFFSET 16384
-#define SMALLQ 0.00001
+static constexpr int OFFSET = 16384;
+static constexpr double SMALLQ = 0.00001;
 
 enum{REVERSE_RHO,REVERSE_AD,REVERSE_AD_PERATOM};
 enum{FORWARD_RHO,FORWARD_AD,FORWARD_AD_PERATOM};
diff --git a/src/OPENMP/npair_full_bin_ghost_omp.cpp b/src/OPENMP/npair_bin_ghost_omp.cpp
similarity index 58%
rename from src/OPENMP/npair_full_bin_ghost_omp.cpp
rename to src/OPENMP/npair_bin_ghost_omp.cpp
index 0825d61b49..93dd7b1110 100644
--- a/src/OPENMP/npair_full_bin_ghost_omp.cpp
+++ b/src/OPENMP/npair_bin_ghost_omp.cpp
@@ -12,30 +12,41 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#include "omp_compat.h"
-#include "npair_full_bin_ghost_omp.h"
+#include "npair_bin_ghost_omp.h"
 #include "npair_omp.h"
-#include "neigh_list.h"
+#include "omp_compat.h"
+
 #include "atom.h"
 #include "atom_vec.h"
-#include "molecule.h"
 #include "domain.h"
-#include "my_page.h"
 #include "error.h"
+#include "molecule.h"
+#include "my_page.h"
+#include "neigh_list.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-NPairFullBinGhostOmp::NPairFullBinGhostOmp(LAMMPS *lmp) : NPair(lmp) {}
+template<int HALF>
+NPairBinGhostOmp<HALF>::NPairBinGhostOmp(LAMMPS *lmp) : NPair(lmp) {}
 
 /* ----------------------------------------------------------------------
-   binned neighbor list construction for all neighbors
-   include neighbors of ghost atoms, but no "special neighbors" for ghosts
-   every neighbor pair appears in list of both atoms i and j
+   Full:
+     binned neighbor list construction for all neighbors
+     include neighbors of ghost atoms, but no "special neighbors" for ghosts
+     every neighbor pair appears in list of both atoms i and j
+   Half + Newtoff:
+     binned neighbor list construction with partial Newton's 3rd law
+     include neighbors of ghost atoms, but no "special neighbors" for ghosts
+     owned and ghost atoms check own bin and other bins in stencil
+     pair stored once if i,j are both owned and i < j
+     pair stored by me if i owned and j ghost (also stored by proc owning j)
+     pair stored once if i,j are both ghost and i < j
 ------------------------------------------------------------------------- */
 
-void NPairFullBinGhostOmp::build(NeighList *list)
+template<int HALF>
+void NPairBinGhostOmp<HALF>::build(NeighList *list)
 {
   const int nlocal = atom->nlocal;
   const int nall = nlocal + atom->nghost;
@@ -48,10 +59,10 @@ void NPairFullBinGhostOmp::build(NeighList *list)
 #endif
   NPAIR_OMP_SETUP(nall);
 
-  int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
+  int i, j, k, n, itype, jtype, ibin, which, imol, iatom;
   tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int xbin,ybin,zbin,xbin2,ybin2,zbin2;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
+  int xbin, ybin, zbin, xbin2, ybin2, zbin2;
   int *neighptr;
 
   double **x = atom->x;
@@ -93,43 +104,56 @@ void NPairFullBinGhostOmp::build(NeighList *list)
 
     // loop over all atoms in surrounding bins in stencil including self
     // when i is a ghost atom, must check if stencil bin is out of bounds
-    // skip i = j
     // no molecular test when i = ghost atom
 
     if (i < nlocal) {
       ibin = atom2bin[i];
       for (k = 0; k < nstencil; k++) {
-        for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-          if (i == j) continue;
+        for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
+          if (HALF) {
+            // Half neighbor list, newton off
+            // only store pair if i < j
+            // stores own/own pairs only once
+            // stores own/ghost pairs on both procs
+            // stores ghost/ghost pairs only once
+            if (j <= i) continue;
+          } else {
+            // Full neighbor list
+            // only skip i = j
+            if (i == j) continue;
+          }
 
           jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
+          if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
 
           delx = xtmp - x[j][0];
           dely = ytmp - x[j][1];
           delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
+          rsq = delx * delx + dely * dely + delz * delz;
 
           if (rsq <= cutneighsq[itype][jtype]) {
             if (molecular != Atom::ATOMIC) {
               if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
+                which = find_special(special[i], nspecial[i], tag[j]);
               else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = j;
-              else if (domain->minimum_image_check(delx,dely,delz))
+                which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                     tag[j] - tagprev);
+              else
+                which = 0;
+              if (which == 0)
                 neighptr[n++] = j;
-              else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-            } else neighptr[n++] = j;
+              else if (domain->minimum_image_check(delx, dely, delz))
+                neighptr[n++] = j;
+              else if (which > 0)
+                neighptr[n++] = j ^ (which << SBBITS);
+            } else
+              neighptr[n++] = j;
           }
         }
       }
 
     } else {
-      ibin = coord2bin(x[i],xbin,ybin,zbin);
+      ibin = coord2bin(x[i], xbin, ybin, zbin);
       for (k = 0; k < nstencil; k++) {
         xbin2 = xbin + stencilxyz[k][0];
         ybin2 = ybin + stencilxyz[k][1];
@@ -137,16 +161,20 @@ void NPairFullBinGhostOmp::build(NeighList *list)
         if (xbin2 < 0 || xbin2 >= mbinx ||
             ybin2 < 0 || ybin2 >= mbiny ||
             zbin2 < 0 || zbin2 >= mbinz) continue;
-        for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-          if (i == j) continue;
+        for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
+          if (HALF) {
+            if (j <= i) continue;
+          } else {
+            if (i == j) continue;
+          }
 
           jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
+          if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
 
           delx = xtmp - x[j][0];
           dely = ytmp - x[j][1];
           delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
+          rsq = delx * delx + dely * dely + delz * delz;
 
           if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
         }
@@ -157,10 +185,14 @@ void NPairFullBinGhostOmp::build(NeighList *list)
     firstneigh[i] = neighptr;
     numneigh[i] = n;
     ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    if (ipage.status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
   }
   NPAIR_OMP_CLOSE;
   list->inum = nlocal;
   list->gnum = nall - nlocal;
 }
+
+namespace LAMMPS_NS {
+template class NPairBinGhostOmp<0>;
+template class NPairBinGhostOmp<1>;
+}
diff --git a/src/OPENMP/npair_full_bin_ghost_omp.h b/src/OPENMP/npair_bin_ghost_omp.h
similarity index 67%
rename from src/OPENMP/npair_full_bin_ghost_omp.h
rename to src/OPENMP/npair_bin_ghost_omp.h
index 6de134dcf8..df18886e91 100644
--- a/src/OPENMP/npair_full_bin_ghost_omp.h
+++ b/src/OPENMP/npair_bin_ghost_omp.h
@@ -13,23 +13,29 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+typedef NPairBinGhostOmp<0> NPairFullBinGhostOmp;
 NPairStyle(full/bin/ghost/omp,
            NPairFullBinGhostOmp,
-           NP_FULL | NP_BIN | NP_GHOST | NP_OMP | NP_NEWTON | NP_NEWTOFF |
-           NP_ORTHO | NP_TRI);
+           NP_FULL | NP_BIN | NP_GHOST | NP_OMP | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBinGhostOmp<1> NPairHalfBinNewtoffGhostOmp;
+NPairStyle(half/bin/newtoff/ghost/omp,
+           NPairHalfBinNewtoffGhostOmp,
+           NP_HALF | NP_BIN | NP_GHOST | NP_OMP | NP_NEWTOFF | NP_ORTHO | NP_TRI);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_FULL_BIN_GHOST_OMP_H
-#define LMP_NPAIR_FULL_BIN_GHOST_OMP_H
+#ifndef LMP_NPAIR_BIN_GHOST_OMP_H
+#define LMP_NPAIR_BIN_GHOST_OMP_H
 
 #include "npair.h"
 
 namespace LAMMPS_NS {
 
-class NPairFullBinGhostOmp : public NPair {
+template<int HALF>
+class NPairBinGhostOmp : public NPair {
  public:
-  NPairFullBinGhostOmp(class LAMMPS *);
+  NPairBinGhostOmp(class LAMMPS *);
   void build(class NeighList *) override;
 };
 
diff --git a/src/OPENMP/npair_bin_omp.cpp b/src/OPENMP/npair_bin_omp.cpp
new file mode 100644
index 0000000000..5b2189dec2
--- /dev/null
+++ b/src/OPENMP/npair_bin_omp.cpp
@@ -0,0 +1,277 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "npair_bin_omp.h"
+#include "npair_omp.h"
+#include "omp_compat.h"
+
+#include "atom.h"
+#include "atom_vec.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "molecule.h"
+#include "my_page.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE, int ATOMONLY>
+NPairBinOmp<HALF, NEWTON, TRI, SIZE, ATOMONLY>::NPairBinOmp(LAMMPS *lmp) : NPair(lmp) {}
+
+/* ----------------------------------------------------------------------
+   Full:
+     binned neighbor list construction for all neighbors
+     every neighbor pair appears in list of both atoms i and j
+   Half + Newtoff:
+     binned neighbor list construction with partial Newton's 3rd law
+     each owned atom i checks own bin and other bins in stencil
+     pair stored once if i,j are both owned and i < j
+     pair stored by me if j is ghost (also stored by proc owning j)
+   Half + Newton:
+     binned neighbor list construction with full Newton's 3rd law
+     each owned atom i checks its own bin and other bins in Newton stencil
+     every pair stored exactly once by some processor
+------------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE, int ATOMONLY>
+void NPairBinOmp<HALF, NEWTON, TRI, SIZE, ATOMONLY>::build(NeighList *list)
+{
+  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
+  const int molecular = atom->molecular;
+  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
+  const double delta = 0.01 * force->angstrom;
+
+  NPAIR_OMP_INIT;
+#if defined(_OPENMP)
+#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
+#endif
+  NPAIR_OMP_SETUP(nlocal);
+
+  int i, j, jh, k, n, itype, jtype, ibin, bin_start, which, imol, iatom;
+  tagint itag, jtag, tagprev;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq, radsum, cut, cutsq;
+  int *neighptr;
+
+  double **x = atom->x;
+  double *radius = atom->radius;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  tagint *tag = atom->tag;
+  tagint *molecule = atom->molecule;
+  tagint **special = atom->special;
+  int **nspecial = atom->nspecial;
+  int *molindex = atom->molindex;
+  int *molatom = atom->molatom;
+  Molecule **onemols = atom->avec->onemols;
+
+  int history = list->history;
+  int mask_history = 1 << HISTBITS;
+
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+
+  // each thread has its own page allocator
+  MyPage<int> &ipage = list->ipage[tid];
+  ipage.reset();
+
+  // loop over owned atoms, storing neighbors
+
+  for (i = ifrom; i < ito; i++) {
+
+    n = 0;
+    neighptr = ipage.vget();
+
+    itag = tag[i];
+    itype = type[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    if (!ATOMONLY) {
+      if (moltemplate) {
+        imol = molindex[i];
+        iatom = molatom[i];
+        tagprev = tag[i] - iatom - 1;
+      }
+    }
+
+    // loop over all atoms in surrounding bins in stencil including self
+    // skip i = j
+
+    ibin = atom2bin[i];
+
+    for (k = 0; k < nstencil; k++) {
+      bin_start = binhead[ibin + stencil[k]];
+      if (HALF && NEWTON && (!TRI)) {
+        if (k == 0) {
+          // Half neighbor list, newton on, orthonormal
+          // loop over rest of atoms in i's bin, ghosts are at end of linked list
+          bin_start = bins[i];
+        }
+      }
+
+      for (j = bin_start; j >= 0; j = bins[j]) {
+        if (!HALF) {
+          // Full neighbor list
+          // only skip i = j
+          if (i == j) continue;
+        } else if (!NEWTON) {
+          // Half neighbor list, newton off
+          // only store pair if i < j
+          // stores own/own pairs only once
+          // stores own/ghost pairs on both procs
+          if (j <= i) continue;
+        } else if (TRI) {
+          // Half neighbor list, newton on, triclinic
+          // for triclinic, bin stencil is full in all 3 dims
+          // must use itag/jtag to eliminate half the I/J interactions
+          // cannot use I/J exact coord comparision
+          //   b/c transforming orthog -> lambda -> orthog for ghost atoms
+          //   with an added PBC offset can shift all 3 coords by epsilon
+          if (j <= i) continue;
+          if (j >= nlocal) {
+            jtag = tag[j];
+            if (itag > jtag) {
+              if ((itag + jtag) % 2 == 0) continue;
+            } else if (itag < jtag) {
+              if ((itag + jtag) % 2 == 1) continue;
+            } else {
+              if (fabs(x[j][2] - ztmp) > delta) {
+                if (x[j][2] < ztmp) continue;
+              } else if (fabs(x[j][1] - ytmp) > delta) {
+                if (x[j][1] < ytmp) continue;
+              } else {
+                if (x[j][0] < xtmp) continue;
+              }
+            }
+          }
+        } else {
+          // Half neighbor list, newton on, orthonormal
+          // store every pair for every bin in stencil, except for i's bin
+
+          if (k == 0) {
+            // if j is owned atom, store it, since j is beyond i in linked list
+            // if j is ghost, only store if j coords are "above and to the "right" of i
+            if (j >= nlocal) {
+              if (x[j][2] < ztmp) continue;
+              if (x[j][2] == ztmp) {
+                if (x[j][1] < ytmp) continue;
+                if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+              }
+            }
+          }
+        }
+
+        jtype = type[j];
+        if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
+
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+
+        if (SIZE) {
+          radsum = radius[i] + radius[j];
+          cut = radsum + skin;
+          cutsq = cut * cut;
+
+          if (ATOMONLY) {
+            if (rsq <= cutsq) {
+              jh = j;
+              if (history && rsq < radsum * radsum)
+                jh = jh ^ mask_history;
+              neighptr[n++] = jh;
+            }
+          } else {
+            if (rsq <= cutsq) {
+              jh = j;
+              if (history && rsq < radsum * radsum)
+                jh = jh ^ mask_history;
+
+              if (molecular != Atom::ATOMIC) {
+                if (!moltemplate)
+                  which = find_special(special[i], nspecial[i], tag[j]);
+                else if (imol >= 0)
+                  which = find_special(onemols[imol]->special[iatom], onemols[imol]  ->nspecial[iatom],
+                                       tag[j] - tagprev);
+                else
+                  which = 0;
+                if (which == 0)
+                  neighptr[n++] = jh;
+                else if (domain->minimum_image_check(delx, dely, delz))
+                  neighptr[n++] = jh;
+                else if (which > 0)
+                  neighptr[n++] = jh ^ (which << SBBITS);
+              } else
+                neighptr[n++] = jh;
+            }
+          }
+        } else {
+          if (ATOMONLY) {
+            if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
+          } else {
+            if (rsq <= cutneighsq[itype][jtype]) {
+              if (molecular != Atom::ATOMIC) {
+                if (!moltemplate)
+                  which = find_special(special[i], nspecial[i], tag[j]);
+                else if (imol >= 0)
+                  which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                       tag[j] - tagprev);
+                else which = 0;
+                if (which == 0)
+                  neighptr[n++] = j;
+                else if (domain->minimum_image_check(delx, dely, delz))
+                  neighptr[n++] = j;
+                else if (which > 0)
+                  neighptr[n++] = j ^ (which << SBBITS);
+              } else
+                neighptr[n++] = j;
+            }
+          }
+        }
+      }
+    }
+
+    ilist[i] = i;
+    firstneigh[i] = neighptr;
+    numneigh[i] = n;
+    ipage.vgot(n);
+    if (ipage.status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  }
+  NPAIR_OMP_CLOSE;
+  list->inum = nlocal;
+  if (!HALF) list->gnum = 0;
+}
+
+namespace LAMMPS_NS {
+template class NPairBinOmp<0,1,0,0,0>;
+template class NPairBinOmp<1,0,0,0,0>;
+template class NPairBinOmp<1,1,0,0,0>;
+template class NPairBinOmp<1,1,1,0,0>;
+template class NPairBinOmp<0,1,0,1,0>;
+template class NPairBinOmp<1,0,0,1,0>;
+template class NPairBinOmp<1,1,0,1,0>;
+template class NPairBinOmp<1,1,1,1,0>;
+template class NPairBinOmp<0,1,0,0,1>;
+template class NPairBinOmp<1,0,0,0,1>;
+template class NPairBinOmp<1,1,0,0,1>;
+template class NPairBinOmp<1,1,1,0,1>;
+template class NPairBinOmp<0,1,0,1,1>;
+template class NPairBinOmp<1,0,0,1,1>;
+template class NPairBinOmp<1,1,0,1,1>;
+template class NPairBinOmp<1,1,1,1,1>;
+}
diff --git a/src/OPENMP/npair_bin_omp.h b/src/OPENMP/npair_bin_omp.h
new file mode 100644
index 0000000000..dfe5429ff4
--- /dev/null
+++ b/src/OPENMP/npair_bin_omp.h
@@ -0,0 +1,119 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NPAIR_CLASS
+// clang-format off
+typedef NPairBinOmp<0, 1, 0, 0, 0> NPairFullBinOmp;
+NPairStyle(full/bin/omp,
+           NPairFullBinOmp,
+           NP_FULL | NP_BIN | NP_OMP | NP_MOLONLY |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBinOmp<1, 0, 0, 0, 0> NPairHalfBinNewtoffOmp;
+NPairStyle(half/bin/newtoff/omp,
+           NPairHalfBinNewtoffOmp,
+           NP_HALF | NP_BIN | NP_OMP | NP_MOLONLY | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBinOmp<1, 1, 0, 0, 0> NPairHalfBinNewtonOmp;
+NPairStyle(half/bin/newton/omp,
+           NPairHalfBinNewtonOmp,
+           NP_HALF | NP_BIN | NP_OMP | NP_MOLONLY | NP_NEWTON | NP_ORTHO);
+
+typedef NPairBinOmp<1, 1, 1, 0, 0> NPairHalfBinNewtonTriOmp;
+NPairStyle(half/bin/newton/tri/omp,
+           NPairHalfBinNewtonTriOmp,
+           NP_HALF | NP_BIN | NP_OMP | NP_MOLONLY | NP_NEWTON | NP_TRI);
+
+typedef NPairBinOmp<0, 1, 0, 1, 0> NPairFullSizeBinOmp;
+NPairStyle(full/size/bin/omp,
+           NPairFullSizeBinOmp,
+           NP_FULL | NP_SIZE | NP_BIN | NP_OMP | NP_MOLONLY |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBinOmp<1, 0, 0, 1, 0> NPairHalfSizeBinNewtoffOmp;
+NPairStyle(half/size/bin/newtoff/omp,
+           NPairHalfSizeBinNewtoffOmp,
+           NP_HALF | NP_SIZE | NP_BIN | NP_OMP | NP_MOLONLY | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBinOmp<1, 1, 0, 1, 0> NPairHalfSizeBinNewtonOmp;
+NPairStyle(half/size/bin/newton/omp,
+           NPairHalfSizeBinNewtonOmp,
+           NP_HALF | NP_SIZE | NP_BIN | NP_OMP | NP_MOLONLY | NP_NEWTON | NP_ORTHO);
+
+typedef NPairBinOmp<1, 1, 1, 1, 0> NPairHalfSizeBinNewtonTriOmp;
+NPairStyle(half/size/bin/newton/tri/omp,
+           NPairHalfSizeBinNewtonTriOmp,
+           NP_HALF | NP_SIZE | NP_BIN | NP_OMP | NP_MOLONLY | NP_NEWTON | NP_TRI);
+
+typedef NPairBinOmp<0, 1, 0, 0, 1> NPairFullBinAtomonlyOmp;
+NPairStyle(full/bin/atomonly/omp,
+           NPairFullBinAtomonlyOmp,
+           NP_FULL | NP_BIN | NP_OMP | NP_ATOMONLY |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBinOmp<1, 0, 0, 0, 1> NPairHalfBinNewtoffAtomonlyOmp;
+NPairStyle(half/bin/newtoff/atomonly/omp,
+           NPairHalfBinNewtoffAtomonlyOmp,
+           NP_HALF | NP_BIN | NP_OMP | NP_ATOMONLY | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBinOmp<1, 1, 0, 0, 1> NPairHalfBinNewtonAtomonlyOmp;
+NPairStyle(half/bin/newton/atomonly/omp,
+           NPairHalfBinNewtonAtomonlyOmp,
+           NP_HALF | NP_BIN | NP_OMP | NP_ATOMONLY | NP_NEWTON | NP_ORTHO);
+
+typedef NPairBinOmp<1, 1, 1, 0, 1> NPairHalfBinNewtonTriAtomonlyOmp;
+NPairStyle(half/bin/newton/tri/atomonly/omp,
+           NPairHalfBinNewtonTriAtomonlyOmp,
+           NP_HALF | NP_BIN | NP_OMP | NP_ATOMONLY | NP_NEWTON | NP_TRI);
+
+typedef NPairBinOmp<0, 1, 0, 1, 1> NPairFullSizeBinAtomonlyOmp;
+NPairStyle(full/size/bin/atomonly/omp,
+           NPairFullSizeBinAtomonlyOmp,
+           NP_FULL | NP_SIZE | NP_BIN | NP_OMP | NP_ATOMONLY |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBinOmp<1, 0, 0, 1, 1> NPairHalfSizeBinNewtoffAtomonlyOmp;
+NPairStyle(half/size/bin/newtoff/atomonly/omp,
+           NPairHalfSizeBinNewtoffAtomonlyOmp,
+           NP_HALF | NP_SIZE | NP_BIN | NP_OMP | NP_ATOMONLY | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBinOmp<1, 1, 0, 1, 1> NPairHalfSizeBinNewtonAtomonlyOmp;
+NPairStyle(half/size/bin/newton/atomonly/omp,
+           NPairHalfSizeBinNewtonAtomonlyOmp,
+           NP_HALF | NP_SIZE | NP_BIN | NP_OMP | NP_ATOMONLY | NP_NEWTON | NP_ORTHO);
+
+typedef NPairBinOmp<1, 1, 1, 1, 1> NPairHalfSizeBinNewtonTriAtomonlyOmp;
+NPairStyle(half/size/bin/newton/tri/atomonly/omp,
+           NPairHalfSizeBinNewtonTriAtomonlyOmp,
+           NP_HALF | NP_SIZE | NP_BIN | NP_OMP | NP_ATOMONLY | NP_NEWTON | NP_TRI);
+// clang-format on
+#else
+
+#ifndef LMP_NPAIR_BIN_OMP_H
+#define LMP_NPAIR_BIN_OMP_H
+
+#include "npair.h"
+
+namespace LAMMPS_NS {
+
+template<int HALF, int NEWTON, int TRI, int SIZE, int ATOMONLY>
+class NPairBinOmp : public NPair {
+ public:
+  NPairBinOmp(class LAMMPS *);
+  void build(class NeighList *) override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/OPENMP/npair_full_bin_atomonly_omp.cpp b/src/OPENMP/npair_full_bin_atomonly_omp.cpp
deleted file mode 100644
index 0a37cca287..0000000000
--- a/src/OPENMP/npair_full_bin_atomonly_omp.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_full_bin_atomonly_omp.h"
-
-#include "atom.h"
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairFullBinAtomonlyOmp::NPairFullBinAtomonlyOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction for all neighbors
-   every neighbor pair appears in list of both atoms i and j
-------------------------------------------------------------------------- */
-
-void NPairFullBinAtomonlyOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,ibin;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *molecule = atom->molecule;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  // loop over owned atoms, storing neighbors
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    // loop over all atoms in surrounding bins in stencil including self
-    // skip i = j
-
-    ibin = atom2bin[i];
-
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        if (i == j) continue;
-
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-  list->gnum = 0;
-}
diff --git a/src/OPENMP/npair_full_bin_atomonly_omp.h b/src/OPENMP/npair_full_bin_atomonly_omp.h
deleted file mode 100644
index 50b1aa753c..0000000000
--- a/src/OPENMP/npair_full_bin_atomonly_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(full/bin/atomonly/omp,
-           NPairFullBinAtomonlyOmp,
-           NP_FULL | NP_BIN | NP_ATOMONLY | NP_OMP |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_FULL_BIN_ATOMONLY_OMP_H
-#define LMP_NPAIR_FULL_BIN_ATOMONLY_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairFullBinAtomonlyOmp : public NPair {
- public:
-  NPairFullBinAtomonlyOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_full_bin_omp.cpp b/src/OPENMP/npair_full_bin_omp.cpp
deleted file mode 100644
index 65af519850..0000000000
--- a/src/OPENMP/npair_full_bin_omp.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_full_bin_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairFullBinOmp::NPairFullBinOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction for all neighbors
-   every neighbor pair appears in list of both atoms i and j
-------------------------------------------------------------------------- */
-
-void NPairFullBinOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  // loop over owned atoms, storing neighbors
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in surrounding bins in stencil including self
-    // skip i = j
-
-    ibin = atom2bin[i];
-
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        if (i == j) continue;
-
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-  list->gnum = 0;
-}
diff --git a/src/OPENMP/npair_full_bin_omp.h b/src/OPENMP/npair_full_bin_omp.h
deleted file mode 100644
index 333025a1fb..0000000000
--- a/src/OPENMP/npair_full_bin_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(full/bin/omp,
-           NPairFullBinOmp,
-           NP_FULL | NP_BIN | NP_OMP | NP_NEWTON | NP_NEWTOFF |
-           NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_FULL_BIN_OMP_H
-#define LMP_NPAIR_FULL_BIN_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairFullBinOmp : public NPair {
- public:
-  NPairFullBinOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_full_multi_old_omp.h b/src/OPENMP/npair_full_multi_old_omp.h
deleted file mode 100644
index 5d9f4c2f88..0000000000
--- a/src/OPENMP/npair_full_multi_old_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(full/multi/old/omp,
-           NPairFullMultiOldOmp,
-           NP_FULL | NP_MULTI_OLD | NP_OMP |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_FULL_MULTI_OLD_OMP_H
-#define LMP_NPAIR_FULL_MULTI_OLD_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairFullMultiOldOmp : public NPair {
- public:
-  NPairFullMultiOldOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_full_multi_omp.cpp b/src/OPENMP/npair_full_multi_omp.cpp
deleted file mode 100644
index 1e39838381..0000000000
--- a/src/OPENMP/npair_full_multi_omp.cpp
+++ /dev/null
@@ -1,154 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_full_multi_omp.h"
-#include "npair_omp.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairFullMultiOmp::NPairFullMultiOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction for all neighbors
-   multi stencil is icollection-jcollection dependent
-   every neighbor pair appears in list of both atoms i and j
-------------------------------------------------------------------------- */
-
-void NPairFullMultiOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,icollection,jcollection,ibin,jbin,which,ns,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  int js;
-
-  // loop over each atom, storing neighbors
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-      if (icollection == jcollection) jbin = ibin;
-          else jbin = coord2bin(x[i], jcollection);
-
-      // loop over all atoms in surrounding bins in stencil including self
-      // skip i = j
-      // use full stencil for all collection combinations
-
-      s = stencil_multi[icollection][jcollection];
-      ns = nstencil_multi[icollection][jcollection];
-
-      for (k = 0; k < ns; k++) {
-            js = binhead_multi[jcollection][jbin + s[k]];
-            for (j = js; j >= 0; j = bins[j]) {
-              if (i == j) continue;
-
-          jtype = type[j];
-              if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-              delx = xtmp - x[j][0];
-              dely = ytmp - x[j][1];
-              delz = ztmp - x[j][2];
-              rsq = delx*delx + dely*dely + delz*delz;
-
-              if (rsq <= cutneighsq[itype][jtype]) {
-                if (molecular != Atom::ATOMIC) {
-                  if (!moltemplate)
-                        which = find_special(special[i],nspecial[i],tag[j]);
-                  else if (imol >= 0)
-                        which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-                  else which = 0;
-                  if (which == 0) neighptr[n++] = j;
-                  else if (domain->minimum_image_check(delx,dely,delz))
-                        neighptr[n++] = j;
-                  else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-                } else neighptr[n++] = j;
-              }
-            }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-  list->gnum = 0;
-}
diff --git a/src/OPENMP/npair_full_multi_omp.h b/src/OPENMP/npair_full_multi_omp.h
deleted file mode 100644
index 0d71bf7bc6..0000000000
--- a/src/OPENMP/npair_full_multi_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(full/multi/omp,
-           NPairFullMultiOmp,
-           NP_FULL | NP_MULTI | NP_OMP |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_FULL_MULTI_OMP_H
-#define LMP_NPAIR_FULL_MULTI_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairFullMultiOmp : public NPair {
- public:
-  NPairFullMultiOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_full_nsq_ghost_omp.cpp b/src/OPENMP/npair_full_nsq_ghost_omp.cpp
deleted file mode 100644
index c1270d9fdc..0000000000
--- a/src/OPENMP/npair_full_nsq_ghost_omp.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_full_nsq_ghost_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairFullNsqGhostOmp::NPairFullNsqGhostOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   N^2 search for all neighbors
-   include neighbors of ghost atoms, but no "special neighbors" for ghosts
-   every neighbor pair appears in list of both atoms i and j
-------------------------------------------------------------------------- */
-
-void NPairFullNsqGhostOmp::build(NeighList *list)
-{
-  const int nlocal = atom->nlocal;
-  const int nall = nlocal + atom->nghost;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nall);
-
-  int i,j,n,itype,jtype,which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  // loop over owned & ghost atoms, storing neighbors
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms, owned and ghost
-    // skip i = j
-    // no molecular test when i = ghost atom
-
-    if (i < nlocal) {
-      for (j = 0; j < nall; j++) {
-        if (i == j) continue;
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    } else {
-      for (j = 0; j < nall; j++) {
-        if (i == j) continue;
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-  list->gnum = nall - nlocal;
-}
diff --git a/src/OPENMP/npair_full_nsq_ghost_omp.h b/src/OPENMP/npair_full_nsq_ghost_omp.h
deleted file mode 100644
index 448354d4ba..0000000000
--- a/src/OPENMP/npair_full_nsq_ghost_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(full/nsq/ghost/omp,
-           NPairFullNsqGhostOmp,
-           NP_FULL | NP_NSQ | NP_GHOST | NP_OMP | NP_NEWTON | NP_NEWTOFF |
-           NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_FULL_NSQ_GHOST_OMP_H
-#define LMP_NPAIR_FULL_NSQ_GHOST_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairFullNsqGhostOmp : public NPair {
- public:
-  NPairFullNsqGhostOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_full_nsq_omp.cpp b/src/OPENMP/npair_full_nsq_omp.cpp
deleted file mode 100644
index 695cb1bc48..0000000000
--- a/src/OPENMP/npair_full_nsq_omp.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_full_nsq_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "group.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairFullNsqOmp::NPairFullNsqOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   N^2 search for all neighbors
-   every neighbor pair appears in list of both atoms i and j
-------------------------------------------------------------------------- */
-
-void NPairFullNsqOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,n,itype,jtype,which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int nall = atom->nlocal + atom->nghost;
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  // loop over owned atoms, storing neighbors
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms, owned and ghost
-    // skip i = j
-
-    for (j = 0; j < nall; j++) {
-      if (includegroup && !(mask[j] & bitmask)) continue;
-      if (i == j) continue;
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-  list->gnum = 0;
-}
diff --git a/src/OPENMP/npair_full_nsq_omp.h b/src/OPENMP/npair_full_nsq_omp.h
deleted file mode 100644
index 53e913a18c..0000000000
--- a/src/OPENMP/npair_full_nsq_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(full/nsq/omp,
-           NPairFullNsqOmp,
-           NP_FULL | NP_NSQ | NP_OMP | NP_NEWTON | NP_NEWTOFF |
-           NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_FULL_NSQ_OMP_H
-#define LMP_NPAIR_FULL_NSQ_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairFullNsqOmp : public NPair {
- public:
-  NPairFullNsqOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_bin_atomonly_newton_omp.cpp b/src/OPENMP/npair_half_bin_atomonly_newton_omp.cpp
deleted file mode 100644
index 1bc1199628..0000000000
--- a/src/OPENMP/npair_half_bin_atomonly_newton_omp.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_bin_atomonly_newton_omp.h"
-
-#include "atom.h"
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfBinAtomonlyNewtonOmp::NPairHalfBinAtomonlyNewtonOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfBinAtomonlyNewtonOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,ibin;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  // loop over each atom, storing neighbors
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *molecule = atom->molecule;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    // loop over rest of atoms in i's bin, ghosts are at end of linked list
-    // if j is owned atom, store it, since j is beyond i in linked list
-    // if j is ghost, only store if j coords are "above and to the right" of i
-
-    for (j = bins[i]; j >= 0; j = bins[j]) {
-      if (j >= nlocal) {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
-    }
-
-    // loop over all atoms in other bins in stencil, store every pair
-
-    ibin = atom2bin[i];
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_bin_atomonly_newton_omp.h b/src/OPENMP/npair_half_bin_atomonly_newton_omp.h
deleted file mode 100644
index dcec3aeee2..0000000000
--- a/src/OPENMP/npair_half_bin_atomonly_newton_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/bin/atomonly/newton/omp,
-           NPairHalfBinAtomonlyNewtonOmp,
-           NP_HALF | NP_BIN | NP_ATOMONLY | NP_NEWTON | NP_OMP | NP_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_BIN_ATOMONLY_NEWTON_OMP_H
-#define LMP_NPAIR_HALF_BIN_ATOMONLY_NEWTON_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfBinAtomonlyNewtonOmp : public NPair {
- public:
-  NPairHalfBinAtomonlyNewtonOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_bin_newtoff_ghost_omp.cpp b/src/OPENMP/npair_half_bin_newtoff_ghost_omp.cpp
deleted file mode 100644
index e10d181a8c..0000000000
--- a/src/OPENMP/npair_half_bin_newtoff_ghost_omp.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_bin_newtoff_ghost_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfBinNewtoffGhostOmp::NPairHalfBinNewtoffGhostOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with partial Newton's 3rd law
-   include neighbors of ghost atoms, but no "special neighbors" for ghosts
-   owned and ghost atoms check own bin and other bins in stencil
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if i owned and j ghost (also stored by proc owning j)
-   pair stored once if i,j are both ghost and i < j
-------------------------------------------------------------------------- */
-
-void NPairHalfBinNewtoffGhostOmp::build(NeighList *list)
-{
-  const int nlocal = atom->nlocal;
-  const int nall = nlocal + atom->nghost;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nall);
-
-  int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
-  tagint tagprev;
-  int xbin,ybin,zbin,xbin2,ybin2,zbin2;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  // loop over each atom, storing neighbors
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in other bins in stencil including self
-    // when i is a ghost atom, must check if stencil bin is out of bounds
-    // only store pair if i < j
-    // stores own/own pairs only once
-    // stores own/ghost pairs with owned atom only, on both procs
-    // stores ghost/ghost pairs only once
-    // no molecular test when i = ghost atom
-
-    if (i < nlocal) {
-      ibin = atom2bin[i];
-
-      for (k = 0; k < nstencil; k++) {
-        for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-          if (j <= i) continue;
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-
-          if (rsq <= cutneighsq[itype][jtype]) {
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = j;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = j;
-              else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-            } else neighptr[n++] = j;
-          }
-        }
-      }
-
-    } else {
-      ibin = coord2bin(x[i],xbin,ybin,zbin);
-      for (k = 0; k < nstencil; k++) {
-        xbin2 = xbin + stencilxyz[k][0];
-        ybin2 = ybin + stencilxyz[k][1];
-        zbin2 = zbin + stencilxyz[k][2];
-        if (xbin2 < 0 || xbin2 >= mbinx ||
-            ybin2 < 0 || ybin2 >= mbiny ||
-            zbin2 < 0 || zbin2 >= mbinz) continue;
-        for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-          if (j <= i) continue;
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-
-          if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-  list->gnum = nall - atom->nlocal;
-}
diff --git a/src/OPENMP/npair_half_bin_newtoff_ghost_omp.h b/src/OPENMP/npair_half_bin_newtoff_ghost_omp.h
deleted file mode 100644
index 0258320f88..0000000000
--- a/src/OPENMP/npair_half_bin_newtoff_ghost_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/bin/newtoff/ghost/omp,
-           NPairHalfBinNewtoffGhostOmp,
-           NP_HALF | NP_BIN | NP_NEWTOFF | NP_GHOST | NP_OMP |
-           NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_BIN_NEWTOFF_GHOST_OMP_H
-#define LMP_NPAIR_HALF_BIN_NEWTOFF_GHOST_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfBinNewtoffGhostOmp : public NPair {
- public:
-  NPairHalfBinNewtoffGhostOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_bin_newtoff_omp.cpp b/src/OPENMP/npair_half_bin_newtoff_omp.cpp
deleted file mode 100644
index 9d32cc7e2b..0000000000
--- a/src/OPENMP/npair_half_bin_newtoff_omp.cpp
+++ /dev/null
@@ -1,139 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_bin_newtoff_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfBinNewtoffOmp::NPairHalfBinNewtoffOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with partial Newton's 3rd law
-   each owned atom i checks own bin and other bins in stencil
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfBinNewtoffOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  // loop over each atom, storing neighbors
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in other bins in stencil including self
-    // only store pair if i < j
-    // stores own/own pairs only once
-    // stores own/ghost pairs on both procs
-
-    ibin = atom2bin[i];
-
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        if (j <= i) continue;
-
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_bin_newtoff_omp.h b/src/OPENMP/npair_half_bin_newtoff_omp.h
deleted file mode 100644
index e5d3034667..0000000000
--- a/src/OPENMP/npair_half_bin_newtoff_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/bin/newtoff/omp,
-           NPairHalfBinNewtoffOmp,
-           NP_HALF | NP_BIN | NP_NEWTOFF | NP_OMP | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_BIN_NEWTOFF_OMP_H
-#define LMP_NPAIR_HALF_BIN_NEWTOFF_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfBinNewtoffOmp : public NPair {
- public:
-  NPairHalfBinNewtoffOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_bin_newton_omp.cpp b/src/OPENMP/npair_half_bin_newton_omp.cpp
deleted file mode 100644
index d2da12962c..0000000000
--- a/src/OPENMP/npair_half_bin_newton_omp.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_bin_newton_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfBinNewtonOmp::NPairHalfBinNewtonOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfBinNewtonOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  // loop over each atom, storing neighbors
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over rest of atoms in i's bin, ghosts are at end of linked list
-    // if j is owned atom, store it, since j is beyond i in linked list
-    // if j is ghost, only store if j coords are "above and to the right" of i
-
-    for (j = bins[i]; j >= 0; j = bins[j]) {
-      if (j >= nlocal) {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          // OLD: if (which >= 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-      }
-    }
-
-    // loop over all atoms in other bins in stencil, store every pair
-
-    ibin = atom2bin[i];
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-            // OLD: if (which >= 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_bin_newton_omp.h b/src/OPENMP/npair_half_bin_newton_omp.h
deleted file mode 100644
index 68064cdf45..0000000000
--- a/src/OPENMP/npair_half_bin_newton_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/bin/newton/omp,
-           NPairHalfBinNewtonOmp,
-           NP_HALF | NP_BIN | NP_NEWTON | NP_OMP | NP_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_BIN_NEWTON_OMP_H
-#define LMP_NPAIR_HALF_BIN_NEWTON_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfBinNewtonOmp : public NPair {
- public:
-  NPairHalfBinNewtonOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_bin_newton_tri_omp.cpp b/src/OPENMP/npair_half_bin_newton_tri_omp.cpp
deleted file mode 100644
index 47524474ed..0000000000
--- a/src/OPENMP/npair_half_bin_newton_tri_omp.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_bin_newton_tri_omp.h"
-#include "npair_omp.h"
-#include "omp_compat.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfBinNewtonTriOmp::NPairHalfBinNewtonTriOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfBinNewtonTriOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const double delta = 0.01 * force->angstrom;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,ibin,which,imol,iatom;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itag = tag[i];
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in bins in stencil
-    // for triclinic, bin stencil is full in all 3 dims
-    // must use itag/jtag to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    ibin = atom2bin[i];
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-
-        if (j <= i) continue;
-        if (j >= nlocal) {
-          jtag = tag[j];
-          if (itag > jtag) {
-            if ((itag+jtag) % 2 == 0) continue;
-          } else if (itag < jtag) {
-            if ((itag+jtag) % 2 == 1) continue;
-          } else {
-            if (fabs(x[j][2]-ztmp) > delta) {
-              if (x[j][2] < ztmp) continue;
-            } else if (fabs(x[j][1]-ytmp) > delta) {
-              if (x[j][1] < ytmp) continue;
-            } else {
-              if (x[j][0] < xtmp) continue;
-            }
-          }
-        }
-
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_bin_newton_tri_omp.h b/src/OPENMP/npair_half_bin_newton_tri_omp.h
deleted file mode 100644
index 90d5af5db1..0000000000
--- a/src/OPENMP/npair_half_bin_newton_tri_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/bin/newton/tri/omp,
-           NPairHalfBinNewtonTriOmp,
-           NP_HALF | NP_BIN | NP_NEWTON | NP_TRI | NP_OMP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_BIN_NEWTON_TRI_OMP_H
-#define LMP_NPAIR_HALF_BIN_NEWTON_TRI_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfBinNewtonTriOmp : public NPair {
- public:
-  NPairHalfBinNewtonTriOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_multi_newtoff_omp.cpp b/src/OPENMP/npair_half_multi_newtoff_omp.cpp
deleted file mode 100644
index 1b65653f76..0000000000
--- a/src/OPENMP/npair_half_multi_newtoff_omp.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_multi_newtoff_omp.h"
-#include "npair_omp.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfMultiNewtoffOmp::NPairHalfMultiNewtoffOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with partial Newton's 3rd law
-   multi stencil is icollection-jcollection dependent
-   each owned atom i checks own bin and other bins in stencil
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfMultiNewtoffOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,icollection,jcollection,ibin,jbin,which,ns,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  int js;
-
-  // loop over each atom, storing neighbors
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-      if (icollection == jcollection) jbin = ibin;
-          else jbin = coord2bin(x[i], jcollection);
-
-      // loop over all atoms in other bins in stencil including self
-      // only store pair if i < j
-      // stores own/own pairs only once
-      // stores own/ghost pairs on both procs
-      // use full stencil for all collection combinations
-
-      s = stencil_multi[icollection][jcollection];
-      ns = nstencil_multi[icollection][jcollection];
-
-      for (k = 0; k < ns; k++) {
-            js = binhead_multi[jcollection][jbin + s[k]];
-            for (j = js; j >= 0; j = bins[j]) {
-              if (j <= i) continue;
-
-          jtype = type[j];
-              if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-              delx = xtmp - x[j][0];
-              dely = ytmp - x[j][1];
-              delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-
-          if (rsq <= cutneighsq[itype][jtype]) {
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = j;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = j;
-              else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-            } else neighptr[n++] = j;
-          }
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_multi_newtoff_omp.h b/src/OPENMP/npair_half_multi_newtoff_omp.h
deleted file mode 100644
index 658f41f926..0000000000
--- a/src/OPENMP/npair_half_multi_newtoff_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/multi/newtoff/omp,
-           NPairHalfMultiNewtoffOmp,
-           NP_HALF | NP_MULTI | NP_NEWTOFF | NP_OMP | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_MULTI_NEWTOFF_OMP_H
-#define LMP_NPAIR_HALF_MULTI_NEWTOFF_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfMultiNewtoffOmp : public NPair {
- public:
-  NPairHalfMultiNewtoffOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_multi_newton_omp.cpp b/src/OPENMP/npair_half_multi_newton_omp.cpp
deleted file mode 100644
index 8add1d3703..0000000000
--- a/src/OPENMP/npair_half_multi_newton_omp.cpp
+++ /dev/null
@@ -1,205 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_multi_newton_omp.h"
-#include "npair_omp.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfMultiNewtonOmp::NPairHalfMultiNewtonOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with full Newton's 3rd law
-   multi stencil is icollection-jcollection dependent
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfMultiNewtonOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,icollection,jcollection,ibin,jbin,which,ns,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  int js;
-
-  // loop over each atom, storing neighbors
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-      if (icollection == jcollection) jbin = ibin;
-          else jbin = coord2bin(x[i], jcollection);
-
-      // if same size: uses half stencil so check central bin
-      if (cutcollectionsq[icollection][icollection] == cutcollectionsq[jcollection][jcollection]){
-
-        if (icollection == jcollection) js = bins[i];
-        else js = binhead_multi[jcollection][jbin];
-
-        // if same collection,
-        //   if j is owned atom, store it, since j is beyond i in linked list
-        //   if j is ghost, only store if j coords are "above and to the right" of i
-
-        // if different collections,
-        //   if j is owned atom, store it if j > i
-        //   if j is ghost, only store if j coords are "above and to the right" of i
-
-            for (j = js; j >= 0; j = bins[j]) {
-          if ((icollection != jcollection) && (j < i)) continue;
-
-              if (j >= nlocal) {
-                if (x[j][2] < ztmp) continue;
-                if (x[j][2] == ztmp) {
-                  if (x[j][1] < ytmp) continue;
-                  if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-                }
-              }
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-              delx = xtmp - x[j][0];
-              dely = ytmp - x[j][1];
-              delz = ztmp - x[j][2];
-              rsq = delx*delx + dely*dely + delz*delz;
-
-              if (rsq <= cutneighsq[itype][jtype]) {
-                if (molecular) {
-                  if (!moltemplate)
-                    which = find_special(special[i],nspecial[i],tag[j]);
-                  else if (imol >= 0)
-                    which = find_special(onemols[imol]->special[iatom],
-                               onemols[imol]->nspecial[iatom],
-                               tag[j]-tagprev);
-                  else which = 0;
-                  if (which == 0) neighptr[n++] = j;
-                  else if (domain->minimum_image_check(delx,dely,delz))
-                    neighptr[n++] = j;
-                  else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-                } else neighptr[n++] = j;
-              }
-            }
-      }
-
-      // for all collections, loop over all atoms in other bins in stencil, store every pair
-      // stencil is empty if i larger than j
-      // stencil is half if i same size as j
-      // stencil is full if i smaller than j
-
-          s = stencil_multi[icollection][jcollection];
-          ns = nstencil_multi[icollection][jcollection];
-
-          for (k = 0; k < ns; k++) {
-            js = binhead_multi[jcollection][jbin + s[k]];
-            for (j = js; j >= 0; j = bins[j]) {
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-              delx = xtmp - x[j][0];
-              dely = ytmp - x[j][1];
-              delz = ztmp - x[j][2];
-              rsq = delx*delx + dely*dely + delz*delz;
-
-              if (rsq <= cutneighsq[itype][jtype]) {
-                if (molecular != Atom::ATOMIC) {
-                    if (!moltemplate)
-                      which = find_special(special[i],nspecial[i],tag[j]);
-                    else if (imol >= 0)
-                      which = find_special(onemols[imol]->special[iatom],
-                                       onemols[imol]->nspecial[iatom],
-                                       tag[j]-tagprev);
-                    else which = 0;
-                    if (which == 0) neighptr[n++] = j;
-                    else if (domain->minimum_image_check(delx,dely,delz))
-                      neighptr[n++] = j;
-                    else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-                } else neighptr[n++] = j;
-              }
-            }
-          }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_multi_newton_omp.h b/src/OPENMP/npair_half_multi_newton_omp.h
deleted file mode 100644
index 44bee84653..0000000000
--- a/src/OPENMP/npair_half_multi_newton_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/multi/newton/omp,
-           NPairHalfMultiNewtonOmp,
-           NP_HALF | NP_MULTI | NP_NEWTON | NP_OMP | NP_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_MULTI_NEWTON_OMP_H
-#define LMP_NPAIR_HALF_MULTI_NEWTON_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfMultiNewtonOmp : public NPair {
- public:
-  NPairHalfMultiNewtonOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_multi_newton_tri_omp.cpp b/src/OPENMP/npair_half_multi_newton_tri_omp.cpp
deleted file mode 100644
index e26bea990f..0000000000
--- a/src/OPENMP/npair_half_multi_newton_tri_omp.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_multi_newton_tri_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "neighbor.h"
-#include "npair_omp.h"
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfMultiNewtonTriOmp::NPairHalfMultiNewtonTriOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   multi stencil is icollection-jcollection dependent
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfMultiNewtonTriOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const double delta = 0.01 * force->angstrom;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,ibin,jbin,icollection,jcollection,which,ns,imol,iatom;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  int js;
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itag = tag[i];
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-
-      if (icollection == jcollection) jbin = ibin;
-      else jbin = coord2bin(x[i], jcollection);
-
-      // loop over all atoms in bins in stencil
-      // for triclinic:
-      //   stencil is empty if i larger than j
-      //   stencil is full if i smaller than j
-      //   stencil is full if i same size as j
-      // for i smaller than j:
-      //   must use itag/jtag to eliminate half the I/J interactions
-      //   cannot use I/J exact coord comparision
-      //     b/c transforming orthog -> lambda -> orthog for ghost atoms
-      //     with an added PBC offset can shift all 3 coords by epsilon
-
-      s = stencil_multi[icollection][jcollection];
-      ns = nstencil_multi[icollection][jcollection];
-
-      for (k = 0; k < ns; k++) {
-        js = binhead_multi[jcollection][jbin + s[k]];
-        for (j = js; j >= 0; j = bins[j]) {
-
-          // if same size (same collection), exclude half of interactions
-
-          if (cutcollectionsq[icollection][icollection] ==
-              cutcollectionsq[jcollection][jcollection]) {
-            if (j <= i) continue;
-            if (j >= nlocal) {
-              jtag = tag[j];
-              if (itag > jtag) {
-                if ((itag+jtag) % 2 == 0) continue;
-              } else if (itag < jtag) {
-                if ((itag+jtag) % 2 == 1) continue;
-              } else {
-                if (fabs(x[j][2]-ztmp) > delta) {
-                  if (x[j][2] < ztmp) continue;
-                } else if (fabs(x[j][1]-ytmp) > delta) {
-                  if (x[j][1] < ytmp) continue;
-                } else {
-                  if (x[j][0] < xtmp) continue;
-                }
-              }
-            }
-          }
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-
-          if (rsq <= cutneighsq[itype][jtype]) {
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = j;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = j;
-              else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-            } else neighptr[n++] = j;
-          }
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_multi_newton_tri_omp.h b/src/OPENMP/npair_half_multi_newton_tri_omp.h
deleted file mode 100644
index 21731f4f0b..0000000000
--- a/src/OPENMP/npair_half_multi_newton_tri_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/multi/newton/tri/omp,
-           NPairHalfMultiNewtonTriOmp,
-           NP_HALF | NP_MULTI | NP_NEWTON | NP_TRI | NP_OMP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_MULTI_NEWTON_TRI_OMP_H
-#define LMP_NPAIR_HALF_MULTI_NEWTON_TRI_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfMultiNewtonTriOmp : public NPair {
- public:
-  NPairHalfMultiNewtonTriOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_multi_old_newtoff_omp.cpp b/src/OPENMP/npair_half_multi_old_newtoff_omp.cpp
deleted file mode 100644
index 4447b4414e..0000000000
--- a/src/OPENMP/npair_half_multi_old_newtoff_omp.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_multi_old_newtoff_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfMultiOldNewtoffOmp::NPairHalfMultiOldNewtoffOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with partial Newton's 3rd law
-   each owned atom i checks own bin and other bins in stencil
-   multi-type stencil is itype dependent and is distance checked
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfMultiOldNewtoffOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  // loop over each atom, storing neighbors
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in other bins in stencil including self
-    // only store pair if i < j
-    // skip if i,j neighbor cutoff is less than bin distance
-    // stores own/own pairs only once
-    // stores own/ghost pairs on both procs
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        if (j <= i) continue;
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_multi_old_newtoff_omp.h b/src/OPENMP/npair_half_multi_old_newtoff_omp.h
deleted file mode 100644
index 26484d6c5b..0000000000
--- a/src/OPENMP/npair_half_multi_old_newtoff_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/multi/old/newtoff/omp,
-           NPairHalfMultiOldNewtoffOmp,
-           NP_HALF | NP_MULTI_OLD | NP_NEWTOFF | NP_OMP | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_MULTI_OLD_NEWTOFF_OMP_H
-#define LMP_NPAIR_HALF_MULTI_OLD_NEWTOFF_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfMultiOldNewtoffOmp : public NPair {
- public:
-  NPairHalfMultiOldNewtoffOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_multi_old_newton_omp.cpp b/src/OPENMP/npair_half_multi_old_newton_omp.cpp
deleted file mode 100644
index 29c551f17d..0000000000
--- a/src/OPENMP/npair_half_multi_old_newton_omp.cpp
+++ /dev/null
@@ -1,179 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_multi_old_newton_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfMultiOldNewtonOmp::NPairHalfMultiOldNewtonOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   multi-type stencil is itype dependent and is distance checked
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfMultiOldNewtonOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  // loop over each atom, storing neighbors
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over rest of atoms in i's bin, ghosts are at end of linked list
-    // if j is owned atom, store it, since j is beyond i in linked list
-    // if j is ghost, only store if j coords are "above and to the right" of i
-
-    for (j = bins[i]; j >= 0; j = bins[j]) {
-      if (j >= nlocal) {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-      }
-    }
-
-    // loop over all atoms in other bins in stencil, store every pair
-    // skip if i,j neighbor cutoff is less than bin distance
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_multi_old_newton_omp.h b/src/OPENMP/npair_half_multi_old_newton_omp.h
deleted file mode 100644
index 8182b49bd7..0000000000
--- a/src/OPENMP/npair_half_multi_old_newton_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/multi/old/newton/omp,
-           NPairHalfMultiOldNewtonOmp,
-           NP_HALF | NP_MULTI_OLD | NP_NEWTON | NP_OMP | NP_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_MULTI_OLD_NEWTON_OMP_H
-#define LMP_NPAIR_HALF_MULTI_OLD_NEWTON_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfMultiOldNewtonOmp : public NPair {
- public:
-  NPairHalfMultiOldNewtonOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_multi_old_newton_tri_omp.cpp b/src/OPENMP/npair_half_multi_old_newton_tri_omp.cpp
deleted file mode 100644
index 38f645abad..0000000000
--- a/src/OPENMP/npair_half_multi_old_newton_tri_omp.cpp
+++ /dev/null
@@ -1,165 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_multi_old_newton_tri_omp.h"
-#include "npair_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfMultiOldNewtonTriOmp::NPairHalfMultiOldNewtonTriOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   multi-type stencil is itype dependent and is distance checked
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfMultiOldNewtonTriOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const double delta = 0.01 * force->angstrom;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itag = tag[i];
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in bins in stencil
-    // for triclinic, bin stencil is full in all 3 dims
-    // must use itag/jtag to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-
-        if (j >= nlocal) {
-          jtag = tag[j];
-          if (itag > jtag) {
-            if ((itag+jtag) % 2 == 0) continue;
-          } else if (itag < jtag) {
-            if ((itag+jtag) % 2 == 1) continue;
-          } else {
-            if (fabs(x[j][2]-ztmp) > delta) {
-              if (x[j][2] < ztmp) continue;
-            } else if (fabs(x[j][1]-ytmp) > delta) {
-              if (x[j][1] < ytmp) continue;
-            } else {
-              if (x[j][0] < xtmp) continue;
-            }
-          }
-        }
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_multi_old_newton_tri_omp.h b/src/OPENMP/npair_half_multi_old_newton_tri_omp.h
deleted file mode 100644
index 5efb007dc1..0000000000
--- a/src/OPENMP/npair_half_multi_old_newton_tri_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/multi/old/newton/tri/omp,
-           NPairHalfMultiOldNewtonTriOmp,
-           NP_HALF | NP_MULTI_OLD | NP_NEWTON | NP_TRI | NP_OMP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_MULTI_OLD_NEWTON_TRI_OMP_H
-#define LMP_NPAIR_HALF_MULTI_OLD_NEWTON_TRI_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfMultiOldNewtonTriOmp : public NPair {
- public:
-  NPairHalfMultiOldNewtonTriOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_nsq_newtoff_ghost_omp.cpp b/src/OPENMP/npair_half_nsq_newtoff_ghost_omp.cpp
deleted file mode 100644
index 54de8b9607..0000000000
--- a/src/OPENMP/npair_half_nsq_newtoff_ghost_omp.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_nsq_newtoff_ghost_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "group.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfNsqNewtoffGhostOmp::NPairHalfNsqNewtoffGhostOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
-   include neighbors of ghost atoms, but no "special neighbors" for ghosts
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if i owned and j ghost (also stored by proc owning j)
-   pair stored once if i,j are both ghost and i < j
-------------------------------------------------------------------------- */
-
-void NPairHalfNsqNewtoffGhostOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
-  const int nall = nlocal + atom->nghost;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nall);
-
-  int i,j,n,itype,jtype,which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  // loop over owned & ghost atoms, storing neighbors
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over remaining atoms, owned and ghost
-    // only store pair if i < j
-    // stores own/own pairs only once
-    // stores own/ghost pairs with owned atom only, on both procs
-    // stores ghost/ghost pairs only once
-    // no molecular test when i = ghost atom
-
-    if (i < nlocal) {
-      for (j = i+1; j < nall; j++) {
-        if (includegroup && !(mask[j] & bitmask)) continue;
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-
-    } else {
-      for (j = i+1; j < nall; j++) {
-        if (includegroup && !(mask[j] & bitmask)) continue;
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = atom->nlocal;
-  list->gnum = nall - atom->nlocal;
-}
diff --git a/src/OPENMP/npair_half_nsq_newtoff_omp.cpp b/src/OPENMP/npair_half_nsq_newtoff_omp.cpp
deleted file mode 100644
index 54a90d9f2b..0000000000
--- a/src/OPENMP/npair_half_nsq_newtoff_omp.cpp
+++ /dev/null
@@ -1,134 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_nsq_newtoff_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "group.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfNsqNewtoffOmp::NPairHalfNsqNewtoffOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfNsqNewtoffOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
-  const int nall = atom->nlocal + atom->nghost;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,n,itype,jtype,which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  // loop over owned atoms, storing neighbors
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over remaining atoms, owned and ghost
-    // only store pair if i < j
-
-    for (j = i+1; j < nall; j++) {
-      if (includegroup && !(mask[j] & bitmask)) continue;
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_nsq_newtoff_omp.h b/src/OPENMP/npair_half_nsq_newtoff_omp.h
deleted file mode 100644
index 47a03750f7..0000000000
--- a/src/OPENMP/npair_half_nsq_newtoff_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/nsq/newtoff/omp,
-           NPairHalfNsqNewtoffOmp,
-           NP_HALF | NP_NSQ | NP_NEWTOFF | NP_OMP | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_NSQ_NEWTOFF_OMP_H
-#define LMP_NPAIR_HALF_NSQ_NEWTOFF_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfNsqNewtoffOmp : public NPair {
- public:
-  NPairHalfNsqNewtoffOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_nsq_newton_omp.cpp b/src/OPENMP/npair_half_nsq_newton_omp.cpp
deleted file mode 100644
index 42cf63278a..0000000000
--- a/src/OPENMP/npair_half_nsq_newton_omp.cpp
+++ /dev/null
@@ -1,167 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_nsq_newton_omp.h"
-#include "npair_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "group.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfNsqNewtonOmp::NPairHalfNsqNewtonOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   N^2 / 2 search for neighbor pairs with full Newton's 3rd law
-   every pair stored exactly once by some processor
-   decision on ghost atoms based on itag,jtag tests
-------------------------------------------------------------------------- */
-
-void NPairHalfNsqNewtonOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const double delta = 0.01 * force->angstrom;
-  const int triclinic = domain->triclinic;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,n,itype,jtype,which,imol,iatom;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int nall = atom->nlocal + atom->nghost;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itag = tag[i];
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over remaining atoms, owned and ghost
-    // use itag/jtap comparision to eliminate half the interactions
-    // itag = jtag is possible for long cutoffs that include images of self
-    // for triclinic, must use delta to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision as for orthog
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    for (j = i+1; j < nall; j++) {
-      if (includegroup && !(mask[j] & bitmask)) continue;
-
-      if (j >= nlocal) {
-        jtag = tag[j];
-        if (itag > jtag) {
-          if ((itag+jtag) % 2 == 0) continue;
-        } else if (itag < jtag) {
-          if ((itag+jtag) % 2 == 1) continue;
-        } else if (triclinic) {
-          if (fabs(x[j][2]-ztmp) > delta) {
-            if (x[j][2] < ztmp) continue;
-          } else if (fabs(x[j][1]-ytmp) > delta) {
-            if (x[j][1] < ytmp) continue;
-          } else {
-            if (x[j][0] < xtmp) continue;
-          }
-        } else {
-          if (x[j][2] < ztmp) continue;
-          if (x[j][2] == ztmp) {
-            if (x[j][1] < ytmp) continue;
-            if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-          }
-        }
-      }
-
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_nsq_newton_omp.h b/src/OPENMP/npair_half_nsq_newton_omp.h
deleted file mode 100644
index 00e975d389..0000000000
--- a/src/OPENMP/npair_half_nsq_newton_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/nsq/newton/omp,
-           NPairHalfNsqNewtonOmp,
-           NP_HALF | NP_NSQ | NP_NEWTON | NP_OMP | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_NSQ_NEWTON_OMP_H
-#define LMP_NPAIR_HALF_NSQ_NEWTON_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfNsqNewtonOmp : public NPair {
- public:
-  NPairHalfNsqNewtonOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_respa_bin_newtoff_omp.cpp b/src/OPENMP/npair_half_respa_bin_newtoff_omp.cpp
deleted file mode 100644
index e1d4ee59b1..0000000000
--- a/src/OPENMP/npair_half_respa_bin_newtoff_omp.cpp
+++ /dev/null
@@ -1,203 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_respa_bin_newtoff_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfRespaBinNewtoffOmp::NPairHalfRespaBinNewtoffOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   multiple respa lists
-   binned neighbor list construction with partial Newton's 3rd law
-   each owned atom i checks own bin and surrounding bins in non-Newton stencil
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfRespaBinNewtoffOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-
-  const int respamiddle = list->respamiddle;
-
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*neighptr_inner,*neighptr_middle;
-
-  // loop over each atom, storing neighbors
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  int *ilist_inner = list->ilist_inner;
-  int *numneigh_inner = list->numneigh_inner;
-  int **firstneigh_inner = list->firstneigh_inner;
-
-  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
-  if (respamiddle) {
-    ilist_middle = list->ilist_middle;
-    numneigh_middle = list->numneigh_middle;
-    firstneigh_middle = list->firstneigh_middle;
-  }
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  MyPage<int> &ipage_inner = list->ipage_inner[tid];
-  ipage.reset();
-  ipage_inner.reset();
-
-  MyPage<int> *ipage_middle;
-  if (respamiddle) {
-    ipage_middle = list->ipage_middle + tid;
-    ipage_middle->reset();
-  }
-
-  int which = 0;
-  int minchange = 0;
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = n_inner = 0;
-    neighptr = ipage.vget();
-    neighptr_inner = ipage_inner.vget();
-    if (respamiddle) {
-      n_middle = 0;
-      neighptr_middle = ipage_middle->vget();
-    }
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    ibin = atom2bin[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in surrounding bins in stencil including self
-    // only store pair if i < j
-    // stores own/own pairs only once
-    // stores own/ghost pairs on both procs
-
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        if (j <= i) continue;
-
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-
-          if (rsq < cut_inner_sq) {
-            if (which == 0) neighptr_inner[n_inner++] = j;
-            else if (minchange) neighptr_inner[n_inner++] = j;
-            else if (which > 0)
-              neighptr_inner[n_inner++] = j ^ (which << SBBITS);
-          }
-
-          if (respamiddle &&
-              rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
-            if (which == 0) neighptr_middle[n_middle++] = j;
-            else if (minchange) neighptr_middle[n_middle++] = j;
-            else if (which > 0)
-              neighptr_middle[n_middle++] = j ^ (which << SBBITS);
-          }
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    ilist_inner[i] = i;
-    firstneigh_inner[i] = neighptr_inner;
-    numneigh_inner[i] = n_inner;
-    ipage.vgot(n_inner);
-    if (ipage_inner.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (respamiddle) {
-      ilist_middle[i] = i;
-      firstneigh_middle[i] = neighptr_middle;
-      numneigh_middle[i] = n_middle;
-      ipage_middle->vgot(n_middle);
-      if (ipage_middle->status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-    }
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-  list->inum_inner = nlocal;
-  if (respamiddle) list->inum_middle = nlocal;
-}
diff --git a/src/OPENMP/npair_half_respa_bin_newtoff_omp.h b/src/OPENMP/npair_half_respa_bin_newtoff_omp.h
deleted file mode 100644
index 8ad6209d21..0000000000
--- a/src/OPENMP/npair_half_respa_bin_newtoff_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/respa/bin/newtoff/omp,
-           NPairHalfRespaBinNewtoffOmp,
-           NP_HALF | NP_RESPA | NP_BIN | NP_NEWTOFF | NP_OMP |
-           NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_RESPA_BIN_NEWTOFF_OMP_H
-#define LMP_NPAIR_HALF_RESPA_BIN_NEWTOFF_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfRespaBinNewtoffOmp : public NPair {
- public:
-  NPairHalfRespaBinNewtoffOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_respa_bin_newton_omp.cpp b/src/OPENMP/npair_half_respa_bin_newton_omp.cpp
deleted file mode 100644
index ce8a103170..0000000000
--- a/src/OPENMP/npair_half_respa_bin_newton_omp.cpp
+++ /dev/null
@@ -1,249 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_respa_bin_newton_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfRespaBinNewtonOmp::NPairHalfRespaBinNewtonOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   multiple respa lists
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfRespaBinNewtonOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-
-  const int respamiddle = list->respamiddle;
-
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*neighptr_inner,*neighptr_middle;
-
-  // loop over each atom, storing neighbors
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  int *ilist_inner = list->ilist_inner;
-  int *numneigh_inner = list->numneigh_inner;
-  int **firstneigh_inner = list->firstneigh_inner;
-
-  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
-  if (respamiddle) {
-    ilist_middle = list->ilist_middle;
-    numneigh_middle = list->numneigh_middle;
-    firstneigh_middle = list->firstneigh_middle;
-  }
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  MyPage<int> &ipage_inner = list->ipage_inner[tid];
-  ipage.reset();
-  ipage_inner.reset();
-
-  MyPage<int> *ipage_middle;
-  if (respamiddle) {
-    ipage_middle = list->ipage_middle + tid;
-    ipage_middle->reset();
-  }
-
-  int which = 0;
-  int minchange = 0;
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = n_inner = 0;
-    neighptr = ipage.vget();
-    neighptr_inner = ipage_inner.vget();
-    if (respamiddle) {
-      n_middle = 0;
-      neighptr_middle = ipage_middle->vget();
-    }
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over rest of atoms in i's bin, ghosts are at end of linked list
-    // if j is owned atom, store it, since j is beyond i in linked list
-    // if j is ghost, only store if j coords are "above and to the right" of i
-
-    for (j = bins[i]; j >= 0; j = bins[j]) {
-      if (j >= nlocal) {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-            else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-
-        if (rsq < cut_inner_sq) {
-          if (which == 0) neighptr_inner[n_inner++] = j;
-          else if (minchange) neighptr_inner[n_inner++] = j;
-          else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS);
-        }
-
-        if (respamiddle &&
-            rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
-          if (which == 0) neighptr_middle[n_middle++] = j;
-          else if (minchange) neighptr_middle[n_middle++] = j;
-          else if (which > 0)
-            neighptr_middle[n_middle++] = j ^ (which << SBBITS);
-        }
-      }
-    }
-
-    // loop over all atoms in other bins in stencil, store every pair
-
-    ibin = atom2bin[i];
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-
-          if (rsq < cut_inner_sq) {
-            if (which == 0) neighptr_inner[n_inner++] = j;
-            else if (minchange) neighptr_inner[n_inner++] = j;
-            else if (which > 0)
-              neighptr_inner[n_inner++] = j ^ (which << SBBITS);
-          }
-
-          if (respamiddle &&
-              rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
-            if (which == 0) neighptr_middle[n_middle++] = j;
-            else if (minchange) neighptr_middle[n_middle++] = j;
-            else if (which > 0)
-              neighptr_middle[n_middle++] = j ^ (which << SBBITS);
-          }
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    ilist_inner[i] = i;
-    firstneigh_inner[i] = neighptr_inner;
-    numneigh_inner[i] = n_inner;
-    ipage.vgot(n_inner);
-    if (ipage_inner.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (respamiddle) {
-      ilist_middle[i] = i;
-      firstneigh_middle[i] = neighptr_middle;
-      numneigh_middle[i] = n_middle;
-      ipage_middle->vgot(n_middle);
-      if (ipage_middle->status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-    }
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-  list->inum_inner = nlocal;
-  if (respamiddle) list->inum_middle = nlocal;
-}
diff --git a/src/OPENMP/npair_half_respa_bin_newton_tri_omp.h b/src/OPENMP/npair_half_respa_bin_newton_tri_omp.h
deleted file mode 100644
index df45372960..0000000000
--- a/src/OPENMP/npair_half_respa_bin_newton_tri_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/respa/bin/newton/tri/omp,
-           NPairHalfRespaBinNewtonTriOmp,
-           NP_HALF | NP_RESPA | NP_BIN | NP_NEWTON | NP_TRI | NP_OMP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_RESPA_BIN_NEWTON_TRI_OMP_H
-#define LMP_NPAIR_HALF_RESPA_BIN_NEWTON_TRI_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfRespaBinNewtonTriOmp : public NPair {
- public:
-  NPairHalfRespaBinNewtonTriOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_respa_nsq_newtoff_omp.cpp b/src/OPENMP/npair_half_respa_nsq_newtoff_omp.cpp
deleted file mode 100644
index 428ca778e8..0000000000
--- a/src/OPENMP/npair_half_respa_nsq_newtoff_omp.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "omp_compat.h"
-#include "npair_half_respa_nsq_newtoff_omp.h"
-#include "npair_omp.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "group.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfRespaNsqNewtoffOmp::NPairHalfRespaNsqNewtoffOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   multiple respa lists
-   N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
-   pair added to list if atoms i and j are both owned and i < j
-   pair added if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfRespaNsqNewtoffOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-
-  NPAIR_OMP_INIT;
-
-  const int respamiddle = list->respamiddle;
-
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,n,itype,jtype,n_inner,n_middle,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*neighptr_inner,*neighptr_middle;
-
-  // loop over each atom, storing neighbors
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int nall = atom->nlocal + atom->nghost;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  int *ilist_inner = list->ilist_inner;
-  int *numneigh_inner = list->numneigh_inner;
-  int **firstneigh_inner = list->firstneigh_inner;
-
-  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
-  if (respamiddle) {
-    ilist_middle = list->ilist_middle;
-    numneigh_middle = list->numneigh_middle;
-    firstneigh_middle = list->firstneigh_middle;
-  }
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  MyPage<int> &ipage_inner = list->ipage_inner[tid];
-  ipage.reset();
-  ipage_inner.reset();
-
-  MyPage<int> *ipage_middle;
-  if (respamiddle) {
-    ipage_middle = list->ipage_middle + tid;
-    ipage_middle->reset();
-  }
-
-  int which = 0;
-  int minchange = 0;
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = n_inner = 0;
-    neighptr = ipage.vget();
-    neighptr_inner = ipage_inner.vget();
-    if (respamiddle) {
-      n_middle = 0;
-      neighptr_middle = ipage_middle->vget();
-    }
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over remaining atoms, owned and ghost
-
-    for (j = i+1; j < nall; j++) {
-      if (includegroup && !(mask[j] & bitmask)) continue;
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-
-        if (rsq < cut_inner_sq) {
-          if (which == 0) neighptr_inner[n_inner++] = j;
-          else if (minchange) neighptr_inner[n_inner++] = j;
-          else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS);
-        }
-
-        if (respamiddle && rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
-          if (which == 0) neighptr_middle[n_middle++] = j;
-          else if (minchange) neighptr_middle[n_middle++] = j;
-          else if (which > 0)
-            neighptr_middle[n_middle++] = j ^ (which << SBBITS);
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    ilist_inner[i] = i;
-    firstneigh_inner[i] = neighptr_inner;
-    numneigh_inner[i] = n_inner;
-    ipage.vgot(n_inner);
-    if (ipage_inner.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (respamiddle) {
-      ilist_middle[i] = i;
-      firstneigh_middle[i] = neighptr_middle;
-      numneigh_middle[i] = n_middle;
-      ipage_middle->vgot(n_middle);
-      if (ipage_middle->status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-    }
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-  list->inum_inner = nlocal;
-  if (respamiddle) list->inum_middle = nlocal;
-}
diff --git a/src/OPENMP/npair_half_respa_nsq_newton_omp.h b/src/OPENMP/npair_half_respa_nsq_newton_omp.h
deleted file mode 100644
index d949ef7e93..0000000000
--- a/src/OPENMP/npair_half_respa_nsq_newton_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/respa/nsq/newton/omp,
-           NPairHalfRespaNsqNewtonOmp,
-           NP_HALF | NP_RESPA | NP_NSQ | NP_NEWTON | NP_OMP |
-           NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_RESPA_NSQ_NEWTON_OMP_H
-#define LMP_NPAIR_HALF_RESPA_NSQ_NEWTON_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfRespaNsqNewtonOmp : public NPair {
- public:
-  NPairHalfRespaNsqNewtonOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_size_bin_newtoff_omp.cpp b/src/OPENMP/npair_half_size_bin_newtoff_omp.cpp
deleted file mode 100644
index 478e28a5f4..0000000000
--- a/src/OPENMP/npair_half_size_bin_newtoff_omp.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_bin_newtoff_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeBinNewtoffOmp::NPairHalfSizeBinNewtoffOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with partial Newton's 3rd law
-   each owned atom i checks own bin and surrounding bins in non-Newton stencil
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeBinNewtoffOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const int history = list->history;
-  const int mask_history = 1 << HISTBITS;
-
-  NPAIR_OMP_INIT;
-
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,jh,k,n,ibin,which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutsq;
-  int *neighptr;
-
-  // loop over each atom, storing neighbors
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    ibin = atom2bin[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in surrounding bins in stencil including self
-    // only store pair if i < j
-    // stores own/own pairs only once
-    // stores own/ghost pairs on both procs
-
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        if (j <= i) continue;
-        if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        radsum = radi + radius[j];
-        cutsq = (radsum+skin) * (radsum+skin);
-
-        if (rsq <= cutsq) {
-          jh = j;
-          if (history && rsq < radsum*radsum)
-            jh = jh ^ mask_history;
-
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = jh;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = jh;
-            else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-          } else neighptr[n++] = jh;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_size_bin_newtoff_omp.h b/src/OPENMP/npair_half_size_bin_newtoff_omp.h
deleted file mode 100644
index a91836b152..0000000000
--- a/src/OPENMP/npair_half_size_bin_newtoff_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/bin/newtoff/omp,
-           NPairHalfSizeBinNewtoffOmp,
-           NP_HALF | NP_SIZE | NP_BIN | NP_NEWTOFF | NP_OMP |
-           NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_BIN_NEWTOFF_OMP_H
-#define LMP_NPAIR_HALF_SIZE_BIN_NEWTOFF_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeBinNewtoffOmp : public NPair {
- public:
-  NPairHalfSizeBinNewtoffOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_size_bin_newton_omp.cpp b/src/OPENMP/npair_half_size_bin_newton_omp.cpp
deleted file mode 100644
index dba392781e..0000000000
--- a/src/OPENMP/npair_half_size_bin_newton_omp.cpp
+++ /dev/null
@@ -1,187 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_bin_newton_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeBinNewtonOmp::NPairHalfSizeBinNewtonOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeBinNewtonOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const int history = list->history;
-  const int mask_history = 1 << HISTBITS;
-
-  NPAIR_OMP_INIT;
-
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,jh,k,n,ibin,which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over rest of atoms in i's bin, ghosts are at end of linked list
-    // if j is owned atom, store it, since j is beyond i in linked list
-    // if j is ghost, only store if j coords are "above and to the right" of i
-
-    for (j = bins[i]; j >= 0; j = bins[j]) {
-      if (j >= nlocal) {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-      radsum = radi + radius[j];
-      cutsq = (radsum+skin) * (radsum+skin);
-
-      if (rsq <= cutsq) {
-        jh = j;
-        if (history && rsq < radsum*radsum)
-          jh = jh ^ mask_history;
-
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = jh;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = jh;
-          else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-        } else neighptr[n++] = jh;
-      }
-    }
-
-    // loop over all atoms in other bins in stencil, store every pair
-
-    ibin = atom2bin[i];
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        radsum = radi + radius[j];
-        cutsq = (radsum+skin) * (radsum+skin);
-
-        if (rsq <= cutsq) {
-          jh = j;
-          if (history && rsq < radsum*radsum)
-            jh = jh ^ mask_history;
-
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = jh;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = jh;
-            else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-          } else neighptr[n++] = jh;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_size_bin_newton_omp.h b/src/OPENMP/npair_half_size_bin_newton_omp.h
deleted file mode 100644
index efc554bb3c..0000000000
--- a/src/OPENMP/npair_half_size_bin_newton_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/bin/newton/omp,
-           NPairHalfSizeBinNewtonOmp,
-           NP_HALF | NP_SIZE | NP_BIN | NP_NEWTON | NP_OMP | NP_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_BIN_NEWTON_OMP_H
-#define LMP_NPAIR_HALF_SIZE_BIN_NEWTON_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeBinNewtonOmp : public NPair {
- public:
-  NPairHalfSizeBinNewtonOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_size_bin_newton_tri_omp.cpp b/src/OPENMP/npair_half_size_bin_newton_tri_omp.cpp
deleted file mode 100644
index 7fcf07e9c8..0000000000
--- a/src/OPENMP/npair_half_size_bin_newton_tri_omp.cpp
+++ /dev/null
@@ -1,170 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_bin_newton_tri_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeBinNewtonTriOmp::NPairHalfSizeBinNewtonTriOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const int history = list->history;
-  const int mask_history = 1 << HISTBITS;
-  const double delta = 0.01 * force->angstrom;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,jh,k,n,ibin,which,imol,iatom;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itag = tag[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in bins in stencil
-    // for triclinic, bin stencil is full in all 3 dims
-    // must use itag/jtag to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    ibin = atom2bin[i];
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-
-        if (j <= i) continue;
-        if (j >= nlocal) {
-          jtag = tag[j];
-          if (itag > jtag) {
-            if ((itag+jtag) % 2 == 0) continue;
-          } else if (itag < jtag) {
-            if ((itag+jtag) % 2 == 1) continue;
-          } else {
-            if (fabs(x[j][2]-ztmp) > delta) {
-              if (x[j][2] < ztmp) continue;
-            } else if (fabs(x[j][1]-ytmp) > delta) {
-              if (x[j][1] < ytmp) continue;
-            } else {
-              if (x[j][0] < xtmp) continue;
-            }
-          }
-        }
-
-        if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        radsum = radi + radius[j];
-        cutsq = (radsum+skin) * (radsum+skin);
-
-        if (rsq <= cutsq) {
-          jh = j;
-          if (history && rsq < radsum*radsum)
-            jh = jh ^ mask_history;
-
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = jh;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = jh;
-            else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-          } else neighptr[n++] = jh;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_size_bin_newton_tri_omp.h b/src/OPENMP/npair_half_size_bin_newton_tri_omp.h
deleted file mode 100644
index 65b46395ca..0000000000
--- a/src/OPENMP/npair_half_size_bin_newton_tri_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/bin/newton/tri/omp,
-           NPairHalfSizeBinNewtonTriOmp,
-           NP_HALF | NP_SIZE | NP_BIN | NP_NEWTON | NP_TRI | NP_OMP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_BIN_NEWTON_TRI_OMP_H
-#define LMP_NPAIR_HALF_SIZE_BIN_NEWTON_TRI_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeBinNewtonTriOmp : public NPair {
- public:
-  NPairHalfSizeBinNewtonTriOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_size_multi_newtoff_omp.cpp b/src/OPENMP/npair_half_size_multi_newtoff_omp.cpp
deleted file mode 100644
index ba0dfc16be..0000000000
--- a/src/OPENMP/npair_half_size_multi_newtoff_omp.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_multi_newtoff_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeMultiNewtoffOmp::NPairHalfSizeMultiNewtoffOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with partial Newton's 3rd law
-   multi stencil is icollection-jcollection dependent
-   each owned atom i checks own bin and other bins in stencil
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeMultiNewtoffOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const int history = list->history;
-  const int mask_history = 1 << HISTBITS;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,jh,k,n,itype,jtype,icollection,jcollection,ibin,jbin,ns;
-  int which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutdistsq;
-  int *neighptr,*s;
-  int js;
-
-  // loop over each atom, storing neighbors
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-      if(icollection == jcollection) jbin = ibin;
-          else jbin = coord2bin(x[i], jcollection);
-
-      // loop over all atoms in other bins in stencil including self
-      // only store pair if i < j
-      // stores own/own pairs only once
-      // stores own/ghost pairs on both procs
-      // use full stencil for all collection combinations
-
-      s = stencil_multi[icollection][jcollection];
-      ns = nstencil_multi[icollection][jcollection];
-
-      for (k = 0; k < ns; k++) {
-        js = binhead_multi[jcollection][jbin + s[k]];
-        for (j = js; j >= 0; j = bins[j]) {
-          if (j <= i) continue;
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-          radsum = radi + radius[j];
-          cutdistsq = (radsum+skin) * (radsum+skin);
-
-          if (rsq <= cutdistsq) {
-            jh = j;
-            if (history && rsq < radsum*radsum)
-              jh = jh ^ mask_history;
-
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = jh;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = jh;
-              else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-            } else neighptr[n++] = jh;
-          }
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_size_multi_newtoff_omp.h b/src/OPENMP/npair_half_size_multi_newtoff_omp.h
deleted file mode 100644
index 2e58d9ea38..0000000000
--- a/src/OPENMP/npair_half_size_multi_newtoff_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/multi/newtoff/omp,
-           NPairHalfSizeMultiNewtoffOmp,
-           NP_HALF | NP_SIZE | NP_MULTI | NP_NEWTOFF | NP_OMP | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_MULTI_NEWTOFF_OMP_H
-#define LMP_NPAIR_HALF_SIZE_MULTI_NEWTOFF_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeMultiNewtoffOmp : public NPair {
- public:
-  NPairHalfSizeMultiNewtoffOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_size_multi_newton_omp.cpp b/src/OPENMP/npair_half_size_multi_newton_omp.cpp
deleted file mode 100644
index 4bc17f911c..0000000000
--- a/src/OPENMP/npair_half_size_multi_newton_omp.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_multi_newton_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeMultiNewtonOmp::NPairHalfSizeMultiNewtonOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with full Newton's 3rd law
-   multi stencil is icollection-jcollection dependent
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeMultiNewtonOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const int history = list->history;
-  const int mask_history = 1 << HISTBITS;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,jh,k,n,itype,jtype,icollection,jcollection,ibin,jbin,ns;
-  int which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutdistsq;
-  int *neighptr,*s;
-  int js;
-
-  // loop over each atom, storing neighbors
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-      if(icollection == jcollection) jbin = ibin;
-          else jbin = coord2bin(x[i], jcollection);
-
-      // if same size: uses half stencil so check central bin
-      if(cutcollectionsq[icollection][icollection] == cutcollectionsq[jcollection][jcollection]){
-
-        if(icollection == jcollection) js = bins[i];
-        else js = binhead_multi[jcollection][jbin];
-
-        // if same collection,
-        //   if j is owned atom, store it, since j is beyond i in linked list
-        //   if j is ghost, only store if j coords are "above and to the right" of i
-
-        // if different collections,
-        //   if j is owned atom, store it if j > i
-        //   if j is ghost, only store if j coords are "above and to the right" of i
-
-        for (j = js; j >= 0; j = bins[j]) {
-          if(icollection != jcollection && j < i) continue;
-
-              if (j >= nlocal) {
-                if (x[j][2] < ztmp) continue;
-                if (x[j][2] == ztmp) {
-                  if (x[j][1] < ytmp) continue;
-                  if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-                }
-              }
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-          radsum = radi + radius[j];
-          cutdistsq = (radsum+skin) * (radsum+skin);
-
-          if (rsq <= cutdistsq) {
-            jh = j;
-            if (history && rsq < radsum*radsum)
-              jh = jh ^ mask_history;
-
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = jh;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = jh;
-              else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-            } else neighptr[n++] = jh;
-          }
-        }
-      }
-
-      // for all collections, loop over all atoms in other bins in stencil, store every pair
-      // stencil is empty if i larger than j
-      // stencil is half if i same size as j
-      // stencil is full if i smaller than j
-
-      s = stencil_multi[icollection][jcollection];
-      ns = nstencil_multi[icollection][jcollection];
-
-      for (k = 0; k < ns; k++) {
-        js = binhead_multi[jcollection][jbin + s[k]];
-        for (j = js; j >= 0; j = bins[j]) {
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-          radsum = radi + radius[j];
-          cutdistsq = (radsum+skin) * (radsum+skin);
-
-          if (rsq <= cutdistsq) {
-            if (history && rsq < radsum*radsum)
-              j = j ^ mask_history;
-
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = j;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = j;
-              else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-            } else neighptr[n++] = j;
-          }
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_size_multi_newton_omp.h b/src/OPENMP/npair_half_size_multi_newton_omp.h
deleted file mode 100644
index 99f9174913..0000000000
--- a/src/OPENMP/npair_half_size_multi_newton_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/multi/newton/omp,
-           NPairHalfSizeMultiNewtonOmp,
-           NP_HALF | NP_SIZE | NP_MULTI | NP_NEWTON | NP_OMP | NP_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_MULTI_NEWTON_OMP_H
-#define LMP_NPAIR_HALF_SIZE_MULTI_NEWTON_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeMultiNewtonOmp : public NPair {
- public:
-  NPairHalfSizeMultiNewtonOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_size_multi_newton_tri_omp.cpp b/src/OPENMP/npair_half_size_multi_newton_tri_omp.cpp
deleted file mode 100644
index 4765c918b7..0000000000
--- a/src/OPENMP/npair_half_size_multi_newton_tri_omp.cpp
+++ /dev/null
@@ -1,199 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_multi_newton_tri_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeMultiNewtonTriOmp::NPairHalfSizeMultiNewtonTriOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   multi stencil is icollection-jcollection dependent
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeMultiNewtonTriOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const int history = list->history;
-  const int mask_history = 1 << HISTBITS;
-  const double delta = 0.01 * force->angstrom;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,jh,k,n,itype,jtype,icollection,jcollection,ibin,jbin,ns,js;
-  int which,imol,iatom;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutdistsq;
-  int *neighptr,*s;
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itag = tag[i];
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-
-      if (icollection == jcollection) jbin = ibin;
-      else jbin = coord2bin(x[i], jcollection);
-
-      // loop over all atoms in bins in stencil
-      // stencil is empty if i larger than j
-      // stencil is half if i same size as j
-      // stencil is full if i smaller than j
-      // if half: pairs for atoms j "below" i are excluded
-      // below = lower z or (equal z and lower y) or (equal zy and lower x)
-      //         (equal zyx and j <= i)
-      // latter excludes self-self interaction but allows superposed atoms
-
-      s = stencil_multi[icollection][jcollection];
-      ns = nstencil_multi[icollection][jcollection];
-
-      for (k = 0; k < ns; k++) {
-        js = binhead_multi[jcollection][jbin + s[k]];
-        for (j = js; j >= 0; j = bins[j]) {
-
-          // if same size (same collection), exclude half of interactions
-
-          if (cutcollectionsq[icollection][icollection] ==
-              cutcollectionsq[jcollection][jcollection]) {
-            if (j <= i) continue;
-            if (j >= nlocal) {
-              jtag = tag[j];
-              if (itag > jtag) {
-                if ((itag+jtag) % 2 == 0) continue;
-              } else if (itag < jtag) {
-                if ((itag+jtag) % 2 == 1) continue;
-              } else {
-                if (fabs(x[j][2]-ztmp) > delta) {
-                  if (x[j][2] < ztmp) continue;
-                } else if (fabs(x[j][1]-ytmp) > delta) {
-                  if (x[j][1] < ytmp) continue;
-                } else {
-                  if (x[j][0] < xtmp) continue;
-                }
-              }
-            }
-          }
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-          radsum = radi + radius[j];
-          cutdistsq = (radsum+skin) * (radsum+skin);
-
-          if (rsq <= cutdistsq) {
-            jh = j;
-            if (history && rsq < radsum*radsum)
-              jh = jh ^ mask_history;
-
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = jh;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = jh;
-              else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-            } else neighptr[n++] = jh;
-          }
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_size_multi_newton_tri_omp.h b/src/OPENMP/npair_half_size_multi_newton_tri_omp.h
deleted file mode 100644
index d4f6eacc27..0000000000
--- a/src/OPENMP/npair_half_size_multi_newton_tri_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/multi/newton/tri/omp,
-           NPairHalfSizeMultiNewtonTriOmp,
-           NP_HALF | NP_SIZE | NP_MULTI | NP_NEWTON | NP_TRI | NP_OMP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_MULTI_NEWTON_TRI_OMP_H
-#define LMP_NPAIR_HALF_SIZE_MULTI_NEWTON_TRI_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeMultiNewtonTriOmp : public NPair {
- public:
-  NPairHalfSizeMultiNewtonTriOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_size_multi_old_newtoff_omp.cpp b/src/OPENMP/npair_half_size_multi_old_newtoff_omp.cpp
deleted file mode 100644
index e4169482bc..0000000000
--- a/src/OPENMP/npair_half_size_multi_old_newtoff_omp.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_multi_old_newtoff_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeMultiOldNewtoffOmp::NPairHalfSizeMultiOldNewtoffOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with partial Newton's 3rd law
-   each owned atom i checks own bin and other bins in stencil
-   multi-type stencil is itype dependent and is distance checked
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeMultiOldNewtoffOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const int history = list->history;
-  const int mask_history = 1 << HISTBITS;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,jh,k,n,itype,jtype,ibin,ns,which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutdistsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in other bins in stencil including self
-    // only store pair if i < j
-    // skip if i,j neighbor cutoff is less than bin distance
-    // stores own/own pairs only once
-    // stores own/ghost pairs on both procs
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        if (j <= i) continue;
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        radsum = radi + radius[j];
-        cutdistsq = (radsum+skin) * (radsum+skin);
-
-        if (rsq <= cutdistsq) {
-          jh = j;
-          if (history && rsq < radsum*radsum)
-            jh = jh ^ mask_history;
-
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = jh;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = jh;
-            else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-          } else neighptr[n++] = jh;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_size_multi_old_newtoff_omp.h b/src/OPENMP/npair_half_size_multi_old_newtoff_omp.h
deleted file mode 100644
index 05eb0a5269..0000000000
--- a/src/OPENMP/npair_half_size_multi_old_newtoff_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/multi/old/newtoff/omp,
-           NPairHalfSizeMultiOldNewtoffOmp,
-           NP_HALF | NP_SIZE | NP_MULTI_OLD | NP_NEWTOFF | NP_OMP |
-           NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_MULTI_OLD_NEWTOFF_OMP_H
-#define LMP_NPAIR_HALF_SIZE_MULTI_OLD_NEWTOFF_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeMultiOldNewtoffOmp : public NPair {
- public:
-  NPairHalfSizeMultiOldNewtoffOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_size_multi_old_newton_omp.cpp b/src/OPENMP/npair_half_size_multi_old_newton_omp.cpp
deleted file mode 100644
index 7d6a3de871..0000000000
--- a/src/OPENMP/npair_half_size_multi_old_newton_omp.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_multi_old_newton_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeMultiOldNewtonOmp::NPairHalfSizeMultiOldNewtonOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   multi-type stencil is itype dependent and is distance checked
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeMultiOldNewtonOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const int history = list->history;
-  const int mask_history = 1 << HISTBITS;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,jh,k,n,itype,jtype,ibin,ns,which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutdistsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over rest of atoms in i's bin, ghosts are at end of linked list
-    // if j is owned atom, store it, since j is beyond i in linked list
-    // if j is ghost, only store if j coords are "above and to the right" of i
-
-    for (j = bins[i]; j >= 0; j = bins[j]) {
-      if (j >= nlocal) {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-      radsum = radi + radius[j];
-      cutdistsq = (radsum+skin) * (radsum+skin);
-
-      if (rsq <= cutdistsq) {
-        jh = j;
-        if (history && rsq < radsum*radsum)
-          jh = jh ^ mask_history;
-
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = jh;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = jh;
-          else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-        } else neighptr[n++] = jh;
-      }
-    }
-
-    // loop over all atoms in other bins in stencil, store every pair
-    // skip if i,j neighbor cutoff is less than bin distance
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        radsum = radi + radius[j];
-        cutdistsq = (radsum+skin) * (radsum+skin);
-
-        if (rsq <= cutdistsq) {
-          jh = j;
-          if (history && rsq < radsum*radsum)
-            jh = jh ^ mask_history;
-
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = jh;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = jh;
-            else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-          } else neighptr[n++] = jh;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_size_multi_old_newton_omp.h b/src/OPENMP/npair_half_size_multi_old_newton_omp.h
deleted file mode 100644
index 70ae082dbb..0000000000
--- a/src/OPENMP/npair_half_size_multi_old_newton_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/multi/old/newton/omp,
-           NPairHalfSizeMultiOldNewtonOmp,
-           NP_HALF | NP_SIZE | NP_MULTI_OLD | NP_NEWTON | NP_OMP | NP_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_MULTI_OLD_NEWTON_OMP_H
-#define LMP_NPAIR_HALF_SIZE_MULTI_OLD_NEWTON_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeMultiOldNewtonOmp : public NPair {
- public:
-  NPairHalfSizeMultiOldNewtonOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_size_multi_old_newton_tri_omp.cpp b/src/OPENMP/npair_half_size_multi_old_newton_tri_omp.cpp
deleted file mode 100644
index 1c6d025fab..0000000000
--- a/src/OPENMP/npair_half_size_multi_old_newton_tri_omp.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_multi_old_newton_tri_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeMultiOldNewtonTriOmp::NPairHalfSizeMultiOldNewtonTriOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   multi-type stencil is itype dependent and is distance checked
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeMultiOldNewtonTriOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const int history = list->history;
-  const int mask_history = 1 << HISTBITS;
-  const double delta = 0.01 * force->angstrom;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,jh,k,n,itype,jtype,ibin,ns,which,imol,iatom;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutdistsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itag = tag[i];
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in bins in stencil
-    // for triclinic, bin stencil is full in all 3 dims
-    // must use itag/jtag to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-
-        if (j <= i) continue;
-        if (j >= nlocal) {
-          jtag = tag[j];
-          if (itag > jtag) {
-            if ((itag+jtag) % 2 == 0) continue;
-          } else if (itag < jtag) {
-            if ((itag+jtag) % 2 == 1) continue;
-          } else {
-            if (fabs(x[j][2]-ztmp) > delta) {
-              if (x[j][2] < ztmp) continue;
-            } else if (fabs(x[j][1]-ytmp) > delta) {
-              if (x[j][1] < ytmp) continue;
-            } else {
-              if (x[j][0] < xtmp) continue;
-            }
-          }
-        }
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        radsum = radi + radius[j];
-        cutdistsq = (radsum+skin) * (radsum+skin);
-
-        if (rsq <= cutdistsq) {
-          jh = j;
-          if (history && rsq < radsum*radsum)
-            jh = jh ^ mask_history;
-
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = jh;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = jh;
-            else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-          } else neighptr[n++] = jh;
-        }
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_size_multi_old_newton_tri_omp.h b/src/OPENMP/npair_half_size_multi_old_newton_tri_omp.h
deleted file mode 100644
index 3c1765b668..0000000000
--- a/src/OPENMP/npair_half_size_multi_old_newton_tri_omp.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/multi/old/newton/tri/omp,
-           NPairHalfSizeMultiOldNewtonTriOmp,
-           NP_HALF | NP_SIZE | NP_MULTI_OLD | NP_NEWTON | NP_TRI | NP_OMP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_MULTI_OLD_NEWTON_TRI_OMP_H
-#define LMP_NPAIR_HALF_SIZE_MULTI_OLD_NEWTON_TRI_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeMultiOldNewtonTriOmp : public NPair {
- public:
-  NPairHalfSizeMultiOldNewtonTriOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_size_nsq_newtoff_omp.cpp b/src/OPENMP/npair_half_size_nsq_newtoff_omp.cpp
deleted file mode 100644
index 2982586a18..0000000000
--- a/src/OPENMP/npair_half_size_nsq_newtoff_omp.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_nsq_newtoff_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "group.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeNsqNewtoffOmp::NPairHalfSizeNsqNewtoffOmp(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
-   shear history must be accounted for when a neighbor pair is added
-   pair added to list if atoms i and j are both owned and i < j
-   pair added if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeNsqNewtoffOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const int history = list->history;
-  const int mask_history = 1 << HISTBITS;
-
-  NPAIR_OMP_INIT;
-
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,jh,n,which,imol,iatom;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int nall = atom->nlocal + atom->nghost;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over remaining atoms, owned and ghost
-
-    for (j = i+1; j < nall; j++) {
-      if (includegroup && !(mask[j] & bitmask)) continue;
-      if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-      radsum = radi + radius[j];
-      cutsq = (radsum+skin) * (radsum+skin);
-
-      if (rsq <= cutsq) {
-        jh = j;
-        if (history && rsq < radsum*radsum)
-          jh = jh ^ mask_history;
-
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = jh;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = jh;
-          else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-        } else neighptr[n++] = jh;
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_size_nsq_newtoff_omp.h b/src/OPENMP/npair_half_size_nsq_newtoff_omp.h
deleted file mode 100644
index 93f038eed4..0000000000
--- a/src/OPENMP/npair_half_size_nsq_newtoff_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/nsq/newtoff/omp,
-           NPairHalfSizeNsqNewtoffOmp,
-           NP_HALF | NP_SIZE | NP_NSQ | NP_NEWTOFF | NP_OMP |
-           NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_NSQ_NEWTOFF_OMP_H
-#define LMP_NPAIR_HALF_SIZE_NSQ_NEWTOFF_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeNsqNewtoffOmp : public NPair {
- public:
-  NPairHalfSizeNsqNewtoffOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_half_size_nsq_newton_omp.cpp b/src/OPENMP/npair_half_size_nsq_newton_omp.cpp
deleted file mode 100644
index 0628478c0b..0000000000
--- a/src/OPENMP/npair_half_size_nsq_newton_omp.cpp
+++ /dev/null
@@ -1,181 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_nsq_newton_omp.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "group.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeNsqNewtonOmp::NPairHalfSizeNsqNewtonOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   N^2 / 2 search for neighbor pairs with full Newton's 3rd law
-   pair added to list if atoms i and j are both owned and i < j
-   if j is ghost only me or other proc adds pair
-   decision based on itag,jtag tests
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeNsqNewtonOmp::build(NeighList *list)
-{
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
-  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
-  const int molecular = atom->molecular;
-  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
-  const int history = list->history;
-  const int mask_history = 1 << HISTBITS;
-  const double delta = 0.01 * force->angstrom;
-  const int triclinic = domain->triclinic;
-
-  NPAIR_OMP_INIT;
-
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(nlocal);
-
-  int i,j,jh,n,which,imol,iatom;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  tagint *tag = atom->tag;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-
-  int nall = atom->nlocal + atom->nghost;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  for (i = ifrom; i < ito; i++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    itag = tag[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over remaining atoms, owned and ghost
-    // use itag/jtap comparision to eliminate half the interactions
-    // itag = jtag is possible for long cutoffs that include images of self
-    // for triclinic, must use delta to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision as for orthog
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    for (j = i+1; j < nall; j++) {
-      if (includegroup && !(mask[j] & bitmask)) continue;
-
-      if (j >= nlocal) {
-        jtag = tag[j];
-        if (itag > jtag) {
-          if ((itag+jtag) % 2 == 0) continue;
-        } else if (itag < jtag) {
-          if ((itag+jtag) % 2 == 1) continue;
-        } else if (triclinic) {
-          if (fabs(x[j][2]-ztmp) > delta) {
-            if (x[j][2] < ztmp) continue;
-          } else if (fabs(x[j][1]-ytmp) > delta) {
-            if (x[j][1] < ytmp) continue;
-          } else {
-            if (x[j][0] < xtmp) continue;
-          }
-        } else {
-          if (x[j][2] < ztmp) continue;
-          if (x[j][2] == ztmp) {
-            if (x[j][1] < ytmp) continue;
-            if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-          }
-        }
-      }
-
-      if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-      radsum = radi + radius[j];
-      cutsq = (radsum+skin) * (radsum+skin);
-
-      if (rsq <= cutsq) {
-        jh = j;
-        if (history && rsq < radsum*radsum)
-          jh = jh ^ mask_history;
-
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = jh;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = jh;
-          else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-        } else neighptr[n++] = jh;
-      }
-    }
-
-    ilist[i] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = nlocal;
-}
diff --git a/src/OPENMP/npair_half_size_nsq_newton_omp.h b/src/OPENMP/npair_half_size_nsq_newton_omp.h
deleted file mode 100644
index 9a7e912007..0000000000
--- a/src/OPENMP/npair_half_size_nsq_newton_omp.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/nsq/newton/omp,
-           NPairHalfSizeNsqNewtonOmp,
-           NP_HALF | NP_SIZE | NP_NSQ | NP_NEWTON | NP_OMP |
-           NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_NSQ_NEWTON_OMP_H
-#define LMP_NPAIR_HALF_SIZE_NSQ_NEWTON_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeNsqNewtonOmp : public NPair {
- public:
-  NPairHalfSizeNsqNewtonOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_halffull_newtoff_omp.cpp b/src/OPENMP/npair_halffull_newtoff_omp.cpp
deleted file mode 100644
index 379ae149f8..0000000000
--- a/src/OPENMP/npair_halffull_newtoff_omp.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_halffull_newtoff_omp.h"
-
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalffullNewtoffOmp::NPairHalffullNewtoffOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build half list from full list
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-   works if full list is a skip list
-------------------------------------------------------------------------- */
-
-void NPairHalffullNewtoffOmp::build(NeighList *list)
-{
-  const int inum_full = list->listfull->inum;
-
-  NPAIR_OMP_INIT;
-
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(inum_full);
-
-  int i,j,ii,jj,n,jnum,joriginal;
-  int *neighptr,*jlist;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  int *ilist_full = list->listfull->ilist;
-  int *numneigh_full = list->listfull->numneigh;
-  int **firstneigh_full = list->listfull->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  // loop over atoms in full list
-
-  for (ii = ifrom; ii < ito; ii++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    // loop over parent full list
-
-    i = ilist_full[ii];
-    jlist = firstneigh_full[i];
-    jnum = numneigh_full[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-      if (j > i) neighptr[n++] = joriginal;
-    }
-
-    ilist[ii] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = inum_full;
-}
diff --git a/src/OPENMP/npair_halffull_newtoff_omp.h b/src/OPENMP/npair_halffull_newtoff_omp.h
deleted file mode 100644
index f0ff24dcf9..0000000000
--- a/src/OPENMP/npair_halffull_newtoff_omp.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(halffull/newtoff/omp,
-           NPairHalffullNewtoffOmp,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
-           NP_ORTHO | NP_TRI |NP_OMP);
-
-NPairStyle(halffull/newtoff/skip/omp,
-           NPairHalffullNewtoffOmp,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_SKIP | NP_OMP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALFFULL_NEWTOFF_OMP_H
-#define LMP_NPAIR_HALFFULL_NEWTOFF_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalffullNewtoffOmp : public NPair {
- public:
-  NPairHalffullNewtoffOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_halffull_newton_omp.cpp b/src/OPENMP/npair_halffull_newton_omp.cpp
deleted file mode 100644
index e833ab3095..0000000000
--- a/src/OPENMP/npair_halffull_newton_omp.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_halffull_newton_omp.h"
-
-#include "atom.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalffullNewtonOmp::NPairHalffullNewtonOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build half list from full list
-   pair stored once if i,j are both owned and i < j
-   if j is ghost, only store if j coords are "above and to the right" of i
-   works if full list is a skip list
-------------------------------------------------------------------------- */
-
-void NPairHalffullNewtonOmp::build(NeighList *list)
-{
-  const int inum_full = list->listfull->inum;
-  const double delta = 0.01 * force->angstrom;
-  const int triclinic = domain->triclinic;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(inum_full);
-
-  int i,j,ii,jj,n,jnum,joriginal;
-  int *neighptr,*jlist;
-  double xtmp,ytmp,ztmp;
-
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  int *ilist_full = list->listfull->ilist;
-  int *numneigh_full = list->listfull->numneigh;
-  int **firstneigh_full = list->listfull->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  // loop over parent full list
-
-  for (ii = ifrom; ii < ito; ii++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    i = ilist_full[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    // loop over full neighbor list
-
-    jlist = firstneigh_full[i];
-    jnum = numneigh_full[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-
-      if (j < nlocal) {
-        if (i > j) continue;
-      } else if (triclinic) {
-        if (fabs(x[j][2]-ztmp) > delta) {
-          if (x[j][2] < ztmp) continue;
-        } else if (fabs(x[j][1]-ytmp) > delta) {
-          if (x[j][1] < ytmp) continue;
-        } else {
-          if (x[j][0] < xtmp) continue;
-        }
-      } else {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-      neighptr[n++] = joriginal;
-    }
-
-    ilist[ii] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = inum_full;
-}
diff --git a/src/OPENMP/npair_halffull_newton_omp.h b/src/OPENMP/npair_halffull_newton_omp.h
deleted file mode 100644
index 3ce9fd9ebe..0000000000
--- a/src/OPENMP/npair_halffull_newton_omp.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(halffull/newton/omp,
-           NPairHalffullNewtonOmp,
-           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_ORTHO | NP_TRI| NP_OMP);
-
-NPairStyle(halffull/newton/skip/omp,
-           NPairHalffullNewtonOmp,
-           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_ORTHO | NP_TRI | NP_SKIP | NP_OMP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALFFULL_NEWTON_OMP_H
-#define LMP_NPAIR_HALFFULL_NEWTON_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalffullNewtonOmp : public NPair {
- public:
-  NPairHalffullNewtonOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_halffull_omp.cpp b/src/OPENMP/npair_halffull_omp.cpp
new file mode 100644
index 0000000000..bc646accbc
--- /dev/null
+++ b/src/OPENMP/npair_halffull_omp.cpp
@@ -0,0 +1,165 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "npair_halffull_omp.h"
+#include "npair_omp.h"
+#include "omp_compat.h"
+
+#include "atom.h"
+#include "error.h"
+#include "force.h"
+#include "my_page.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<int NEWTON, int TRI, int TRIM>
+NPairHalffullOmp<NEWTON, TRI, TRIM>::NPairHalffullOmp(LAMMPS *lmp) : NPair(lmp) {}
+
+/* ----------------------------------------------------------------------
+   build half list from full list
+   pair stored once if i,j are both owned and i < j
+   works if full list is a skip list
+
+   Newtoff:
+     pair stored by me if j is ghost (also stored by proc owning j)
+     works for owned (non-ghost) list, also for ghost list
+     if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
+   Newton:
+     if j is ghost, only store if j coords are "above and to the right" of i
+     use i < j < nlocal to eliminate half the local/local interactions
+   Newton + Triclinic:
+     must use delta to eliminate half the local/ghost interactions
+     cannot use I/J exact coord comparision as for orthog
+       b/c transforming orthog -> lambda -> orthog for ghost atoms
+       with an added PBC offset can shift all 3 coords by epsilon
+------------------------------------------------------------------------- */
+
+template<int NEWTON, int TRI, int TRIM>
+void NPairHalffullOmp<NEWTON, TRI, TRIM>::build(NeighList *list)
+{
+  const int inum_full = list->listfull->inum;
+  const double delta = 0.01 * force->angstrom;
+
+  NPAIR_OMP_INIT;
+
+#if defined(_OPENMP)
+#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
+#endif
+  NPAIR_OMP_SETUP(inum_full);
+
+  int i, j, ii, jj, n, jnum, joriginal;
+  int *neighptr, *jlist;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
+
+  double **x = atom->x;
+  int nlocal = atom->nlocal;
+
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+  int *ilist_full = list->listfull->ilist;
+  int *numneigh_full = list->listfull->numneigh;
+  int **firstneigh_full = list->listfull->firstneigh;
+
+  // each thread has its own page allocator
+  MyPage<int> &ipage = list->ipage[tid];
+  ipage.reset();
+
+  double cutsq_custom = cutoff_custom * cutoff_custom;
+
+  // loop over atoms in full list
+
+  for (ii = ifrom; ii < ito; ii++) {
+
+    n = 0;
+    neighptr = ipage.vget();
+
+    // loop over parent full list
+
+    i = ilist_full[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+
+    jlist = firstneigh_full[i];
+    jnum = numneigh_full[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      joriginal = jlist[jj];
+      j = joriginal & NEIGHMASK;
+
+      if (NEWTON) {
+        if (j < nlocal) {
+          if (i > j) continue;
+        } else if (TRI) {
+          if (fabs(x[j][2] - ztmp) > delta) {
+            if (x[j][2] < ztmp) continue;
+          } else if (fabs(x[j][1] - ytmp) > delta) {
+            if (x[j][1] < ytmp) continue;
+          } else {
+            if (x[j][0] < xtmp) continue;
+          }
+        } else {
+          if (x[j][2] < ztmp) continue;
+          if (x[j][2] == ztmp) {
+            if (x[j][1] < ytmp) continue;
+            if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+          }
+        }
+
+        if (TRIM) {
+          delx = xtmp - x[j][0];
+          dely = ytmp - x[j][1];
+          delz = ztmp - x[j][2];
+          rsq = delx * delx + dely * dely + delz * delz;
+
+          if (rsq > cutsq_custom) continue;
+        }
+        neighptr[n++] = joriginal;
+      } else {
+        if (j > i) {
+          if (TRIM) {
+            delx = xtmp - x[j][0];
+            dely = ytmp - x[j][1];
+            delz = ztmp - x[j][2];
+            rsq = delx * delx + dely * dely + delz * delz;
+
+            if (rsq > cutsq_custom) continue;
+          }
+          neighptr[n++] = joriginal;
+        }
+      }
+    }
+
+    ilist[ii] = i;
+    firstneigh[i] = neighptr;
+    numneigh[i] = n;
+    ipage.vgot(n);
+    if (ipage.status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  }
+  NPAIR_OMP_CLOSE;
+  list->inum = inum_full;
+}
+
+namespace LAMMPS_NS {
+template class NPairHalffullOmp<0,0,0>;
+template class NPairHalffullOmp<1,0,0>;
+template class NPairHalffullOmp<1,1,0>;
+template class NPairHalffullOmp<0,0,1>;
+template class NPairHalffullOmp<1,0,1>;
+template class NPairHalffullOmp<1,1,1>;
+}
diff --git a/src/OPENMP/npair_halffull_omp.h b/src/OPENMP/npair_halffull_omp.h
new file mode 100644
index 0000000000..5c0c943566
--- /dev/null
+++ b/src/OPENMP/npair_halffull_omp.h
@@ -0,0 +1,131 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NPAIR_CLASS
+// clang-format off
+typedef NPairHalffullOmp<0, 0, 0> NPairHalffullNewtoffOmp;
+NPairStyle(halffull/newtoff/omp,
+           NPairHalffullNewtoffOmp,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_OMP);
+
+typedef NPairHalffullOmp<0, 0, 0> NPairHalffullNewtoffOmp;
+NPairStyle(halffull/newtoff/skip/omp,
+           NPairHalffullNewtoffOmp,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_OMP);
+
+typedef NPairHalffullOmp<0, 0, 0> NPairHalffullNewtoffOmp;
+NPairStyle(halffull/newtoff/ghost/omp,
+           NPairHalffullNewtoffOmp,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_GHOST | NP_OMP);
+
+typedef NPairHalffullOmp<0, 0, 0> NPairHalffullNewtoffOmp;
+NPairStyle(halffull/newtoff/skip/ghost/omp,
+           NPairHalffullNewtoffOmp,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_OMP);
+
+typedef NPairHalffullOmp<1, 0, 0> NPairHalffullNewtonOmp;
+NPairStyle(halffull/newton/omp,
+           NPairHalffullNewtonOmp,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_ORTHO | NP_OMP);
+
+typedef NPairHalffullOmp<1, 1, 0> NPairHalffullNewtonTriOmp;
+NPairStyle(halffull/newton/tri/omp,
+           NPairHalffullNewtonTriOmp,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_TRI | NP_OMP);
+
+typedef NPairHalffullOmp<1, 0, 0> NPairHalffullNewtonOmp;
+NPairStyle(halffull/newton/skip/omp,
+           NPairHalffullNewtonOmp,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_ORTHO | NP_SKIP | NP_OMP);
+
+typedef NPairHalffullOmp<1, 1, 0> NPairHalffullNewtonTriOmp;
+NPairStyle(halffull/newton/tri/skip/omp,
+           NPairHalffullNewtonTriOmp,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_TRI | NP_SKIP | NP_OMP);
+
+typedef NPairHalffullOmp<0, 0, 1> NPairHalffullTrimNewtoffOmp;
+NPairStyle(halffull/trim/newtoff/omp,
+           NPairHalffullTrimNewtoffOmp,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP);
+
+typedef NPairHalffullOmp<0, 0, 1> NPairHalffullTrimNewtoffOmp;
+NPairStyle(halffull/trim/newtoff/skip/omp,
+           NPairHalffullTrimNewtoffOmp,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_OMP);
+
+typedef NPairHalffullOmp<0, 0, 1> NPairHalffullTrimNewtoffOmp;
+NPairStyle(halffull/trim/newtoff/ghost/omp,
+           NPairHalffullTrimNewtoffOmp,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_OMP);
+
+typedef NPairHalffullOmp<0, 0, 1> NPairHalffullTrimNewtoffOmp;
+NPairStyle(halffull/trim/newtoff/skip/ghost/omp,
+           NPairHalffullTrimNewtoffOmp,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_TRIM | NP_OMP);
+
+typedef NPairHalffullOmp<1, 0, 1> NPairHalffullTrimNewtonOmp;
+NPairStyle(halffull/trim/newton/omp,
+           NPairHalffullTrimNewtonOmp,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_ORTHO | NP_TRIM | NP_OMP);
+
+typedef NPairHalffullOmp<1, 1, 1> NPairHalffullTrimNewtonTriOmp;
+NPairStyle(halffull/trim/newton/tri/omp,
+           NPairHalffullTrimNewtonTriOmp,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_TRI | NP_TRIM | NP_OMP);
+
+typedef NPairHalffullOmp<1, 0, 1> NPairHalffullTrimNewtonOmp;
+NPairStyle(halffull/trim/newton/skip/omp,
+           NPairHalffullTrimNewtonOmp,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_ORTHO | NP_SKIP | NP_TRIM | NP_OMP);
+
+typedef NPairHalffullOmp<1, 1, 1> NPairHalffullTrimNewtonTriOmp;
+NPairStyle(halffull/trim/newton/tri/skip/omp,
+           NPairHalffullTrimNewtonTriOmp,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_TRI | NP_SKIP | NP_TRIM | NP_OMP);
+// clang-format on
+#else
+
+#ifndef LMP_NPAIR_HALFFULL_OMP_H
+#define LMP_NPAIR_HALFFULL_OMP_H
+
+#include "npair.h"
+
+namespace LAMMPS_NS {
+
+template<int NEWTON, int TRI, int TRIM>
+class NPairHalffullOmp : public NPair {
+ public:
+  NPairHalffullOmp(class LAMMPS *);
+  void build(class NeighList *) override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/OPENMP/npair_halffull_trim_newtoff_omp.cpp b/src/OPENMP/npair_halffull_trim_newtoff_omp.cpp
deleted file mode 100644
index d0c5c1ab86..0000000000
--- a/src/OPENMP/npair_halffull_trim_newtoff_omp.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_halffull_trim_newtoff_omp.h"
-
-#include "atom.h"
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalffullTrimNewtoffOmp::NPairHalffullTrimNewtoffOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build half list from full list and trim to shorter cutoff
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-   works if full list is a skip list
-------------------------------------------------------------------------- */
-
-void NPairHalffullTrimNewtoffOmp::build(NeighList *list)
-{
-  const int inum_full = list->listfull->inum;
-
-  NPAIR_OMP_INIT;
-
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(inum_full);
-
-  int i,j,ii,jj,n,jnum,joriginal;
-  int *neighptr,*jlist;
-  double xtmp,ytmp,ztmp;
-  double delx,dely,delz,rsq;
-
-  double **x = atom->x;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  int *ilist_full = list->listfull->ilist;
-  int *numneigh_full = list->listfull->numneigh;
-  int **firstneigh_full = list->listfull->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  double cutsq_custom = cutoff_custom * cutoff_custom;
-
-  // loop over atoms in full list
-
-  for (ii = ifrom; ii < ito; ii++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    // loop over parent full list
-
-    i = ilist_full[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    jlist = firstneigh_full[i];
-    jnum = numneigh_full[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-
-      // trim to shorter cutoff
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-
-      if (rsq > cutsq_custom) continue;
-
-      if (j > i) neighptr[n++] = joriginal;
-    }
-
-    ilist[ii] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = inum_full;
-}
diff --git a/src/OPENMP/npair_halffull_trim_newtoff_omp.h b/src/OPENMP/npair_halffull_trim_newtoff_omp.h
deleted file mode 100644
index c86c132b69..0000000000
--- a/src/OPENMP/npair_halffull_trim_newtoff_omp.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(halffull/trim/newtoff/omp,
-           NPairHalffullTrimNewtoffOmp,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP);
-
-NPairStyle(halffull/trim/newtoff/skip/omp,
-           NPairHalffullTrimNewtoffOmp,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_SKIP | NP_OMP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALFFULL_TRIM_NEWTOFF_OMP_H
-#define LMP_NPAIR_HALFFULL_TRIM_NEWTOFF_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalffullTrimNewtoffOmp : public NPair {
- public:
-  NPairHalffullTrimNewtoffOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_halffull_trim_newton_omp.cpp b/src/OPENMP/npair_halffull_trim_newton_omp.cpp
deleted file mode 100644
index bd9d553eb9..0000000000
--- a/src/OPENMP/npair_halffull_trim_newton_omp.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_halffull_trim_newton_omp.h"
-
-#include "atom.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalffullTrimNewtonOmp::NPairHalffullTrimNewtonOmp(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build half list from full list and trim to shorter cutoff
-   pair stored once if i,j are both owned and i < j
-   if j is ghost, only store if j coords are "above and to the right" of i
-   works if full list is a skip list
-------------------------------------------------------------------------- */
-
-void NPairHalffullTrimNewtonOmp::build(NeighList *list)
-{
-  const int inum_full = list->listfull->inum;
-  const double delta = 0.01 * force->angstrom;
-  const int triclinic = domain->triclinic;
-
-  NPAIR_OMP_INIT;
-#if defined(_OPENMP)
-#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
-#endif
-  NPAIR_OMP_SETUP(inum_full);
-
-  int i,j,ii,jj,n,jnum,joriginal;
-  int *neighptr,*jlist;
-  double xtmp,ytmp,ztmp;
-  double delx,dely,delz,rsq;
-
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  int *ilist_full = list->listfull->ilist;
-  int *numneigh_full = list->listfull->numneigh;
-  int **firstneigh_full = list->listfull->firstneigh;
-
-  // each thread has its own page allocator
-  MyPage<int> &ipage = list->ipage[tid];
-  ipage.reset();
-
-  double cutsq_custom = cutoff_custom * cutoff_custom;
-
-  // loop over parent full list
-
-  for (ii = ifrom; ii < ito; ii++) {
-
-    n = 0;
-    neighptr = ipage.vget();
-
-    i = ilist_full[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    // loop over full neighbor list
-
-    jlist = firstneigh_full[i];
-    jnum = numneigh_full[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-
-      if (j < nlocal) {
-        if (i > j) continue;
-      } else if (triclinic) {
-        if (fabs(x[j][2]-ztmp) > delta) {
-          if (x[j][2] < ztmp) continue;
-        } else if (fabs(x[j][1]-ytmp) > delta) {
-          if (x[j][1] < ytmp) continue;
-        } else {
-          if (x[j][0] < xtmp) continue;
-        }
-      } else {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      // trim to shorter cutoff
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-
-      if (rsq > cutsq_custom) continue;
-
-      neighptr[n++] = joriginal;
-    }
-
-    ilist[ii] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-  NPAIR_OMP_CLOSE;
-  list->inum = inum_full;
-}
diff --git a/src/OPENMP/npair_halffull_trim_newton_omp.h b/src/OPENMP/npair_halffull_trim_newton_omp.h
deleted file mode 100644
index c6950dfa45..0000000000
--- a/src/OPENMP/npair_halffull_trim_newton_omp.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(halffull/trim/newton/omp,
-           NPairHalffullTrimNewtonOmp,
-           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_ORTHO | NP_TRI| NP_TRIM | NP_OMP);
-
-NPairStyle(halffull/trim/newton/skip/omp,
-           NPairHalffullTrimNewtonOmp,
-           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_OMP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALFFULL_TRIM_NEWTON_OMP_H
-#define LMP_NPAIR_HALFFULL_TRIM_NEWTON_OMP_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalffullTrimNewtonOmp : public NPair {
- public:
-  NPairHalffullTrimNewtonOmp(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/OPENMP/npair_multi_old_omp.cpp b/src/OPENMP/npair_multi_old_omp.cpp
new file mode 100644
index 0000000000..d45f2d1f5f
--- /dev/null
+++ b/src/OPENMP/npair_multi_old_omp.cpp
@@ -0,0 +1,262 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "npair_multi_old_omp.h"
+#include "npair_omp.h"
+#include "omp_compat.h"
+
+#include "atom.h"
+#include "atom_vec.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "molecule.h"
+#include "my_page.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE>
+NPairMultiOldOmp<HALF, NEWTON, TRI, SIZE>::NPairMultiOldOmp(LAMMPS *lmp) : NPair(lmp) {}
+
+/* ----------------------------------------------------------------------
+  multi/old-type stencil is itype dependent and is distance checked
+  Full:
+    binned neighbor list construction for all neighbors
+    multi-type stencil is itype dependent and is distance checked
+    every neighbor pair appears in list of both atoms i and j
+  Half + newtoff:
+    binned neighbor list construction with partial Newton's 3rd law
+    each owned atom i checks own bin and other bins in stencil
+    multi-type stencil is itype dependent and is distance checked
+    pair stored once if i,j are both owned and i < j
+    pair stored by me if j is ghost (also stored by proc owning j)
+  Half + newton:
+    binned neighbor list construction with full Newton's 3rd law
+    each owned atom i checks its own bin and other bins in Newton stencil
+    multi-type stencil is itype dependent and is distance checked
+    every pair stored exactly once by some processor
+------------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE>
+void NPairMultiOldOmp<HALF, NEWTON, TRI, SIZE>::build(NeighList *list)
+{
+  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
+  const int molecular = atom->molecular;
+  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
+  const double delta = 0.01 * force->angstrom;
+
+  NPAIR_OMP_INIT;
+#if defined(_OPENMP)
+#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
+#endif
+  NPAIR_OMP_SETUP(nlocal);
+
+  int i, j, jh, k, n, itype, jtype, ibin, bin_start, which, ns, imol, iatom;
+  tagint itag, jtag, tagprev;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq, radsum, cut, cutsq;
+  int *neighptr, *s;
+  double *cutnsq, *distsq;
+
+  // loop over each atom, storing neighbors
+
+  double **x = atom->x;
+  double *radius = atom->radius;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  tagint *tag = atom->tag;
+  tagint *molecule = atom->molecule;
+  tagint **special = atom->special;
+  int **nspecial = atom->nspecial;
+
+  int *molindex = atom->molindex;
+  int *molatom = atom->molatom;
+  Molecule **onemols = atom->avec->onemols;
+
+  int history = list->history;
+  int mask_history = 1 << HISTBITS;
+
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+
+  // each thread has its own page allocator
+  MyPage<int> &ipage = list->ipage[tid];
+  ipage.reset();
+
+  for (i = ifrom; i < ito; i++) {
+
+    n = 0;
+    neighptr = ipage.vget();
+
+    itag = tag[i];
+    itype = type[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    if (moltemplate) {
+      imol = molindex[i];
+      iatom = molatom[i];
+      tagprev = tag[i] - iatom - 1;
+    }
+
+    ibin = atom2bin[i];
+    s = stencil_multi_old[itype];
+    distsq = distsq_multi_old[itype];
+    cutnsq = cutneighsq[itype];
+    ns = nstencil_multi_old[itype];
+    for (k = 0; k < ns; k++) {
+      bin_start = binhead[ibin+s[k]];
+      if (k == 0) {
+        if (HALF && NEWTON && (!TRI)) {
+          // Half neighbor list, newton on, orthonormal
+          // loop over rest of atoms in i's bin, ghosts are at end of linked list
+          bin_start = bins[i];
+        }
+      }
+
+      for (j = bin_start; j >= 0; j = bins[j]) {
+        if (!HALF) {
+          // Full neighbor list
+          // only skip i = j
+          if (i == j) continue;
+        } else if (!NEWTON) {
+          // Half neighbor list, newton off
+          // only store pair if i < j
+          // stores own/own pairs only once
+          // stores own/ghost pairs on both procs
+          if (j <= i) continue;
+        } else if (TRI) {
+          // Half neighbor list, newton on, triclinic
+          // for triclinic, bin stencil is full in all 3 dims
+          // must use itag/jtag to eliminate half the I/J interactions
+          // cannot use I/J exact coord comparision
+          //   b/c transforming orthog -> lambda -> orthog for ghost atoms
+          //   with an added PBC offset can shift all 3 coords by epsilon
+          if (j <= i) continue;
+          if (j >= nlocal) {
+            jtag = tag[j];
+            if (itag > jtag) {
+              if ((itag + jtag) % 2 == 0) continue;
+            } else if (itag < jtag) {
+              if ((itag + jtag) % 2 == 1) continue;
+            } else {
+              if (fabs(x[j][2] - ztmp) > delta) {
+                if (x[j][2] < ztmp) continue;
+              } else if (fabs(x[j][1] - ytmp) > delta) {
+                if (x[j][1] < ytmp) continue;
+              } else {
+                if (x[j][0] < xtmp) continue;
+              }
+            }
+          }
+        } else {
+          // Half neighbor list, newton on, orthonormal
+          // store every pair for every bin in stencil,except for i's bin
+
+          if (k == 0) {
+            // if j is owned atom, store it, since j is beyond i in linked list
+            // if j is ghost, only store if j coords are "above and to the "right" of i
+            if (j >= nlocal) {
+              if (x[j][2] < ztmp) continue;
+              if (x[j][2] == ztmp) {
+                if (x[j][1] < ytmp) continue;
+                if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+              }
+            }
+          }
+        }
+
+        jtype = type[j];
+        if (cutnsq[jtype] < distsq[k]) continue;
+
+        if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
+
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+
+        if (SIZE) {
+          radsum = radius[i] + radius[j];
+          cut = radsum + skin;
+          cutsq = cut * cut;
+
+          if (rsq <= cutsq) {
+            jh = j;
+            if (history && rsq < radsum * radsum)
+              jh = jh ^ mask_history;
+
+            if (molecular != Atom::ATOMIC) {
+              if (!moltemplate)
+                which = find_special(special[i], nspecial[i], tag[j]);
+              else if (imol >= 0)
+                which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                     tag[j] - tagprev);
+              else
+                which = 0;
+              if (which == 0)
+                neighptr[n++] = jh;
+              else if (domain->minimum_image_check(delx, dely, delz))
+                neighptr[n++] = jh;
+              else if (which > 0)
+                neighptr[n++] = jh ^ (which << SBBITS);
+            } else
+              neighptr[n++] = jh;
+          }
+        } else {
+          if (rsq <= cutneighsq[itype][jtype]) {
+            if (molecular != Atom::ATOMIC) {
+              if (!moltemplate)
+                which = find_special(special[i], nspecial[i], tag[j]);
+              else if (imol >= 0)
+                which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                     tag[j] - tagprev);
+              else
+                which = 0;
+              if (which == 0)
+                neighptr[n++] = j;
+              else if (domain->minimum_image_check(delx, dely, delz))
+                neighptr[n++] = j;
+              else if (which > 0)
+                neighptr[n++] = j ^ (which << SBBITS);
+            } else
+              neighptr[n++] = j;
+          }
+        }
+      }
+    }
+
+    ilist[i] = i;
+    firstneigh[i] = neighptr;
+    numneigh[i] = n;
+    ipage.vgot(n);
+    if (ipage.status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  }
+  NPAIR_OMP_CLOSE;
+  list->inum = nlocal;
+  list->gnum = 0;
+}
+
+namespace LAMMPS_NS {
+template class NPairMultiOldOmp<0,1,0,0>;
+template class NPairMultiOldOmp<1,0,0,0>;
+template class NPairMultiOldOmp<1,1,0,0>;
+template class NPairMultiOldOmp<1,1,1,0>;
+template class NPairMultiOldOmp<0,1,0,1>;
+template class NPairMultiOldOmp<1,0,0,1>;
+template class NPairMultiOldOmp<1,1,0,1>;
+template class NPairMultiOldOmp<1,1,1,1>;
+}
diff --git a/src/OPENMP/npair_multi_old_omp.h b/src/OPENMP/npair_multi_old_omp.h
new file mode 100644
index 0000000000..4251c6ed48
--- /dev/null
+++ b/src/OPENMP/npair_multi_old_omp.h
@@ -0,0 +1,77 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NPAIR_CLASS
+// clang-format off
+typedef NPairMultiOldOmp<0, 1, 0, 0> NPairFullMultiOldOmp;
+NPairStyle(full/multi/old/omp,
+           NPairFullMultiOldOmp,
+           NP_FULL | NP_MULTI_OLD | NP_OMP |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOldOmp<1, 0, 0, 0> NPairHalfMultiOldNewtoffOmp;
+NPairStyle(half/multi/old/newtoff/omp,
+           NPairHalfMultiOldNewtoffOmp,
+           NP_HALF | NP_MULTI_OLD | NP_OMP | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOldOmp<1, 1, 0, 0> NPairHalfMultiOldNewtonOmp;
+NPairStyle(half/multi/old/newton/omp,
+           NPairHalfMultiOldNewtonOmp,
+           NP_HALF | NP_MULTI_OLD | NP_OMP | NP_NEWTON | NP_ORTHO);
+
+typedef NPairMultiOldOmp<1, 1, 1, 0> NPairHalfMultiOldNewtonTriOmp;
+NPairStyle(half/multi/old/newton/tri/omp,
+           NPairHalfMultiOldNewtonTriOmp,
+           NP_HALF | NP_MULTI_OLD | NP_OMP | NP_NEWTON | NP_TRI);
+
+typedef NPairMultiOldOmp<0, 1, 0, 1> NPairFullSizeMultiOldOmp;
+NPairStyle(full/size/multi/old/omp,
+           NPairFullSizeMultiOldOmp,
+           NP_FULL | NP_SIZE | NP_MULTI_OLD | NP_OMP |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOldOmp<1, 0, 0, 1> NPairHalfSizeMultiOldNewtoffOmp;
+NPairStyle(half/size/multi/old/newtoff/omp,
+           NPairHalfSizeMultiOldNewtoffOmp,
+           NP_HALF | NP_SIZE | NP_MULTI_OLD | NP_OMP | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOldOmp<1, 1, 0, 1> NPairHalfSizeMultiOldNewtonOmp;
+NPairStyle(half/size/multi/old/newton/omp,
+           NPairHalfSizeMultiOldNewtonOmp,
+           NP_HALF | NP_SIZE | NP_MULTI_OLD | NP_OMP | NP_NEWTON | NP_ORTHO);
+
+typedef NPairMultiOldOmp<1, 1, 1, 1> NPairHalfSizeMultiOldNewtonTriOmp;
+NPairStyle(half/size/multi/old/newton/tri/omp,
+           NPairHalfSizeMultiOldNewtonTriOmp,
+           NP_HALF | NP_SIZE | NP_MULTI_OLD | NP_OMP | NP_NEWTON | NP_TRI);
+// clang-format on
+#else
+
+#ifndef LMP_NPAIR_MULTI_OLD_OMP_H
+#define LMP_NPAIR_MULTI_OLD_OMP_H
+
+#include "npair.h"
+
+namespace LAMMPS_NS {
+
+template<int HALF, int NEWTON, int TRI, int SIZE>
+class NPairMultiOldOmp : public NPair {
+ public:
+  NPairMultiOldOmp(class LAMMPS *);
+  void build(class NeighList *) override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/OPENMP/npair_multi_omp.cpp b/src/OPENMP/npair_multi_omp.cpp
new file mode 100644
index 0000000000..3f8604572c
--- /dev/null
+++ b/src/OPENMP/npair_multi_omp.cpp
@@ -0,0 +1,304 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "npair_multi_omp.h"
+#include "npair_omp.h"
+#include "omp_compat.h"
+
+#include "atom.h"
+#include "atom_vec.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "molecule.h"
+#include "my_page.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE, int ATOMONLY>
+NPairMultiOmp<HALF, NEWTON, TRI, SIZE, ATOMONLY>::NPairMultiOmp(LAMMPS *lmp) : NPair(lmp) {}
+
+/* ----------------------------------------------------------------------
+   multi stencil is icollection-jcollection dependent
+   Full:
+     binned neighbor list construction for all neighbors
+     every neighbor pair appears in list of both atoms i and j
+   Half + Newtoff:
+     binned neighbor list construction with partial Newton's 3rd law
+     each owned atom i checks own bin and other bins in stencil
+     pair stored once if i,j are both owned and i < j
+     pair stored by me if j is ghost (also stored by proc owning j)
+   Half + Newton:
+     binned neighbor list construction with full Newton's 3rd law
+     each owned atom i checks its own bin and other bins in Newton stencil
+     every pair stored exactly once by some processor
+------------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE, int ATOMONLY>
+void NPairMultiOmp<HALF, NEWTON, TRI, SIZE, ATOMONLY>::build(NeighList *list)
+{
+  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
+  const int molecular = atom->molecular;
+  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
+  const double delta = 0.01 * force->angstrom;
+
+  NPAIR_OMP_INIT;
+#if defined(_OPENMP)
+#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
+#endif
+  NPAIR_OMP_SETUP(nlocal);
+
+  int i, j, jh, js, k, n, itype, jtype, ibin, jbin, icollection, jcollection, which, ns, imol, iatom;
+  tagint itag, jtag, tagprev;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq, radsum, cut, cutsq;
+  int *neighptr, *s;
+
+  int *collection = neighbor->collection;
+  double **x = atom->x;
+  double *radius = atom->radius;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  tagint *tag = atom->tag;
+  tagint *molecule = atom->molecule;
+  tagint **special = atom->special;
+  int **nspecial = atom->nspecial;
+
+  int *molindex = atom->molindex;
+  int *molatom = atom->molatom;
+  Molecule **onemols = atom->avec->onemols;
+
+  int history = list->history;
+  int mask_history = 1 << HISTBITS;
+
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+
+  // each thread has its own page allocator
+  MyPage<int> &ipage = list->ipage[tid];
+  ipage.reset();
+
+  for (i = ifrom; i < ito; i++) {
+
+    n = 0;
+    neighptr = ipage.vget();
+
+    itag = tag[i];
+    itype = type[i];
+    icollection = collection[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    if (!ATOMONLY) {
+      if (moltemplate) {
+        imol = molindex[i];
+        iatom = molatom[i];
+        tagprev = tag[i] - iatom - 1;
+      }
+    }
+
+    ibin = atom2bin[i];
+
+    // loop through stencils for all collections
+
+    for (jcollection = 0; jcollection < ncollections; jcollection++) {
+
+      // Use own bin for same collection
+      if (icollection == jcollection) jbin = ibin;
+      else jbin = coord2bin(x[i], jcollection);
+
+      s = stencil_multi[icollection][jcollection];
+      ns = nstencil_multi[icollection][jcollection];
+
+      for (k = 0; k < ns; k++) {
+        js = binhead_multi[jcollection][jbin + s[k]];
+
+        // For half-newton-ortho, first check self bin (k == 0, always half)
+        // if checking its own binlist, skip all before i in linked list
+        if (HALF && NEWTON && !TRI)
+          if ((k == 0) && (icollection == jcollection)) js = bins[i];
+
+        for (j = js; j >= 0; j = bins[j]) {
+          if (!HALF) {
+            // Full neighbor list, only uses full stencils
+            // only skip i = j
+            if (i == j) continue;
+          } else if (!NEWTON) {
+            // Half neighbor list, newton off, only uses full stencils
+            // only store pair if i < j
+            // stores own/own pairs only once
+            // stores own/ghost pairs on both procs
+            if (j <= i) continue;
+          } else if (TRI) {
+            // Half neighbor list, newton on, triclinic, only uses full stencils
+            // If different sizes -> full stencil (accept all, one-way search)
+            // If same size -> half stencil, exclude half of interactions
+            //     stencil is empty if i larger than j
+            //     stencil is full if i smaller than j
+            //     stencil is full if i same size as j
+            //   for i smaller than j:
+            //     must use itag/jtag to eliminate half the I/J interactions
+            //     cannot use I/J exact coord comparision
+            //       b/c transforming orthog -> lambda -> orthog for ghost atoms
+            //   with an added PBC offset can shift all 3 coords by epsilon
+
+            if (flag_same_multi[icollection][jcollection]) {
+              if (j <= i) continue;
+              if (j >= nlocal) {
+                jtag = tag[j];
+                if (itag > jtag) {
+                  if ((itag + jtag) % 2 == 0) continue;
+                } else if (itag < jtag) {
+                  if ((itag + jtag) % 2 == 1) continue;
+                } else {
+                  if (fabs(x[j][2] - ztmp) > delta) {
+                    if (x[j][2] < ztmp) continue;
+                  } else if (fabs(x[j][1] - ytmp) > delta) {
+                    if (x[j][1] < ytmp) continue;
+                  } else {
+                    if (x[j][0] < xtmp) continue;
+                  }
+                }
+              }
+            }
+          } else {
+            // Half neighbor list, newton on, orthonormal, uses a mix of stencils
+            // If different sizes -> full stencil (accept all, one-way search)
+            // If same size -> half stencil (first includes a self bin search)
+            if (k == 0 && flag_same_multi[icollection][jcollection]) {
+              // if same collection,
+              //   if j is owned atom, store it, since j is beyond i in linked list
+              //   if j is ghost, only store if j coords are "above and to the right" of i
+
+              // if different collections,
+              //   if j is owned atom, store it if j > i
+              //   if j is ghost, only store if j coords are "above and to the right" of i
+
+              if ((icollection != jcollection) && (j < i)) continue;
+
+              if (j >= nlocal) {
+                if (x[j][2] < ztmp) continue;
+                if (x[j][2] == ztmp) {
+                  if (x[j][1] < ytmp) continue;
+                  if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+                }
+              }
+            }
+          }
+
+          jtype = type[j];
+          if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
+
+          delx = xtmp - x[j][0];
+          dely = ytmp - x[j][1];
+          delz = ztmp - x[j][2];
+          rsq = delx * delx + dely * dely + delz * delz;
+
+          if (SIZE) {
+            radsum = radius[i] + radius[j];
+            cut = radsum + skin;
+            cutsq = cut * cut;
+
+            if (ATOMONLY) {
+              if (rsq <= cutsq) {
+                jh = j;
+                if (history && rsq < (radsum * radsum))
+                  jh = jh ^ mask_history;
+                neighptr[n++] = jh;
+              }
+            } else {
+              if (rsq <= cutsq) {
+                jh = j;
+                if (history && rsq < (radsum * radsum))
+                  jh = jh ^ mask_history;
+
+                if (molecular != Atom::ATOMIC) {
+                  if (!moltemplate)
+                    which = find_special(special[i], nspecial[i], tag[j]);
+                  else if (imol >= 0)
+                    which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                         tag[j] - tagprev);
+                  else
+                    which = 0;
+                  if (which == 0)
+                    neighptr[n++] = jh;
+                  else if (domain->minimum_image_check(delx, dely, delz))
+                    neighptr[n++] = jh;
+                  else if (which > 0)
+                    neighptr[n++] = jh ^ (which << SBBITS);
+                } else
+                  neighptr[n++] = jh;
+              }
+            }
+          } else {
+            if (ATOMONLY) {
+              if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
+            } else {
+              if (rsq <= cutneighsq[itype][jtype]) {
+                if (molecular != Atom::ATOMIC) {
+                  if (!moltemplate)
+                    which = find_special(special[i], nspecial[i], tag[j]);
+                  else if (imol >= 0)
+                    which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                         tag[j] - tagprev);
+                  else
+                    which = 0;
+                  if (which == 0)
+                    neighptr[n++] = j;
+                  else if (domain->minimum_image_check(delx, dely, delz))
+                    neighptr[n++] = j;
+                  else if (which > 0)
+                    neighptr[n++] = j ^ (which << SBBITS);
+                } else
+                  neighptr[n++] = j;
+              }
+            }
+          }
+        }
+      }
+    }
+
+    ilist[i] = i;
+    firstneigh[i] = neighptr;
+    numneigh[i] = n;
+    ipage.vgot(n);
+    if (ipage.status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  }
+  NPAIR_OMP_CLOSE;
+  list->inum = nlocal;
+  list->gnum = 0;
+}
+
+namespace LAMMPS_NS {
+template class NPairMultiOmp<0,1,0,0,0>;
+template class NPairMultiOmp<1,0,0,0,0>;
+template class NPairMultiOmp<1,1,0,0,0>;
+template class NPairMultiOmp<1,1,1,0,0>;
+template class NPairMultiOmp<0,1,0,1,0>;
+template class NPairMultiOmp<1,0,0,1,0>;
+template class NPairMultiOmp<1,1,0,1,0>;
+template class NPairMultiOmp<1,1,1,1,0>;
+template class NPairMultiOmp<0,1,0,0,1>;
+template class NPairMultiOmp<1,0,0,0,1>;
+template class NPairMultiOmp<1,1,0,0,1>;
+template class NPairMultiOmp<1,1,1,0,1>;
+template class NPairMultiOmp<0,1,0,1,1>;
+template class NPairMultiOmp<1,0,0,1,1>;
+template class NPairMultiOmp<1,1,0,1,1>;
+template class NPairMultiOmp<1,1,1,1,1>;
+}
diff --git a/src/OPENMP/npair_multi_omp.h b/src/OPENMP/npair_multi_omp.h
new file mode 100644
index 0000000000..bcb01c87cf
--- /dev/null
+++ b/src/OPENMP/npair_multi_omp.h
@@ -0,0 +1,115 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NPAIR_CLASS
+// clang-format off
+typedef NPairMultiOmp<0, 1, 0, 0, 0> NPairFullMultiOmp;
+NPairStyle(full/multi/omp,
+           NPairFullMultiOmp,
+           NP_FULL | NP_MULTI | NP_MOLONLY | NP_OMP | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOmp<1, 0, 0, 0, 0> NPairHalfMultiNewtoffOmp;
+NPairStyle(half/multi/newtoff/omp,
+           NPairHalfMultiNewtoffOmp,
+           NP_HALF | NP_MULTI | NP_MOLONLY | NP_OMP | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOmp<1, 1, 0, 0, 0> NPairHalfMultiNewtonOmp;
+NPairStyle(half/multi/newton/omp,
+           NPairHalfMultiNewtonOmp,
+           NP_HALF | NP_MULTI | NP_MOLONLY | NP_OMP | NP_NEWTON | NP_ORTHO);
+
+typedef NPairMultiOmp<1, 1, 1, 0, 0> NPairHalfMultiNewtonTriOmp;
+NPairStyle(half/multi/newton/tri/omp,
+           NPairHalfMultiNewtonTriOmp,
+           NP_HALF | NP_MULTI | NP_MOLONLY | NP_OMP | NP_NEWTON | NP_TRI);
+
+typedef NPairMultiOmp<0, 1, 0, 1, 0> NPairFullSizeMultiOmp;
+NPairStyle(full/size/multi/omp,
+           NPairFullSizeMultiOmp,
+           NP_FULL | NP_SIZE | NP_MULTI | NP_MOLONLY | NP_OMP | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOmp<1, 0, 0, 1, 0> NPairHalfSizeMultiNewtoffOmp;
+NPairStyle(half/size/multi/newtoff/omp,
+           NPairHalfSizeMultiNewtoffOmp,
+           NP_HALF | NP_SIZE | NP_MULTI | NP_MOLONLY | NP_OMP | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOmp<1, 1, 0, 1, 0> NPairHalfSizeMultiNewtonOmp;
+NPairStyle(half/size/multi/newton/omp,
+           NPairHalfSizeMultiNewtonOmp,
+           NP_HALF | NP_SIZE | NP_MULTI | NP_MOLONLY | NP_OMP | NP_NEWTON | NP_ORTHO);
+
+typedef NPairMultiOmp<1, 1, 1, 1, 0> NPairHalfSizeMultiNewtonTriOmp;
+NPairStyle(half/size/multi/newton/tri/omp,
+           NPairHalfSizeMultiNewtonTriOmp,
+           NP_HALF | NP_SIZE | NP_MULTI | NP_MOLONLY | NP_OMP | NP_NEWTON | NP_TRI);
+
+typedef NPairMultiOmp<0, 1, 0, 0, 1> NPairFullMultiAtomonlyOmp;
+NPairStyle(full/multi/atomonly/omp,
+           NPairFullMultiAtomonlyOmp,
+           NP_FULL | NP_MULTI | NP_ATOMONLY | NP_OMP | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOmp<1, 0, 0, 0, 1> NPairHalfMultiAtomonlyNewtoffOmp;
+NPairStyle(half/multi/atomonly/newtoff/omp,
+           NPairHalfMultiAtomonlyNewtoffOmp,
+           NP_HALF | NP_MULTI | NP_ATOMONLY | NP_OMP | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOmp<1, 1, 0, 0, 1> NPairHalfMultiAtomonlyNewtonOmp;
+NPairStyle(half/multi/atomonly/newton/omp,
+           NPairHalfMultiAtomonlyNewtonOmp,
+           NP_HALF | NP_MULTI | NP_ATOMONLY | NP_OMP | NP_NEWTON | NP_ORTHO);
+
+typedef NPairMultiOmp<1, 1, 1, 0, 1> NPairHalfMultiAtomonlyNewtonTriOmp;
+NPairStyle(half/multi/atomonly/newton/tri/omp,
+           NPairHalfMultiAtomonlyNewtonTriOmp,
+           NP_HALF | NP_MULTI | NP_ATOMONLY | NP_OMP | NP_NEWTON | NP_TRI);
+
+typedef NPairMultiOmp<0, 1, 0, 1, 1> NPairFullSizeMultiAtomonlyOmp;
+NPairStyle(full/size/multi/atomonly/omp,
+           NPairFullSizeMultiAtomonlyOmp,
+           NP_FULL | NP_SIZE | NP_MULTI | NP_ATOMONLY | NP_OMP | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOmp<1, 0, 0, 1, 1> NPairHalfSizeMultiAtomonlyNewtoffOmp;
+NPairStyle(half/size/multi/atomonly/newtoff/omp,
+           NPairHalfSizeMultiAtomonlyNewtoffOmp,
+           NP_HALF | NP_SIZE | NP_MULTI | NP_ATOMONLY | NP_OMP | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOmp<1, 1, 0, 1, 1> NPairHalfSizeMultiAtomonlyNewtonOmp;
+NPairStyle(half/size/multi/atomonly/newton/omp,
+           NPairHalfSizeMultiAtomonlyNewtonOmp,
+           NP_HALF | NP_SIZE | NP_MULTI | NP_ATOMONLY | NP_OMP | NP_NEWTON | NP_ORTHO);
+
+typedef NPairMultiOmp<1, 1, 1, 1, 1> NPairHalfSizeMultiAtomonlyNewtonTriOmp;
+NPairStyle(half/size/multi/atomonly/newton/tri/omp,
+           NPairHalfSizeMultiAtomonlyNewtonTriOmp,
+           NP_HALF | NP_SIZE | NP_MULTI | NP_ATOMONLY | NP_OMP | NP_NEWTON | NP_TRI);
+// clang-format on
+#else
+
+#ifndef LMP_NPAIR_MULTI_OMP_H
+#define LMP_NPAIR_MULTI_OMP_H
+
+#include "npair.h"
+
+namespace LAMMPS_NS {
+
+template<int HALF, int NEWTON, int TRI, int SIZE, int ATOMONLY>
+class NPairMultiOmp : public NPair {
+ public:
+  NPairMultiOmp(class LAMMPS *);
+  void build(class NeighList *) override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/OPENMP/npair_full_multi_old_omp.cpp b/src/OPENMP/npair_nsq_ghost_omp.cpp
similarity index 61%
rename from src/OPENMP/npair_full_multi_old_omp.cpp
rename to src/OPENMP/npair_nsq_ghost_omp.cpp
index f0ed6360ab..a270fbb84d 100644
--- a/src/OPENMP/npair_full_multi_old_omp.cpp
+++ b/src/OPENMP/npair_nsq_ghost_omp.cpp
@@ -1,3 +1,4 @@
+// clang-format off
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    https://www.lammps.org/, Sandia National Laboratories
@@ -11,32 +12,44 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#include "npair_full_multi_old_omp.h"
+
+#include "npair_nsq_ghost_omp.h"
+#include "npair_omp.h"
+#include "omp_compat.h"
+
 #include "atom.h"
-#include "atom_vec.h"
 #include "domain.h"
 #include "error.h"
+#include "atom_vec.h"
 #include "molecule.h"
 #include "my_page.h"
 #include "neigh_list.h"
-#include "npair_omp.h"
-#include "omp_compat.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-NPairFullMultiOldOmp::NPairFullMultiOldOmp(LAMMPS *lmp) : NPair(lmp) {}
+template<int HALF>
+NPairNsqGhostOmp<HALF>::NPairNsqGhostOmp(LAMMPS *lmp) : NPair(lmp) {}
 
 /* ----------------------------------------------------------------------
-   binned neighbor list construction for all neighbors
-   multi-type stencil is itype dependent and is distance checked
-   every neighbor pair appears in list of both atoms i and j
+   Full:
+     N^2 search for all neighbors
+     include neighbors of ghost atoms, but no "special neighbors" for ghosts
+     every neighbor pair appears in list of both atoms i and j
+   Half + Newtoff:
+     N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
+     include neighbors of ghost atoms, but no "special neighbors" for ghosts
+     pair stored once if i,j are both owned and i < j
+     pair stored by me if i owned and j ghost (also stored by proc owning j)
+     pair stored once if i,j are both ghost and i < j
 ------------------------------------------------------------------------- */
 
-void NPairFullMultiOldOmp::build(NeighList *list)
+template<int HALF>
+void NPairNsqGhostOmp<HALF>::build(NeighList *list)
 {
-  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
+  const int nlocal = atom->nlocal;
+  const int nall = nlocal + atom->nghost;
   const int molecular = atom->molecular;
   const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
 
@@ -44,15 +57,12 @@ void NPairFullMultiOldOmp::build(NeighList *list)
 #if defined(_OPENMP)
 #pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
 #endif
-  NPAIR_OMP_SETUP(nlocal);
+  NPAIR_OMP_SETUP(nall);
 
-  int i, j, k, n, itype, jtype, ibin, which, ns, imol, iatom;
+  int i, j, jstart, n, itype, jtype, which, imol, iatom;
   tagint tagprev;
   double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
-  int *neighptr, *s;
-  double *cutsq, *distsq;
-
-  // loop over each atom, storing neighbors
+  int *neighptr;
 
   double **x = atom->x;
   int *type = atom->type;
@@ -74,6 +84,8 @@ void NPairFullMultiOldOmp::build(NeighList *list)
   MyPage<int> &ipage = list->ipage[tid];
   ipage.reset();
 
+  // loop over owned & ghost atoms, storing neighbors
+
   for (i = ifrom; i < ito; i++) {
 
     n = 0;
@@ -89,28 +101,32 @@ void NPairFullMultiOldOmp::build(NeighList *list)
       tagprev = tag[i] - iatom - 1;
     }
 
-    // loop over all atoms in other bins in stencil, including self
-    // skip if i,j neighbor cutoff is less than bin distance
-    // skip i = j
+    // loop over all atoms, owned and ghost
+    // Full:
+    //   skip i = j
+    // Half:
+    //   only store pair if i < j
+    //   stores own/own pairs only once
+    //   stores own/ghost pairs with owned atom only, on both procs
+    //   stores ghost/ghost pairs only once
+    // no molecular test when i = ghost atom
+
+    if (HALF) jstart = i + 1;
+    else jstart = 0;
+
+    if (i < nlocal) {
+      for (j = jstart; j < nall; j++) {
+        if (!HALF) {
+          if (i == j) continue;
+        }
 
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin + s[k]]; j >= 0; j = bins[j]) {
         jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-        if (i == j) continue;
-
         if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
 
         delx = xtmp - x[j][0];
         dely = ytmp - x[j][1];
         delz = ztmp - x[j][2];
         rsq = delx * delx + dely * dely + delz * delz;
-
         if (rsq <= cutneighsq[itype][jtype]) {
           if (molecular != Atom::ATOMIC) {
             if (!moltemplate)
@@ -130,6 +146,26 @@ void NPairFullMultiOldOmp::build(NeighList *list)
             neighptr[n++] = j;
         }
       }
+    } else {
+      for (j = jstart; j < nall; j++) {
+        if (!HALF) {
+          if (i == j) continue;
+        }
+
+        jtype = type[j];
+        if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
+
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+
+        if (HALF) {
+          if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
+        } else {
+          if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
+        }
+      }
     }
 
     ilist[i] = i;
@@ -140,5 +176,10 @@ void NPairFullMultiOldOmp::build(NeighList *list)
   }
   NPAIR_OMP_CLOSE;
   list->inum = nlocal;
-  list->gnum = 0;
+  list->gnum = nall - nlocal;
+}
+
+namespace LAMMPS_NS {
+template class NPairNsqGhostOmp<0>;
+template class NPairNsqGhostOmp<1>;
 }
diff --git a/src/OPENMP/npair_half_nsq_newtoff_ghost_omp.h b/src/OPENMP/npair_nsq_ghost_omp.h
similarity index 64%
rename from src/OPENMP/npair_half_nsq_newtoff_ghost_omp.h
rename to src/OPENMP/npair_nsq_ghost_omp.h
index 4df15104c3..1d33758bac 100644
--- a/src/OPENMP/npair_half_nsq_newtoff_ghost_omp.h
+++ b/src/OPENMP/npair_nsq_ghost_omp.h
@@ -13,23 +13,29 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+typedef NPairNsqGhostOmp<0> NPairFullNsqGhostOmp;
+NPairStyle(full/nsq/ghost/omp,
+           NPairFullNsqGhostOmp,
+           NP_FULL | NP_NSQ | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_OMP | NP_ORTHO | NP_TRI);
+
+typedef NPairNsqGhostOmp<1> NPairHalfNsqNewtoffGhostOmp;
 NPairStyle(half/nsq/newtoff/ghost/omp,
            NPairHalfNsqNewtoffGhostOmp,
-           NP_HALF | NP_NSQ | NP_NEWTOFF | NP_GHOST | NP_OMP |
-           NP_ORTHO | NP_TRI);
+           NP_HALF | NP_NSQ | NP_NEWTOFF | NP_GHOST | NP_OMP | NP_ORTHO | NP_TRI);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_NSQ_NEWTOFF_GHOST_OMP_H
-#define LMP_NPAIR_HALF_NSQ_NEWTOFF_GHOST_OMP_H
+#ifndef LMP_NPAIR_NSQ_GHOST_OMP_H
+#define LMP_NPAIR_NSQ_GHOST_OMP_H
 
 #include "npair.h"
 
 namespace LAMMPS_NS {
 
-class NPairHalfNsqNewtoffGhostOmp : public NPair {
+template<int HALF>
+class NPairNsqGhostOmp : public NPair {
  public:
-  NPairHalfNsqNewtoffGhostOmp(class LAMMPS *);
+  NPairNsqGhostOmp(class LAMMPS *);
   void build(class NeighList *) override;
 };
 
diff --git a/src/OPENMP/npair_nsq_omp.cpp b/src/OPENMP/npair_nsq_omp.cpp
new file mode 100644
index 0000000000..c482fc8f2d
--- /dev/null
+++ b/src/OPENMP/npair_nsq_omp.cpp
@@ -0,0 +1,238 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "npair_nsq_omp.h"
+#include "npair_omp.h"
+#include "omp_compat.h"
+
+#include "atom.h"
+#include "atom_vec.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "group.h"
+#include "molecule.h"
+#include "my_page.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+
+using namespace LAMMPS_NS;
+using namespace NeighConst;
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE>
+NPairNsqOmp<HALF, NEWTON, TRI, SIZE>::NPairNsqOmp(LAMMPS *lmp) : NPair(lmp) {}
+
+/* ----------------------------------------------------------------------
+   Full:
+     N^2 search for all neighbors
+     every neighbor pair appears in list of both atoms i and j
+   Half + Newtoff:
+     N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
+     pair stored once if i,j are both owned and i < j
+     pair stored by me if j is ghost (also stored by proc owning j)
+   Half + Newton:
+     N^2 / 2 search for neighbor pairs with full Newton's 3rd law
+     every pair stored exactly once by some processor
+     decision on ghost atoms based on itag,jtag tests
+   Half + Newton + Tri:
+     use itag/jtap comparision to eliminate half the interactions
+     for triclinic, must use delta to eliminate half the I/J interactions
+     cannot use I/J exact coord comparision as for orthog
+     b/c transforming orthog -> lambda -> orthog for ghost atoms
+     with an added PBC offset can shift all 3 coords by epsilon
+------------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE>
+void NPairNsqOmp<HALF, NEWTON, TRI, SIZE>::build(NeighList *list)
+{
+  const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
+  const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
+  const int molecular = atom->molecular;
+  const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
+  const double delta = 0.01 * force->angstrom;
+
+  NPAIR_OMP_INIT;
+#if defined(_OPENMP)
+#pragma omp parallel LMP_DEFAULT_NONE LMP_SHARED(list)
+#endif
+  NPAIR_OMP_SETUP(nlocal);
+
+  int i, j, jh, jstart, n, itype, jtype, which, imol, iatom;
+  tagint itag, jtag, tagprev;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq, radsum, cut, cutsq;
+  int *neighptr;
+
+  double **x = atom->x;
+  double *radius = atom->radius;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  tagint *tag = atom->tag;
+  tagint *molecule = atom->molecule;
+  tagint **special = atom->special;
+  int **nspecial = atom->nspecial;
+
+  int nall = atom->nlocal + atom->nghost;
+  int *molindex = atom->molindex;
+  int *molatom = atom->molatom;
+  Molecule **onemols = atom->avec->onemols;
+
+  int history = list->history;
+  int mask_history = 1 << HISTBITS;
+
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+
+  // each thread has its own page allocator
+  MyPage<int> &ipage = list->ipage[tid];
+  ipage.reset();
+
+  // loop over owned atoms, storing neighbors
+
+  for (i = ifrom; i < ito; i++) {
+
+    n = 0;
+    neighptr = ipage.vget();
+
+    itag = tag[i];
+    itype = type[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    if (moltemplate) {
+      imol = molindex[i];
+      iatom = molatom[i];
+      tagprev = tag[i] - iatom - 1;
+    }
+
+    // Full: loop over all atoms, owned and ghost, skip i = j
+    // Half: loop over remaining atoms, owned and ghost
+    //   Newtoff: only store pair if i < j
+    //   Newton: itag = jtag is possible for long cutoffs that include images of self
+
+    if (!HALF) jstart = 0;
+    else jstart = i + 1;
+
+    for (j = jstart; j < nall; j++) {
+      if (includegroup && !(mask[j] & bitmask)) continue;
+
+      if (!HALF) {
+        // Full neighbor list
+        if (i == j) continue;
+      } else if (NEWTON) {
+        // Half neighbor list, newton on
+        if (j >= nlocal) {
+          jtag = tag[j];
+          if (itag > jtag) {
+            if ((itag + jtag) % 2 == 0) continue;
+          } else if (itag < jtag) {
+            if ((itag + jtag) % 2 == 1) continue;
+          } else if (TRI) {
+            if (fabs(x[j][2] - ztmp) > delta) {
+              if (x[j][2] < ztmp) continue;
+            } else if (fabs(x[j][1] - ytmp) > delta) {
+              if (x[j][1] < ytmp) continue;
+            } else {
+              if (x[j][0] < xtmp) continue;
+            }
+          } else {
+            if (x[j][2] < ztmp) continue;
+            if (x[j][2] == ztmp) {
+              if (x[j][1] < ytmp) continue;
+              if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+            }
+          }
+        }
+      }
+
+      jtype = type[j];
+      if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx * delx + dely * dely + delz * delz;
+
+      if (SIZE) {
+        radsum = radius[i] + radius[j];
+        cut = radsum + skin;
+        cutsq = cut * cut;
+
+        if (rsq <= cutsq) {
+          jh = j;
+          if (history && rsq < radsum * radsum)
+            jh = jh ^ mask_history;
+
+          if (molecular != Atom::ATOMIC) {
+            if (!moltemplate)
+              which = find_special(special[i], nspecial[i], tag[j]);
+            else if (imol >= 0)
+              which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                   tag[j] - tagprev);
+            else
+              which = 0;
+            if (which == 0)
+              neighptr[n++] = jh;
+            else if (domain->minimum_image_check(delx, dely, delz))
+              neighptr[n++] = jh;
+            else if (which > 0)
+              neighptr[n++] = jh ^ (which << SBBITS);
+          } else
+            neighptr[n++] = jh;
+        }
+      } else {
+        if (rsq <= cutneighsq[itype][jtype]) {
+          if (molecular != Atom::ATOMIC) {
+            if (!moltemplate)
+              which = find_special(special[i], nspecial[i], tag[j]);
+            else if (imol >= 0)
+              which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                   tag[j] - tagprev);
+            else
+              which = 0;
+            if (which == 0)
+              neighptr[n++] = j;
+            else if (domain->minimum_image_check(delx, dely, delz))
+              neighptr[n++] = j;
+            else if (which > 0)
+              neighptr[n++] = j ^ (which << SBBITS);
+          } else
+            neighptr[n++] = j;
+        }
+      }
+    }
+
+    ilist[i] = i;
+    firstneigh[i] = neighptr;
+    numneigh[i] = n;
+    ipage.vgot(n);
+    if (ipage.status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  }
+  NPAIR_OMP_CLOSE;
+  list->inum = nlocal;
+  list->gnum = 0;
+}
+
+namespace LAMMPS_NS {
+template class NPairNsqOmp<0,1,0,0>;
+template class NPairNsqOmp<1,0,0,0>;
+template class NPairNsqOmp<1,1,0,0>;
+template class NPairNsqOmp<1,1,1,0>;
+template class NPairNsqOmp<0,1,0,1>;
+template class NPairNsqOmp<1,0,0,1>;
+template class NPairNsqOmp<1,1,0,1>;
+template class NPairNsqOmp<1,1,1,1>;
+}
diff --git a/src/OPENMP/npair_nsq_omp.h b/src/OPENMP/npair_nsq_omp.h
new file mode 100644
index 0000000000..b7479b6e17
--- /dev/null
+++ b/src/OPENMP/npair_nsq_omp.h
@@ -0,0 +1,76 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NPAIR_CLASS
+// clang-format off
+
+typedef NPairNsqOmp<0, 1, 0, 0> NPairFullNsqOmp;
+NPairStyle(full/nsq/omp,
+           NPairFullNsqOmp,
+           NP_FULL | NP_NSQ | NP_OMP | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairNsqOmp<1, 0, 0, 0> NPairHalfNsqNewtoffOmp;
+NPairStyle(half/nsq/newtoff/omp,
+           NPairHalfNsqNewtoffOmp,
+           NP_HALF | NP_NSQ | NP_OMP | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairNsqOmp<1, 1, 0, 0> NPairHalfNsqNewtonOmp;
+NPairStyle(half/nsq/newton/omp,
+           NPairHalfNsqNewtonOmp,
+           NP_HALF | NP_NSQ | NP_OMP | NP_NEWTON | NP_ORTHO);
+
+typedef NPairNsqOmp<1, 1, 1, 0> NPairHalfNsqNewtonTriOmp;
+NPairStyle(half/nsq/newton/tri/omp,
+           NPairHalfNsqNewtonTriOmp,
+           NP_HALF | NP_NSQ | NP_OMP | NP_NEWTON | NP_TRI);
+
+typedef NPairNsqOmp<0, 1, 0, 1> NPairFullSizeNsqOmp;
+NPairStyle(full/size/nsq/omp,
+           NPairFullSizeNsqOmp,
+           NP_FULL | NP_SIZE | NP_NSQ | NP_OMP | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairNsqOmp<1, 0, 0, 1> NPairHalfSizeNsqNewtoffOmp;
+NPairStyle(half/size/nsq/newtoff/omp,
+           NPairHalfSizeNsqNewtoffOmp,
+           NP_HALF | NP_SIZE | NP_NSQ | NP_OMP | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairNsqOmp<1, 1, 0, 1> NPairHalfSizeNsqNewtonOmp;
+NPairStyle(half/size/nsq/newton/omp,
+           NPairHalfSizeNsqNewtonOmp,
+           NP_HALF | NP_SIZE | NP_NSQ | NP_OMP | NP_NEWTON | NP_ORTHO);
+
+typedef NPairNsqOmp<1, 1, 1, 1> NPairHalfSizeNsqNewtonTriOmp;
+NPairStyle(half/size/nsq/newton/tri/omp,
+           NPairHalfSizeNsqNewtonTriOmp,
+           NP_HALF | NP_SIZE | NP_NSQ | NP_OMP | NP_NEWTON | NP_TRI);
+// clang-format on
+#else
+
+#ifndef LMP_NPAIR_NSQ_OMP_H
+#define LMP_NPAIR_NSQ_OMP_H
+
+#include "npair.h"
+
+namespace LAMMPS_NS {
+
+template<int HALF, int NEWTON, int TRI, int SIZE>
+class NPairNsqOmp : public NPair {
+ public:
+  NPairNsqOmp(class LAMMPS *);
+  void build(class NeighList *) override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/OPENMP/npair_half_respa_bin_newton_tri_omp.cpp b/src/OPENMP/npair_respa_bin_omp.cpp
similarity index 51%
rename from src/OPENMP/npair_half_respa_bin_newton_tri_omp.cpp
rename to src/OPENMP/npair_respa_bin_omp.cpp
index 78b3abdd66..c958167ba0 100644
--- a/src/OPENMP/npair_half_respa_bin_newton_tri_omp.cpp
+++ b/src/OPENMP/npair_respa_bin_omp.cpp
@@ -12,9 +12,9 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#include "omp_compat.h"
-#include "npair_half_respa_bin_newton_tri_omp.h"
+#include "npair_respa_bin_omp.h"
 #include "npair_omp.h"
+#include "omp_compat.h"
 
 #include "atom.h"
 #include "atom_vec.h"
@@ -29,17 +29,25 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-NPairHalfRespaBinNewtonTriOmp::NPairHalfRespaBinNewtonTriOmp(LAMMPS *lmp) :
+template<int NEWTON, int TRI>
+NPairRespaBinOmp<NEWTON, TRI>::NPairRespaBinOmp(LAMMPS *lmp) :
   NPair(lmp) {}
 
 /* ----------------------------------------------------------------------
    multiple respa lists
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   every pair stored exactly once by some processor
+   Newtoff
+     binned neighbor list construction with partial Newton's 3rd law
+     each owned atom i checks own bin and surrounding bins in non-Newton stencil
+     pair stored once if i,j are both owned and i < j
+     pair stored by me if j is ghost (also stored by proc owning j)
+  Newton
+     binned neighbor list construction with full Newton's 3rd law
+     each owned atom i checks its own bin and other bins in Newton stencil
+     every pair stored exactly once by some processor
 ------------------------------------------------------------------------- */
 
-void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
+template<int NEWTON, int TRI>
+void NPairRespaBinOmp<NEWTON, TRI>::build(NeighList *list)
 {
   const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
   const int molecular = atom->molecular;
@@ -55,10 +63,10 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
 #endif
   NPAIR_OMP_SETUP(nlocal);
 
-  int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*neighptr_inner,*neighptr_middle;
+  int i, j, k, n, itype, jtype, ibin, bin_start, n_inner, n_middle, imol, iatom;
+  tagint itag, jtag, tagprev;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
+  int *neighptr, *neighptr_inner, *neighptr_middle;
 
   double **x = atom->x;
   int *type = atom->type;
@@ -80,7 +88,7 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
   int *numneigh_inner = list->numneigh_inner;
   int **firstneigh_inner = list->firstneigh_inner;
 
-  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
+  int *ilist_middle, *numneigh_middle, **firstneigh_middle;
   if (respamiddle) {
     ilist_middle = list->ilist_middle;
     numneigh_middle = list->numneigh_middle;
@@ -117,75 +125,112 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
+    ibin = atom2bin[i];
     if (moltemplate) {
       imol = molindex[i];
       iatom = molatom[i];
       tagprev = tag[i] - iatom - 1;
     }
 
-    // loop over all atoms in bins in stencil
-    // for triclinic, bin stencil is full in all 3 dims
-    // must use itag/jtag to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    ibin = atom2bin[i];
     for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
+      bin_start = binhead[ibin + stencil[k]];
+      if (NEWTON && (!TRI)) {
+        if (k == 0) {
+          // Half neighbor list, newton on, orthonormal
+          // loop over rest of atoms in i's bin, ghosts are at end of linked list
+          bin_start = bins[i];
+        }
+      }
 
-        if (j <= i) continue;
-        if (j >= nlocal) {
-          jtag = tag[j];
-          if (itag > jtag) {
-            if ((itag+jtag) % 2 == 0) continue;
-          } else if (itag < jtag) {
-            if ((itag+jtag) % 2 == 1) continue;
-          } else {
-            if (fabs(x[j][2]-ztmp) > delta) {
-              if (x[j][2] < ztmp) continue;
-            } else if (fabs(x[j][1]-ytmp) > delta) {
-              if (x[j][1] < ytmp) continue;
+      for (j = bin_start; j >= 0; j = bins[j]) {
+        if (!NEWTON) {
+          // Half neighbor list, newton off
+          // only store pair if i < j
+          // stores own/own pairs only once
+          // stores own/ghost pairs on both procs
+          if (j <= i) continue;
+        } else if (TRI) {
+          // Half neighbor list, newton on, triclinic
+          // for triclinic, bin stencil is full in all 3 dims
+          // must use itag/jtag to eliminate half the I/J interactions
+          // cannot use I/J exact coord comparision
+          //   b/c transforming orthog -> lambda -> orthog for ghost atoms
+          //   with an added PBC offset can shift all 3 coords by epsilon
+          if (j <= i) continue;
+          if (j >= nlocal) {
+            jtag = tag[j];
+            if (itag > jtag) {
+              if ((itag + jtag) % 2 == 0) continue;
+            } else if (itag < jtag) {
+              if ((itag + jtag) % 2 == 1) continue;
             } else {
-              if (x[j][0] < xtmp) continue;
+              if (fabs(x[j][2] - ztmp) > delta) {
+                if (x[j][2] < ztmp) continue;
+              } else if (fabs(x[j][1] - ytmp) > delta) {
+                if (x[j][1] < ytmp) continue;
+              } else {
+                if (x[j][0] < xtmp) continue;
+              }
+            }
+          }
+        } else {
+          // Half neighbor list, newton on, orthonormal
+          // store every pair for every bin in stencil,except for i's bin
+
+          if (k == 0) {
+            // if j is owned atom, store it, since j is beyond i in linked list
+            // if j is ghost, only store if j coords are "above and to the "right" of i
+            if (j >= nlocal) {
+              if (x[j][2] < ztmp) continue;
+              if (x[j][2] == ztmp) {
+                if (x[j][1] < ytmp) continue;
+                if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+              }
             }
           }
         }
 
         jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
+        if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
 
         delx = xtmp - x[j][0];
         dely = ytmp - x[j][1];
         delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
+        rsq = delx * delx + dely * dely + delz * delz;
 
         if (rsq <= cutneighsq[itype][jtype]) {
           if (molecular != Atom::ATOMIC) {
             if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
+              which = find_special(special[i], nspecial[i], tag[j]);
             else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
+              which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                   tag[j] - tagprev);
+            else
+              which = 0;
+            if (which == 0)
               neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
+            else if ((minchange = domain->minimum_image_check(delx, dely, delz)))
+              neighptr[n++] = j;
+            else if (which > 0)
+              neighptr[n++] = j ^ (which << SBBITS);
+          } else
+            neighptr[n++] = j;
 
           if (rsq < cut_inner_sq) {
-            if (which == 0) neighptr_inner[n_inner++] = j;
-            else if (minchange) neighptr_inner[n_inner++] = j;
+            if (which == 0)
+              neighptr_inner[n_inner++] = j;
+            else if (minchange)
+              neighptr_inner[n_inner++] = j;
             else if (which > 0)
               neighptr_inner[n_inner++] = j ^ (which << SBBITS);
           }
 
           if (respamiddle &&
               rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
-            if (which == 0) neighptr_middle[n_middle++] = j;
-            else if (minchange) neighptr_middle[n_middle++] = j;
+            if (which == 0)
+              neighptr_middle[n_middle++] = j;
+            else if (minchange)
+              neighptr_middle[n_middle++] = j;
             else if (which > 0)
               neighptr_middle[n_middle++] = j ^ (which << SBBITS);
           }
@@ -197,23 +242,20 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
     firstneigh[i] = neighptr;
     numneigh[i] = n;
     ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    if (ipage.status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
 
     ilist_inner[i] = i;
     firstneigh_inner[i] = neighptr_inner;
     numneigh_inner[i] = n_inner;
-    ipage_inner.vgot(n_inner);
-    if (ipage_inner.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    ipage.vgot(n_inner);
+    if (ipage_inner.status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
 
     if (respamiddle) {
       ilist_middle[i] = i;
       firstneigh_middle[i] = neighptr_middle;
       numneigh_middle[i] = n_middle;
       ipage_middle->vgot(n_middle);
-      if (ipage_middle->status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+      if (ipage_middle->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
     }
   }
   NPAIR_OMP_CLOSE;
@@ -221,3 +263,9 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list)
   list->inum_inner = nlocal;
   if (respamiddle) list->inum_middle = nlocal;
 }
+
+namespace LAMMPS_NS {
+template class NPairRespaBinOmp<0,0>;
+template class NPairRespaBinOmp<1,0>;
+template class NPairRespaBinOmp<1,1>;
+}
diff --git a/src/OPENMP/npair_half_respa_bin_newton_omp.h b/src/OPENMP/npair_respa_bin_omp.h
similarity index 55%
rename from src/OPENMP/npair_half_respa_bin_newton_omp.h
rename to src/OPENMP/npair_respa_bin_omp.h
index 695d0ce627..23daacbb8f 100644
--- a/src/OPENMP/npair_half_respa_bin_newton_omp.h
+++ b/src/OPENMP/npair_respa_bin_omp.h
@@ -13,22 +13,34 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+typedef NPairRespaBinOmp<0, 0> NPairHalfRespaBinNewtoffOmp;
+NPairStyle(half/respa/bin/newtoff,
+           NPairHalfRespaBinNewtoffOmp,
+           NP_HALF | NP_RESPA | NP_BIN | NP_OMP | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairRespaBinOmp<1, 0> NPairHalfRespaBinNewtonOmp;
 NPairStyle(half/respa/bin/newton/omp,
            NPairHalfRespaBinNewtonOmp,
-           NP_HALF | NP_RESPA | NP_BIN | NP_NEWTON | NP_OMP | NP_ORTHO);
+           NP_HALF | NP_RESPA | NP_BIN | NP_OMP | NP_NEWTON | NP_ORTHO);
+
+typedef NPairRespaBinOmp<1, 1> NPairHalfRespaBinNewtonTriOmp;
+NPairStyle(half/respa/bin/newton/tri/omp,
+           NPairHalfRespaBinNewtonTriOmp,
+           NP_HALF | NP_RESPA | NP_BIN | NP_OMP | NP_NEWTON | NP_TRI);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_RESPA_BIN_NEWTON_OMP_H
-#define LMP_NPAIR_HALF_RESPA_BIN_NEWTON_OMP_H
+#ifndef LMP_NPAIR_RESPA_BIN_OMP_H
+#define LMP_NPAIR_RESPA_BIN_OMP_H
 
 #include "npair.h"
 
 namespace LAMMPS_NS {
 
-class NPairHalfRespaBinNewtonOmp : public NPair {
+template<int NEWTON, int TRI>
+class NPairRespaBinOmp : public NPair {
  public:
-  NPairHalfRespaBinNewtonOmp(class LAMMPS *);
+  NPairRespaBinOmp(class LAMMPS *);
   void build(class NeighList *) override;
 };
 
diff --git a/src/OPENMP/npair_half_respa_nsq_newton_omp.cpp b/src/OPENMP/npair_respa_nsq_omp.cpp
similarity index 55%
rename from src/OPENMP/npair_half_respa_nsq_newton_omp.cpp
rename to src/OPENMP/npair_respa_nsq_omp.cpp
index a9745edc64..6815b21544 100644
--- a/src/OPENMP/npair_half_respa_nsq_newton_omp.cpp
+++ b/src/OPENMP/npair_respa_nsq_omp.cpp
@@ -12,9 +12,9 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#include "omp_compat.h"
-#include "npair_half_respa_nsq_newton_omp.h"
+#include "npair_respa_nsq_omp.h"
 #include "npair_omp.h"
+#include "omp_compat.h"
 
 #include "atom.h"
 #include "atom_vec.h"
@@ -30,24 +30,38 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-NPairHalfRespaNsqNewtonOmp::NPairHalfRespaNsqNewtonOmp(LAMMPS *lmp) : NPair(lmp) {}
+template<int NEWTON, int TRI>
+NPairRespaNsqOmp<NEWTON, TRI>::NPairRespaNsqOmp(LAMMPS *lmp) :
+  NPair(lmp) {}
 
 /* ----------------------------------------------------------------------
    multiple respa lists
-   N^2 / 2 search for neighbor pairs with full Newton's 3rd law
-   pair added to list if atoms i and j are both owned and i < j
-   if j is ghost only me or other proc adds pair
-   decision based on itag,jtag tests
+   Newtoff
+     N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
+     pair added to list if atoms i and j are both owned and i < j
+     pair added if j is ghost (also stored by proc owning j)
+  Newton
+     N^2 / 2 search for neighbor pairs with full Newton's 3rd law
+     pair added to list if atoms i and j are both owned and i < j
+     if j is ghost only me or other proc adds pair
+     decision based on itag,jtag tests
+     use itag/jtag comparision to eliminate half the interactions
+     itag = jtag is possible for long cutoffs that include images of self
+  Newton + Triclinic:
+     for triclinic, must use delta to eliminate half the I/J interactions
+     cannot use I/J exact coord comparision as for orthog
+     b/c transforming orthog -> lambda -> orthog for ghost atoms
+     with an added PBC offset can shift all 3 coords by epsilon
 ------------------------------------------------------------------------- */
 
-void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
+template<int NEWTON, int TRI>
+void NPairRespaNsqOmp<NEWTON, TRI>::build(NeighList *list)
 {
   const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
   const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0;
   const int molecular = atom->molecular;
   const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0;
   const double delta = 0.01 * force->angstrom;
-  const int triclinic = domain->triclinic;
 
   NPAIR_OMP_INIT;
 
@@ -58,10 +72,10 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
 #endif
   NPAIR_OMP_SETUP(nlocal);
 
-  int i,j,n,itype,jtype,n_inner,n_middle,imol,iatom;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*neighptr_inner,*neighptr_middle;
+  int i, j, n, itype, jtype, n_inner, n_middle, imol, iatom;
+  tagint itag, jtag, tagprev;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
+  int *neighptr, *neighptr_inner, *neighptr_middle;
 
   double **x = atom->x;
   int *type = atom->type;
@@ -85,7 +99,7 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
   int *numneigh_inner = list->numneigh_inner;
   int **firstneigh_inner = list->firstneigh_inner;
 
-  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
+  int *ilist_middle, *numneigh_middle, **firstneigh_middle;
   if (respamiddle) {
     ilist_middle = list->ilist_middle;
     numneigh_middle = list->numneigh_middle;
@@ -117,8 +131,8 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
       neighptr_middle = ipage_middle->vget();
     }
 
-    itag = tag[i];
     itype = type[i];
+    itag = tag[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
@@ -129,72 +143,75 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
     }
 
     // loop over remaining atoms, owned and ghost
-    // use itag/jtap comparision to eliminate half the interactions
-    // itag = jtag is possible for long cutoffs that include images of self
-    // for triclinic, must use delta to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision as for orthog
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
 
-    for (j = i+1; j < nall; j++) {
+    for (j = i + 1; j < nall; j++) {
       if (includegroup && !(mask[j] & bitmask)) continue;
 
-      if (j >= nlocal) {
-        jtag = tag[j];
-        if (itag > jtag) {
-          if ((itag+jtag) % 2 == 0) continue;
-        } else if (itag < jtag) {
-          if ((itag+jtag) % 2 == 1) continue;
-        } else if (triclinic) {
-          if (fabs(x[j][2]-ztmp) > delta) {
-            if (x[j][2] < ztmp) continue;
-          } else if (fabs(x[j][1]-ytmp) > delta) {
-            if (x[j][1] < ytmp) continue;
+      if (NEWTON) {
+        if (j >= nlocal) {
+          jtag = tag[j];
+          if (itag > jtag) {
+            if ((itag + jtag) % 2 == 0) continue;
+          } else if (itag < jtag) {
+            if ((itag + jtag) % 2 == 1) continue;
+          } else if (TRI) {
+            if (fabs(x[j][2] - ztmp) > delta) {
+              if (x[j][2] < ztmp) continue;
+            } else if (fabs(x[j][1] - ytmp) > delta) {
+              if (x[j][1] < ytmp) continue;
+            } else {
+              if (x[j][0] < xtmp) continue;
+            }
           } else {
-            if (x[j][0] < xtmp) continue;
-          }
-        } else {
-          if (x[j][2] < ztmp) continue;
-          if (x[j][2] == ztmp) {
-            if (x[j][1] < ytmp) continue;
-            if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+            if (x[j][2] < ztmp) continue;
+            if (x[j][2] == ztmp) {
+              if (x[j][1] < ytmp) continue;
+              if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+            }
           }
         }
       }
 
       jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
+      if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
 
       if (rsq <= cutneighsq[itype][jtype]) {
         if (molecular != Atom::ATOMIC) {
           if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
+            which = find_special(special[i], nspecial[i], tag[j]);
           else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
+            which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                 tag[j] - tagprev);
+          else
+            which = 0;
+          if (which == 0)
             neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
+          else if ((minchange = domain->minimum_image_check(delx, dely, delz)))
+            neighptr[n++] = j;
+          else if (which > 0)
+            neighptr[n++] = j ^ (which << SBBITS);
+        } else
+          neighptr[n++] = j;
 
         if (rsq < cut_inner_sq) {
-          if (which == 0) neighptr_inner[n_inner++] = j;
-          else if (minchange) neighptr_inner[n_inner++] = j;
-          else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS);
+          if (which == 0)
+            neighptr_inner[n_inner++] = j;
+          else if (minchange)
+            neighptr_inner[n_inner++] = j;
+          else if (which > 0)
+            neighptr_inner[n_inner++] = j ^ (which << SBBITS);
         }
 
-        if (respamiddle &&
-            rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
-          if (which == 0) neighptr_middle[n_middle++] = j;
-          else if (minchange) neighptr_middle[n_middle++] = j;
+        if (respamiddle && rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
+          if (which == 0)
+            neighptr_middle[n_middle++] = j;
+          else if (minchange)
+            neighptr_middle[n_middle++] = j;
           else if (which > 0)
             neighptr_middle[n_middle++] = j ^ (which << SBBITS);
         }
@@ -205,23 +222,20 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
     firstneigh[i] = neighptr;
     numneigh[i] = n;
     ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    if (ipage.status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
 
     ilist_inner[i] = i;
     firstneigh_inner[i] = neighptr_inner;
     numneigh_inner[i] = n_inner;
     ipage.vgot(n_inner);
-    if (ipage_inner.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    if (ipage_inner.status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
 
     if (respamiddle) {
       ilist_middle[i] = i;
       firstneigh_middle[i] = neighptr_middle;
       numneigh_middle[i] = n_middle;
       ipage_middle->vgot(n_middle);
-      if (ipage_middle->status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+      if (ipage_middle->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
     }
   }
   NPAIR_OMP_CLOSE;
@@ -229,3 +243,9 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list)
   list->inum_inner = nlocal;
   if (respamiddle) list->inum_middle = nlocal;
 }
+
+namespace LAMMPS_NS {
+template class NPairRespaNsqOmp<0,0>;
+template class NPairRespaNsqOmp<1,0>;
+template class NPairRespaNsqOmp<1,1>;
+}
diff --git a/src/OPENMP/npair_half_respa_nsq_newtoff_omp.h b/src/OPENMP/npair_respa_nsq_omp.h
similarity index 55%
rename from src/OPENMP/npair_half_respa_nsq_newtoff_omp.h
rename to src/OPENMP/npair_respa_nsq_omp.h
index abd28fd51b..c68d06b4b5 100644
--- a/src/OPENMP/npair_half_respa_nsq_newtoff_omp.h
+++ b/src/OPENMP/npair_respa_nsq_omp.h
@@ -13,23 +13,34 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+typedef NPairRespaNsqOmp<0,0> NPairHalfRespaNsqNewtoffOmp;
 NPairStyle(half/respa/nsq/newtoff/omp,
            NPairHalfRespaNsqNewtoffOmp,
-           NP_HALF | NP_RESPA | NP_NSQ | NP_NEWTOFF | NP_OMP |
-           NP_ORTHO | NP_TRI);
+           NP_HALF | NP_RESPA | NP_NSQ | NP_OMP | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairRespaNsqOmp<1,0> NPairHalfRespaNsqNewtonOmp;
+NPairStyle(half/respa/nsq/newton/omp,
+           NPairHalfRespaNsqNewtonOmp,
+           NP_HALF | NP_RESPA | NP_NSQ | NP_OMP | NP_NEWTON | NP_ORTHO);
+
+typedef NPairRespaNsqOmp<1,1> NPairHalfRespaNsqNewtonTriOmp;
+NPairStyle(half/respa/nsq/newton/tri/omp,
+           NPairHalfRespaNsqNewtonTriOmp,
+           NP_HALF | NP_RESPA | NP_NSQ | NP_OMP | NP_NEWTON | NP_TRI);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_RESPA_NSQ_NEWTOFF_OMP_H
-#define LMP_NPAIR_HALF_RESPA_NSQ_NEWTOFF_OMP_H
+#ifndef LMP_NPAIR_RESPA_NSQ_OMP_H
+#define LMP_NPAIR_RESPA_NSQ_OMP_H
 
 #include "npair.h"
 
 namespace LAMMPS_NS {
 
-class NPairHalfRespaNsqNewtoffOmp : public NPair {
+template<int NEWTON, int TRI>
+class NPairRespaNsqOmp : public NPair {
  public:
-  NPairHalfRespaNsqNewtoffOmp(class LAMMPS *);
+  NPairRespaNsqOmp(class LAMMPS *);
   void build(class NeighList *) override;
 };
 
diff --git a/src/OPENMP/npair_skip_omp.h b/src/OPENMP/npair_skip_omp.h
index ce61968c17..937304ad3f 100644
--- a/src/OPENMP/npair_skip_omp.h
+++ b/src/OPENMP/npair_skip_omp.h
@@ -16,6 +16,7 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+
 NPairStyle(skip/omp,
            NPairSkip,
            NP_SKIP | NP_HALF | NP_FULL |
@@ -50,6 +51,41 @@ NPairStyle(skip/ghost/omp,
            NP_SKIP | NP_HALF | NP_FULL |
            NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
            NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_OMP | NP_GHOST);
+
+NPairStyle(skip/trim/omp,
+           NPairSkipTrim,
+           NP_SKIP | NP_HALF | NP_FULL |
+           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP);
+
+NPairStyle(skip/trim/half/respa/omp,
+           NPairSkipTrimRespa,
+           NP_SKIP | NP_RESPA | NP_HALF | NP_FULL |
+           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP);
+
+NPairStyle(skip/trim/half/size/omp,
+           NPairSkipTrimSize,
+           NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP);
+
+NPairStyle(skip/trim/size/off2on/omp,
+           NPairSkipTrimSizeOff2on,
+           NP_SKIP | NP_SIZE | NP_OFF2ON | NP_HALF |
+           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP);
+
+NPairStyle(skip/trim/size/off2on/oneside/omp,
+           NPairSkipTrimSizeOff2onOneside,
+           NP_SKIP | NP_SIZE | NP_OFF2ON | NP_ONESIDE | NP_HALF |
+           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF |
+           NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP);
+
+NPairStyle(skip/trim/ghost/omp,
+           NPairSkipTrim,
+           NP_SKIP | NP_HALF | NP_FULL |
+           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP | NP_GHOST);
 // clang-format off
 #endif
 
diff --git a/src/OPENMP/npair_skip_trim_omp.h b/src/OPENMP/npair_skip_trim_omp.h
deleted file mode 100644
index aba6f50e17..0000000000
--- a/src/OPENMP/npair_skip_trim_omp.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-// There is no benefit from multi-threading for skip lists, so we
-// just forward the requests to the corresponding non-omp versions.
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(skip/trim/omp,
-           NPairSkipTrim,
-           NP_SKIP | NP_HALF | NP_FULL |
-           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP);
-
-NPairStyle(skip/trim/half/respa/omp,
-           NPairSkipTrimRespa,
-           NP_SKIP | NP_RESPA | NP_HALF | NP_FULL |
-           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP);
-
-NPairStyle(skip/trim/half/size/omp,
-           NPairSkipTrimSize,
-           NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP);
-
-NPairStyle(skip/trim/size/off2on/omp,
-           NPairSkipTrimSizeOff2on,
-           NP_SKIP | NP_SIZE | NP_OFF2ON | NP_HALF |
-           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_MULTI_OLD |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP);
-
-NPairStyle(skip/trim/size/off2on/oneside/omp,
-           NPairSkipTrimSizeOff2onOneside,
-           NP_SKIP | NP_SIZE | NP_OFF2ON | NP_ONESIDE | NP_HALF |
-           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP);
-
-NPairStyle(skip/trim/ghost/omp,
-           NPairSkipTrim,
-           NP_SKIP | NP_HALF | NP_FULL |
-           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM | NP_OMP | NP_GHOST);
-// clang-format off
-#endif
-
diff --git a/src/OPENMP/npair_trim_omp.cpp b/src/OPENMP/npair_trim_omp.cpp
index d7ac0ddb40..0840c1a5f2 100644
--- a/src/OPENMP/npair_trim_omp.cpp
+++ b/src/OPENMP/npair_trim_omp.cpp
@@ -13,14 +13,13 @@
 ------------------------------------------------------------------------- */
 
 #include "npair_trim_omp.h"
+#include "npair_omp.h"
+#include "omp_compat.h"
 
 #include "atom.h"
 #include "error.h"
 #include "my_page.h"
 #include "neigh_list.h"
-#include "npair_omp.h"
-
-#include "omp_compat.h"
 
 using namespace LAMMPS_NS;
 
@@ -43,10 +42,9 @@ void NPairTrimOmp::build(NeighList *list)
 #endif
   NPAIR_OMP_SETUP(inum_copy);
 
-  int i,j,ii,jj,n,jnum,joriginal;
-  int *neighptr,*jlist;
-  double xtmp,ytmp,ztmp;
-  double delx,dely,delz,rsq;
+  int i, j, ii, jj, n, jnum, joriginal;
+  int *neighptr, *jlist;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
 
   double **x = atom->x;
 
@@ -100,8 +98,7 @@ void NPairTrimOmp::build(NeighList *list)
     firstneigh[i] = neighptr;
     numneigh[i] = n;
     ipage.vgot(n);
-    if (ipage.status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    if (ipage.status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
   }
   NPAIR_OMP_CLOSE;
   list->inum = inum_copy;
diff --git a/src/OPENMP/pair_airebo_omp.cpp b/src/OPENMP/pair_airebo_omp.cpp
index 0872678518..9f992aefda 100644
--- a/src/OPENMP/pair_airebo_omp.cpp
+++ b/src/OPENMP/pair_airebo_omp.cpp
@@ -34,7 +34,7 @@
 using namespace LAMMPS_NS;
 using namespace MathSpecial;
 
-#define TOL 1.0e-9
+static constexpr double TOL = 1.0e-9;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pair_brownian_omp.cpp b/src/OPENMP/pair_brownian_omp.cpp
index 45288f13dd..1506f1f35a 100644
--- a/src/OPENMP/pair_brownian_omp.cpp
+++ b/src/OPENMP/pair_brownian_omp.cpp
@@ -36,11 +36,7 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define EPSILON 1.0e-10
-
-// same as fix_wall.cpp
-
-enum{EDGE,CONSTANT,VARIABLE};
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
@@ -93,7 +89,7 @@ void PairBrownianOMP::compute(int eflag, int vflag)
         for (int m = 0; m < wallfix->nwall; m++) {
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
-          if (wallfix->xstyle[m] == VARIABLE) {
+          if (wallfix->xstyle[m] == FixWall::VARIABLE) {
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           }
           else wallcoord = wallfix->coord0[m];
diff --git a/src/OPENMP/pair_brownian_poly_omp.cpp b/src/OPENMP/pair_brownian_poly_omp.cpp
index 91a496979d..ddd1af2c01 100644
--- a/src/OPENMP/pair_brownian_poly_omp.cpp
+++ b/src/OPENMP/pair_brownian_poly_omp.cpp
@@ -36,11 +36,7 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define EPSILON 1.0e-10
-
-// same as fix_wall.cpp
-
-enum{EDGE,CONSTANT,VARIABLE};
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
@@ -93,7 +89,7 @@ void PairBrownianPolyOMP::compute(int eflag, int vflag)
         for (int m = 0; m < wallfix->nwall; m++) {
           int dim = wallfix->wallwhich[m] / 2;
           int side = wallfix->wallwhich[m] % 2;
-          if (wallfix->xstyle[m] == VARIABLE) {
+          if (wallfix->xstyle[m] == FixWall::VARIABLE) {
             wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
           }
           else wallcoord = wallfix->coord0[m];
diff --git a/src/OPENMP/pair_comb_omp.cpp b/src/OPENMP/pair_comb_omp.cpp
index aceff6e111..87d467a846 100644
--- a/src/OPENMP/pair_comb_omp.cpp
+++ b/src/OPENMP/pair_comb_omp.cpp
@@ -32,7 +32,7 @@
 using namespace LAMMPS_NS;
 using MathExtra::dot3;
 
-#define MAXNEIGH 24
+static constexpr int MAXNEIGH = 24;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pair_dpd_ext_omp.cpp b/src/OPENMP/pair_dpd_ext_omp.cpp
index f900512e26..9c53984b68 100644
--- a/src/OPENMP/pair_dpd_ext_omp.cpp
+++ b/src/OPENMP/pair_dpd_ext_omp.cpp
@@ -28,7 +28,7 @@
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pair_dpd_ext_tstat_omp.cpp b/src/OPENMP/pair_dpd_ext_tstat_omp.cpp
index b55a9d39c6..41234dbcc9 100644
--- a/src/OPENMP/pair_dpd_ext_tstat_omp.cpp
+++ b/src/OPENMP/pair_dpd_ext_tstat_omp.cpp
@@ -28,7 +28,7 @@
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pair_dpd_omp.cpp b/src/OPENMP/pair_dpd_omp.cpp
index f267bde1b0..c4226b5f9d 100644
--- a/src/OPENMP/pair_dpd_omp.cpp
+++ b/src/OPENMP/pair_dpd_omp.cpp
@@ -27,7 +27,7 @@
 #include "suffix.h"
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pair_dpd_tstat_omp.cpp b/src/OPENMP/pair_dpd_tstat_omp.cpp
index 2396bf5756..86ecd86528 100644
--- a/src/OPENMP/pair_dpd_tstat_omp.cpp
+++ b/src/OPENMP/pair_dpd_tstat_omp.cpp
@@ -28,7 +28,7 @@
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-10
+static constexpr double EPSILON = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pair_edip_omp.cpp b/src/OPENMP/pair_edip_omp.cpp
index 7c31f0db78..a96f276916 100644
--- a/src/OPENMP/pair_edip_omp.cpp
+++ b/src/OPENMP/pair_edip_omp.cpp
@@ -28,8 +28,8 @@ using namespace LAMMPS_NS;
 
 static constexpr int leadDimInteractionList = 64;
 
-#define GRIDDENSITY 8000
-#define GRIDSTART 0.1
+static constexpr int GRIDDENSITY = 8000;
+static constexpr double GRIDSTART = 0.1;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pair_hbond_dreiding_lj_omp.cpp b/src/OPENMP/pair_hbond_dreiding_lj_omp.cpp
index e7ba4d72c6..b0f6dcfb5b 100644
--- a/src/OPENMP/pair_hbond_dreiding_lj_omp.cpp
+++ b/src/OPENMP/pair_hbond_dreiding_lj_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pair_hbond_dreiding_morse_omp.cpp b/src/OPENMP/pair_hbond_dreiding_morse_omp.cpp
index 99e2d2c5b1..0e43e2a037 100644
--- a/src/OPENMP/pair_hbond_dreiding_morse_omp.cpp
+++ b/src/OPENMP/pair_hbond_dreiding_morse_omp.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pair_lepton_coul_omp.cpp b/src/OPENMP/pair_lepton_coul_omp.cpp
index bc34bc00af..532c16d797 100644
--- a/src/OPENMP/pair_lepton_coul_omp.cpp
+++ b/src/OPENMP/pair_lepton_coul_omp.cpp
@@ -20,11 +20,13 @@
 #include "neigh_list.h"
 #include "suffix.h"
 
-#include <cmath>
-
 #include "Lepton.h"
 #include "lepton_utils.h"
 #include "omp_compat.h"
+
+#include <array>
+#include <cmath>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
@@ -101,25 +103,30 @@ void PairLeptonCoulOMP::eval(int iifrom, int iito, ThrData *const thr)
 
   std::vector<Lepton::CompiledExpression> pairforce;
   std::vector<Lepton::CompiledExpression> pairpot;
-  std::vector<std::pair<bool, bool>> have_q;
+  std::vector<std::array<bool, 3>> has_ref;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp), functions);
       pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      has_ref.push_back({true, true, true});
+      try {
+        pairforce.back().getVariableReference("r");
+      } catch (Lepton::Exception &) {
+        has_ref.back()[0] = false;
+      }
       if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
-      pairforce.back().getVariableReference("r");
-      have_q.emplace_back(true, true);
 
       // check if there are references to charges
+
       try {
         pairforce.back().getVariableReference("qi");
-      } catch (std::exception &) {
-        have_q.back().first = false;
+      } catch (Lepton::Exception &) {
+        has_ref.back()[1] = false;
       }
       try {
         pairforce.back().getVariableReference("qj");
-      } catch (std::exception &) {
-        have_q.back().second = false;
+      } catch (Lepton::Exception &) {
+        has_ref.back()[2] = false;
       }
     }
   } catch (std::exception &e) {
@@ -152,9 +159,9 @@ void PairLeptonCoulOMP::eval(int iifrom, int iito, ThrData *const thr)
       if (rsq < cutsq[itype][jtype]) {
         const double r = sqrt(rsq);
         const int idx = type2expression[itype][jtype];
-        pairforce[idx].getVariableReference("r") = r;
-        if (have_q[idx].first) pairforce[idx].getVariableReference("qi") = q2e * q[i];
-        if (have_q[idx].second) pairforce[idx].getVariableReference("qj") = q2e * q[j];
+        if (has_ref[idx][0]) pairforce[idx].getVariableReference("r") = r;
+        if (has_ref[idx][1]) pairforce[idx].getVariableReference("qi") = q2e * q[i];
+        if (has_ref[idx][2]) pairforce[idx].getVariableReference("qj") = q2e * q[j];
         const double fpair = -pairforce[idx].evaluate() / r * factor_coul;
 
         fxtmp += delx * fpair;
@@ -168,9 +175,14 @@ void PairLeptonCoulOMP::eval(int iifrom, int iito, ThrData *const thr)
 
         double ecoul = 0.0;
         if (EFLAG) {
-          pairpot[idx].getVariableReference("r") = r;
-          if (have_q[idx].first) pairpot[idx].getVariableReference("qi") = q2e * q[i];
-          if (have_q[idx].second) pairpot[idx].getVariableReference("qj") = q2e * q[j];
+          try {
+            pairpot[idx].getVariableReference("r") = r;
+          } catch (Lepton::Exception &) {
+            ;    // ignore -> constant potential
+          }
+          if (has_ref[idx][1]) pairpot[idx].getVariableReference("qi") = q2e * q[i];
+          if (has_ref[idx][2]) pairpot[idx].getVariableReference("qj") = q2e * q[j];
+
           ecoul = pairpot[idx].evaluate();
           ecoul *= factor_coul;
         }
diff --git a/src/OPENMP/pair_lepton_omp.cpp b/src/OPENMP/pair_lepton_omp.cpp
index b57b0fe11e..58692e52d6 100644
--- a/src/OPENMP/pair_lepton_omp.cpp
+++ b/src/OPENMP/pair_lepton_omp.cpp
@@ -20,11 +20,12 @@
 #include "neigh_list.h"
 #include "suffix.h"
 
-#include <cmath>
-
 #include "Lepton.h"
 #include "lepton_utils.h"
 #include "omp_compat.h"
+#include <array>
+#include <cmath>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
@@ -96,10 +97,17 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr)
 
   std::vector<Lepton::CompiledExpression> pairforce;
   std::vector<Lepton::CompiledExpression> pairpot;
+  std::vector<bool> have_ref;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp), functions);
       pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      have_ref.push_back(true);
+      try {
+        pairforce.back().getVariableReference("r");
+      } catch (Lepton::Exception &) {
+        have_ref.back() = false;
+      }
       if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
     }
   } catch (std::exception &e) {
@@ -132,7 +140,7 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr)
       if (rsq < cutsq[itype][jtype]) {
         const double r = sqrt(rsq);
         const int idx = type2expression[itype][jtype];
-        pairforce[idx].getVariableReference("r") = r;
+        if (have_ref[idx]) pairforce[idx].getVariableReference("r") = r;
         const double fpair = -pairforce[idx].evaluate() / r * factor_lj;
 
         fxtmp += delx * fpair;
@@ -146,7 +154,11 @@ void PairLeptonOMP::eval(int iifrom, int iito, ThrData *const thr)
 
         double evdwl = 0.0;
         if (EFLAG) {
-          pairpot[idx].getVariableReference("r") = r;
+          try {
+            pairpot[idx].getVariableReference("r") = r;
+          } catch (Lepton::Exception &) {
+            ;    // ignore -> constant potential
+          }
           evdwl = pairpot[idx].evaluate() - offset[itype][jtype];
           evdwl *= factor_lj;
         }
diff --git a/src/OPENMP/pair_lepton_sphere_omp.cpp b/src/OPENMP/pair_lepton_sphere_omp.cpp
index 6d3a4827b3..79afe27717 100644
--- a/src/OPENMP/pair_lepton_sphere_omp.cpp
+++ b/src/OPENMP/pair_lepton_sphere_omp.cpp
@@ -20,11 +20,13 @@
 #include "neigh_list.h"
 #include "suffix.h"
 
-#include <cmath>
-
 #include "Lepton.h"
 #include "lepton_utils.h"
 #include "omp_compat.h"
+
+#include <array>
+#include <cmath>
+
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
@@ -99,25 +101,30 @@ void PairLeptonSphereOMP::eval(int iifrom, int iito, ThrData *const thr)
 
   std::vector<Lepton::CompiledExpression> pairforce;
   std::vector<Lepton::CompiledExpression> pairpot;
-  std::vector<std::pair<bool, bool>> have_rad;
+  std::vector<std::array<bool, 3>> has_ref;
   try {
     for (const auto &expr : expressions) {
       auto parsed = Lepton::Parser::parse(LeptonUtils::substitute(expr, Pointers::lmp), functions);
       pairforce.emplace_back(parsed.differentiate("r").createCompiledExpression());
+      has_ref.push_back({true, true, true});
+      try {
+        pairforce.back().getVariableReference("r");
+      } catch (Lepton::Exception &) {
+        has_ref.back()[0] = false;
+      }
       if (EFLAG) pairpot.emplace_back(parsed.createCompiledExpression());
-      pairforce.back().getVariableReference("r");
-      have_rad.emplace_back(true, true);
 
-      // check if there are references to charges
+      // check if there are references to radii
+
       try {
         pairforce.back().getVariableReference("radi");
-      } catch (std::exception &) {
-        have_rad.back().first = false;
+      } catch (Lepton::Exception &) {
+        has_ref.back()[1] = false;
       }
       try {
         pairforce.back().getVariableReference("radj");
-      } catch (std::exception &) {
-        have_rad.back().second = false;
+      } catch (Lepton::Exception &) {
+        has_ref.back()[2] = false;
       }
     }
   } catch (std::exception &e) {
@@ -150,9 +157,9 @@ void PairLeptonSphereOMP::eval(int iifrom, int iito, ThrData *const thr)
       if (rsq < cutsq[itype][jtype]) {
         const double r = sqrt(rsq);
         const int idx = type2expression[itype][jtype];
-        pairforce[idx].getVariableReference("r") = r;
-        if (have_rad[idx].first) pairforce[idx].getVariableReference("radi") = radius[i];
-        if (have_rad[idx].second) pairforce[idx].getVariableReference("radj") = radius[j];
+        if (has_ref[idx][0]) pairforce[idx].getVariableReference("r") = r;
+        if (has_ref[idx][1]) pairforce[idx].getVariableReference("radi") = radius[i];
+        if (has_ref[idx][2]) pairforce[idx].getVariableReference("radj") = radius[j];
         const double fpair = -pairforce[idx].evaluate() / r * factor_lj;
 
         fxtmp += delx * fpair;
@@ -166,9 +173,14 @@ void PairLeptonSphereOMP::eval(int iifrom, int iito, ThrData *const thr)
 
         double evdwl = 0.0;
         if (EFLAG) {
-          pairpot[idx].getVariableReference("r") = r;
-          if (have_rad[idx].first) pairpot[idx].getVariableReference("radi") = radius[i];
-          if (have_rad[idx].second) pairpot[idx].getVariableReference("radj") = radius[j];
+          try {
+            pairpot[idx].getVariableReference("r") = r;
+          } catch (Lepton::Exception &) {
+            ;    // ignore -> constant potential
+          }
+          if (has_ref[idx][1]) pairpot[idx].getVariableReference("radi") = radius[i];
+          if (has_ref[idx][2]) pairpot[idx].getVariableReference("radj") = radius[j];
+
           evdwl = pairpot[idx].evaluate();
           evdwl *= factor_lj;
         }
diff --git a/src/OPENMP/pair_lj_cut_thole_long_omp.cpp b/src/OPENMP/pair_lj_cut_thole_long_omp.cpp
index 1ad97a4416..8fdea2ce6d 100644
--- a/src/OPENMP/pair_lj_cut_thole_long_omp.cpp
+++ b/src/OPENMP/pair_lj_cut_thole_long_omp.cpp
@@ -43,7 +43,7 @@ using namespace MathConst;
 #define B4       -5.80844129e-3
 #define B5        1.14652755e-1
 
-#define EPSILON 1.0e-20
+static constexpr double EPSILON = 1.0e-20;
 #define EPS_EWALD 1.0e-6
 #define EPS_EWALD_SQR 1.0e-12
 
diff --git a/src/OPENMP/pair_lubricate_omp.cpp b/src/OPENMP/pair_lubricate_omp.cpp
index 2145744a5b..5ba66ff266 100644
--- a/src/OPENMP/pair_lubricate_omp.cpp
+++ b/src/OPENMP/pair_lubricate_omp.cpp
@@ -32,10 +32,6 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-// same as fix_wall.cpp
-
-enum{EDGE,CONSTANT,VARIABLE};
-
 /* ---------------------------------------------------------------------- */
 
 PairLubricateOMP::PairLubricateOMP(LAMMPS *lmp) :
@@ -74,7 +70,7 @@ void PairLubricateOMP::compute(int eflag, int vflag)
          for (int m = 0; m < wallfix->nwall; m++) {
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE) {
+           if (wallfix->xstyle[m] == FixWall::VARIABLE) {
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
diff --git a/src/OPENMP/pair_lubricate_poly_omp.cpp b/src/OPENMP/pair_lubricate_poly_omp.cpp
index 5b98ec7b14..ebb2d65496 100644
--- a/src/OPENMP/pair_lubricate_poly_omp.cpp
+++ b/src/OPENMP/pair_lubricate_poly_omp.cpp
@@ -32,10 +32,6 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-// same as fix_wall.cpp
-
-enum{EDGE,CONSTANT,VARIABLE};
-
 /* ---------------------------------------------------------------------- */
 
 PairLubricatePolyOMP::PairLubricatePolyOMP(LAMMPS *_lmp) :
@@ -74,7 +70,7 @@ void PairLubricatePolyOMP::compute(int eflag, int vflag)
          for (int m = 0; m < wallfix->nwall; m++) {
            int dim = wallfix->wallwhich[m] / 2;
            int side = wallfix->wallwhich[m] % 2;
-           if (wallfix->xstyle[m] == VARIABLE) {
+           if (wallfix->xstyle[m] == FixWall::VARIABLE) {
              wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
            }
            else wallcoord = wallfix->coord0[m];
diff --git a/src/OPENMP/pair_soft_omp.cpp b/src/OPENMP/pair_soft_omp.cpp
index 0be8c80dcf..309d11a6f9 100644
--- a/src/OPENMP/pair_soft_omp.cpp
+++ b/src/OPENMP/pair_soft_omp.cpp
@@ -28,7 +28,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 1.0e-4
+static constexpr double SMALL = 1.0e-4;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pair_tersoff_table_omp.cpp b/src/OPENMP/pair_tersoff_table_omp.cpp
index 5c44aa3329..ec97df26f6 100644
--- a/src/OPENMP/pair_tersoff_table_omp.cpp
+++ b/src/OPENMP/pair_tersoff_table_omp.cpp
@@ -26,7 +26,7 @@
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
 
-#define GRIDSTART 0.1
+static constexpr double GRIDSTART = 0.1;
 #define GRIDDENSITY_FCUTOFF 5000
 #define GRIDDENSITY_EXP 12000
 #define GRIDDENSITY_GTETA 12000
diff --git a/src/OPENMP/pair_tersoff_zbl_omp.cpp b/src/OPENMP/pair_tersoff_zbl_omp.cpp
index cefa89665a..524d7fe509 100644
--- a/src/OPENMP/pair_tersoff_zbl_omp.cpp
+++ b/src/OPENMP/pair_tersoff_zbl_omp.cpp
@@ -34,7 +34,7 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#define DELTA 4
+static constexpr int DELTA = 4;
 
 /* ----------------------------------------------------------------------
    Fermi-like smoothing function
diff --git a/src/OPENMP/pppm_cg_omp.cpp b/src/OPENMP/pppm_cg_omp.cpp
index 4c751a16f7..0eff5c9363 100644
--- a/src/OPENMP/pppm_cg_omp.cpp
+++ b/src/OPENMP/pppm_cg_omp.cpp
@@ -38,15 +38,10 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
 
-#define EPS_HOC 1.0e-7
+static constexpr double EPS_HOC = 1.0e-7;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pppm_disp_omp.cpp b/src/OPENMP/pppm_disp_omp.cpp
index 24bf2d9564..b610b1711e 100644
--- a/src/OPENMP/pppm_disp_omp.cpp
+++ b/src/OPENMP/pppm_disp_omp.cpp
@@ -38,15 +38,9 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#define ONEF  1.0f
-#else
-#define ZEROF 0.0
-#define ONEF  1.0
-#endif
-
-#define OFFSET 16384
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr FFT_SCALAR ONEF =  1.0;
+static constexpr int OFFSET = 16384;
 
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/OPENMP/pppm_disp_tip4p_omp.cpp b/src/OPENMP/pppm_disp_tip4p_omp.cpp
index ab6342a047..bcc083e809 100644
--- a/src/OPENMP/pppm_disp_tip4p_omp.cpp
+++ b/src/OPENMP/pppm_disp_tip4p_omp.cpp
@@ -37,13 +37,8 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#else
-#define ZEROF 0.0
-#endif
-
-#define OFFSET 16384
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr int OFFSET = 16384;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pppm_omp.cpp b/src/OPENMP/pppm_omp.cpp
index 86e65da101..a178483b8a 100644
--- a/src/OPENMP/pppm_omp.cpp
+++ b/src/OPENMP/pppm_omp.cpp
@@ -38,13 +38,8 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#else
-#define ZEROF 0.0
-#endif
-
-#define EPS_HOC 1.0e-7
+static constexpr FFT_SCALAR ZEROF = 0.0;
+static constexpr double EPS_HOC = 1.0e-7;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/OPENMP/pppm_tip4p_omp.cpp b/src/OPENMP/pppm_tip4p_omp.cpp
index 66ce44b5ef..420a116816 100644
--- a/src/OPENMP/pppm_tip4p_omp.cpp
+++ b/src/OPENMP/pppm_tip4p_omp.cpp
@@ -39,14 +39,10 @@ using namespace LAMMPS_NS;
 using namespace MathConst;
 using namespace MathSpecial;
 
-#ifdef FFT_SINGLE
-#define ZEROF 0.0f
-#else
-#define ZEROF 0.0
-#endif
+static constexpr FFT_SCALAR ZEROF = 0.0;
 
-#define EPS_HOC 1.0e-7
-#define OFFSET 16384
+static constexpr double EPS_HOC = 1.0e-7;
+static constexpr int OFFSET = 16384;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ORIENT/fix_orient_bcc.cpp b/src/ORIENT/fix_orient_bcc.cpp
index 2d801b0bdc..25fec2e606 100644
--- a/src/ORIENT/fix_orient_bcc.cpp
+++ b/src/ORIENT/fix_orient_bcc.cpp
@@ -38,7 +38,7 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
-#define BIG 1000000000
+static constexpr int BIG = 1000000000;
 
 static const char cite_fix_orient_bcc[] =
   "fix orient/bcc command: doi:10.1016/j.commatsci.2016.02.016\n\n"
diff --git a/src/ORIENT/fix_orient_fcc.cpp b/src/ORIENT/fix_orient_fcc.cpp
index cdb3fd689d..78a8485278 100644
--- a/src/ORIENT/fix_orient_fcc.cpp
+++ b/src/ORIENT/fix_orient_fcc.cpp
@@ -35,7 +35,7 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
-#define BIG 1000000000
+static constexpr int BIG = 1000000000;
 
 static const char cite_fix_orient_fcc[] =
   "fix orient/fcc command: doi:10.1038/nmat1559\n\n"
diff --git a/src/PHONON/fix_phonon.cpp b/src/PHONON/fix_phonon.cpp
index 6b5294d308..786931a549 100644
--- a/src/PHONON/fix_phonon.cpp
+++ b/src/PHONON/fix_phonon.cpp
@@ -45,9 +45,9 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define MAXLINE 512
+static constexpr int MAXLINE = 512;
 
-enum{FORWARD=-1,BACKWARD=1};
+enum{ FORWARD=-1, BACKWARD=1 };
 
 static const char cite_fix_phonon[] =
   "fix phonon command: doi:10.1016/j.cpc.2011.04.019\n\n"
@@ -555,7 +555,7 @@ void FixPhonon::readmap()
   }
 
   // read from map file for others
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
   FILE *fp = fopen(mapfile, "r");
   if (fp == nullptr)
     error->all(FLERR,"Cannot open input map file {}: {}", mapfile, utils::getsyserror());
diff --git a/src/POEMS/fix_poems.cpp b/src/POEMS/fix_poems.cpp
index f289a939e6..6ae21f652c 100644
--- a/src/POEMS/fix_poems.cpp
+++ b/src/POEMS/fix_poems.cpp
@@ -42,9 +42,9 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 
 #define MAXBODY 2    // currently 2 since only linear chains allowed
-#define DELTA 128
-#define TOLERANCE 1.0e-6
-#define EPSILON 1.0e-7
+static constexpr int DELTA = 128;
+static constexpr double TOLERANCE = 1.0e-6;
+static constexpr double EPSILON = 1.0e-7;
 
 static const char cite_fix_poems[] =
     "fix poems command: doi:10.1016/j.ijnonlinmec.2008.04.003\n\n"
@@ -855,7 +855,7 @@ void FixPOEMS::pre_neighbor() {}
    count # of degrees-of-freedom removed by fix_poems for atoms in igroup
 ------------------------------------------------------------------------- */
 
-int FixPOEMS::dof(int igroup)
+bigint FixPOEMS::dof(int igroup)
 {
   int groupbit = group->bitmask[igroup];
 
@@ -877,17 +877,17 @@ int FixPOEMS::dof(int igroup)
 
   // remove 3N - 6 dof for each rigid body if at least 2 atoms are in igroup
 
-  int n = 0;
+  bigint n = 0;
   for (int ibody = 0; ibody < nbody; ibody++)
     if (nall[ibody] > 2) n += 3 * nall[ibody] - 6;
 
   // subtract 3 additional dof for each joint if atom is also in igroup
 
-  int m = 0;
+  bigint m = 0;
   for (int i = 0; i < nlocal; i++)
     if (natom2body[i] > 1 && (mask[i] & groupbit)) m += 3 * (natom2body[i] - 1);
-  int mall;
-  MPI_Allreduce(&m, &mall, 1, MPI_INT, MPI_SUM, world);
+  bigint mall;
+  MPI_Allreduce(&m, &mall, 1, MPI_LMP_BIGINT, MPI_SUM, world);
   n += mall;
 
   // delete local memory
diff --git a/src/POEMS/fix_poems.h b/src/POEMS/fix_poems.h
index 99af171636..6aac4abd8a 100644
--- a/src/POEMS/fix_poems.h
+++ b/src/POEMS/fix_poems.h
@@ -47,7 +47,7 @@ class FixPOEMS : public Fix {
   double memory_usage() override;
 
   void pre_neighbor() override;
-  int dof(int) override;
+  bigint dof(int) override;
   void deform(int) override;
   int modify_param(int, char **) override;
   void reset_dt() override;
diff --git a/src/PTM/ptm_convex_hull_incremental.cpp b/src/PTM/ptm_convex_hull_incremental.cpp
index 25ff54c787..abae00778c 100644
--- a/src/PTM/ptm_convex_hull_incremental.cpp
+++ b/src/PTM/ptm_convex_hull_incremental.cpp
@@ -17,10 +17,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
 
 namespace ptm {
 
-#define VISIBLE 1
-#define INVISIBLE 2
-#define BOTH 3
-#define TOLERANCE 1E-8
+enum { VISIBLE=1, INVISIBLE, BOTH };
+static constexpr double TOLERANCE = 1E-8;
 
 static double norm_squared(double *p)
 {
diff --git a/src/PYTHON/python_impl.cpp b/src/PYTHON/python_impl.cpp
index 57f8ea1cf0..0db468d701 100644
--- a/src/PYTHON/python_impl.cpp
+++ b/src/PYTHON/python_impl.cpp
@@ -17,6 +17,7 @@
 
 #include "python_impl.h"
 
+#include "comm.h"
 #include "error.h"
 #include "input.h"
 #include "memory.h"
@@ -29,22 +30,33 @@
 
 #ifdef MLIAP_PYTHON
 #include "mliap_model_python.h"
+#if defined(__PYX_EXTERN_C) && !defined(CYTHON_EXTERN_C)
+#undef __PYX_EXTERN_C
+#endif
 #include "mliap_unified.h"
 // The above should somehow really be included in the next file.
 // We could get around this with cython --capi-reexport-cincludes
 // However, that exposes -too many- headers.
 #include "mliap_model_python_couple.h"
+#if defined(__PYX_EXTERN_C) && !defined(CYTHON_EXTERN_C)
+#undef __PYX_EXTERN_C
+#endif
 #include "mliap_unified_couple.h"
 #ifdef LMP_KOKKOS
 #include "mliap_model_python_kokkos.h"
+#if defined(__PYX_EXTERN_C) && !defined(CYTHON_EXTERN_C)
+#undef __PYX_EXTERN_C
+#endif
 #include "mliap_unified_kokkos.h"
 // The above should somehow really be included in the next file.
 // We could get around this with cython --capi-reexport-cincludes
 // However, that exposes -too many- headers.
 #include "mliap_model_python_couple_kokkos.h"
+#if defined(__PYX_EXTERN_C) && !defined(CYTHON_EXTERN_C)
+#undef __PYX_EXTERN_C
+#endif
 #include "mliap_unified_couple_kokkos.h"
 
-
 #endif
 #endif
 
@@ -61,46 +73,58 @@ PythonImpl::PythonImpl(LAMMPS *lmp) : Pointers(lmp)
   nfunc = 0;
   pfuncs = nullptr;
 
-#if PY_MAJOR_VERSION >= 3
-#ifndef Py_LIMITED_API
+#if PY_MAJOR_VERSION >= 3 && !defined(Py_LIMITED_API)
   // check for PYTHONUNBUFFERED environment variable
   const char *PYTHONUNBUFFERED = getenv("PYTHONUNBUFFERED");
+  // Force the stdout and stderr streams to be unbuffered.
+  bool unbuffered = PYTHONUNBUFFERED != nullptr && strcmp(PYTHONUNBUFFERED, "1") == 0;
 
-  if (PYTHONUNBUFFERED != nullptr && strcmp(PYTHONUNBUFFERED, "1") == 0) {
-    // Python Global configuration variable
-    // Force the stdout and stderr streams to be unbuffered.
-    Py_UnbufferedStdioFlag = 1;
-  }
+#if (PY_VERSION_HEX >= 0x030800f0)
+  PyConfig config;
+  PyConfig_InitPythonConfig(&config);
+  config.buffered_stdio = !unbuffered;
+#else
+  // Python Global configuration variable
+  Py_UnbufferedStdioFlag = unbuffered;
 #endif
 #endif
 
 #ifdef MLIAP_PYTHON
-  // Inform python intialization scheme of the mliappy module.
-  // This -must- happen before python is initialized.
-  int err = PyImport_AppendInittab("mliap_model_python_couple", PyInit_mliap_model_python_couple);
-  if (err) error->all(FLERR, "Could not register MLIAPPY embedded python module.");
+  // cannot register mliappy module a second time
+  if (!Py_IsInitialized()) {
+    // Inform python intialization scheme of the mliappy module.
+    // This -must- happen before python is initialized.
+    int err = PyImport_AppendInittab("mliap_model_python_couple", PyInit_mliap_model_python_couple);
+    if (err) error->all(FLERR, "Could not register MLIAPPY embedded python module.");
+
+    err = PyImport_AppendInittab("mliap_unified_couple", PyInit_mliap_unified_couple);
+    if (err) error->all(FLERR, "Could not register MLIAPPY unified embedded python module.");
 
-  err = PyImport_AppendInittab("mliap_unified_couple", PyInit_mliap_unified_couple);
-  if (err) error->all(FLERR, "Could not register MLIAPPY unified embedded python module.");
 #ifdef LMP_KOKKOS
-  // Inform python intialization scheme of the mliappy module.
-  // This -must- happen before python is initialized.
-  err = PyImport_AppendInittab("mliap_model_python_couple_kokkos", PyInit_mliap_model_python_couple_kokkos);
-  if (err) error->all(FLERR, "Could not register MLIAPPY embedded python module.");
-
-  err = PyImport_AppendInittab("mliap_unified_couple_kokkos", PyInit_mliap_unified_couple_kokkos);
-  if (err) error->all(FLERR, "Could not register MLIAPPY unified embedded python module.");
+    // Inform python intialization scheme of the mliappy module.
+    // This -must- happen before python is initialized.
+    err = PyImport_AppendInittab("mliap_model_python_couple_kokkos",
+                                 PyInit_mliap_model_python_couple_kokkos);
+    if (err) error->all(FLERR, "Could not register MLIAPPY embedded python KOKKOS module.");
 
+    err = PyImport_AppendInittab("mliap_unified_couple_kokkos", PyInit_mliap_unified_couple_kokkos);
+    if (err) error->all(FLERR, "Could not register MLIAPPY unified embedded python KOKKOS module.");
 #endif
+  }
 #endif
 
+#if PY_VERSION_HEX >= 0x030800f0 && !defined(Py_LIMITED_API)
+  Py_InitializeFromConfig(&config);
+  PyConfig_Clear(&config);
+#else
   Py_Initialize();
+#endif
 
   // only needed for Python 2.x and Python 3 < 3.7
   // With Python 3.7 this function is now called by Py_Initialize()
   // Deprecated since version 3.9, will be removed in version 3.11
-#if PY_MAJOR_VERSION < 3 || PY_MINOR_VERSION < 7
-  if (!PyEval_ThreadsInitialized()) { PyEval_InitThreads(); }
+#if PY_VERSION_HEX < 0x030700f0
+  if (!PyEval_ThreadsInitialized()) PyEval_InitThreads();
 #endif
 
   PyUtils::GIL lock;
diff --git a/src/QEQ/fix_qeq.cpp b/src/QEQ/fix_qeq.cpp
index b60438b7c8..22632cf786 100644
--- a/src/QEQ/fix_qeq.cpp
+++ b/src/QEQ/fix_qeq.cpp
@@ -338,12 +338,6 @@ void FixQEq::setup_pre_force(int vflag)
   if (force->newton_pair == 0)
     error->all(FLERR,"QEQ with 'newton pair off' not supported");
 
-  if (force->pair) {
-    if (force->pair->suffix_flag & (Suffix::INTEL|Suffix::GPU))
-      error->all(FLERR,"QEQ is not compatiple with suffix version "
-                 "of pair style");
-  }
-
   deallocate_storage();
   allocate_storage();
 
diff --git a/src/QEQ/fix_qeq_fire.cpp b/src/QEQ/fix_qeq_fire.cpp
index 34ef51d947..f8eb667f2a 100644
--- a/src/QEQ/fix_qeq_fire.cpp
+++ b/src/QEQ/fix_qeq_fire.cpp
@@ -34,12 +34,12 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define DELAYSTEP 0
+static constexpr int DELAYSTEP = 0;
 #define DT_GROW 1.1
 #define DT_SHRINK 0.5
 #define ALPHA0 0.8
 #define ALPHA_SHRINK 0.10
-#define TMAX 10.0
+static constexpr double TMAX = 10.0;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/REACTION/README b/src/REACTION/README
index 99a5d604ec..b9199d6d47 100644
--- a/src/REACTION/README
+++ b/src/REACTION/README
@@ -25,4 +25,5 @@ The REACTER methodology is detailed in:
     https://doi.org/10.1021/acs.macromol.0c02012
 
 This package was created by Jacob Gissinger
-(jacob.r.gissinger@gmail.com) at the NASA Langley Research Center.
+(jgissing@stevens.edu) while at the NASA Langley Research Center
+and Stevens Institute of Technology.
diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp
index d124b06dc2..d8561b3959 100644
--- a/src/REACTION/fix_bond_react.cpp
+++ b/src/REACTION/fix_bond_react.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
 https://www.lammps.org/, Sandia National Laboratories
@@ -13,7 +12,7 @@ See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-Contributing Author: Jacob Gissinger (jacob.r.gissinger@gmail.com)
+Contributing Author: Jacob Gissinger (jgissing@stevens.edu)
 ------------------------------------------------------------------------- */
 
 #include "fix_bond_react.h"
@@ -58,30 +57,31 @@ using namespace FixConst;
 using namespace MathConst;
 
 static const char cite_fix_bond_react[] =
-  "fix bond/react: reacter.org doi:10.1016/j.polymer.2017.09.038, doi:10.1021/acs.macromol.0c02012\n\n"
-  "@Article{Gissinger17,\n"
-  " author = {J. R. Gissinger and B. D. Jensen and K. E. Wise},\n"
-  " title = {Modeling Chemical Reactions in Classical Molecular Dynamics Simulations},\n"
-  " journal = {Polymer},\n"
-  " year =    2017,\n"
-  " volume =  128,\n"
-  " pages =   {211--217}\n"
-  "}\n\n"
-  "@Article{Gissinger20,\n"
-  " author = {J. R. Gissinger, B. D. Jensen, K. E. Wise},\n"
-  " title = {{REACTER}: A Heuristic Method for Reactive Molecular Dynamics},\n"
-  " journal = {Macromolecules},\n"
-  " year =    2020,\n"
-  " volume =  53,\n"
-  " number =  22,\n"
-  " pages =   {9953--9961}\n"
-  "}\n\n";
+    "fix bond/react: reacter.org doi:10.1016/j.polymer.2017.09.038, "
+    "doi:10.1021/acs.macromol.0c02012\n\n"
+    "@Article{Gissinger17,\n"
+    " author = {J. R. Gissinger and B. D. Jensen and K. E. Wise},\n"
+    " title = {Modeling Chemical Reactions in Classical Molecular Dynamics Simulations},\n"
+    " journal = {Polymer},\n"
+    " year =    2017,\n"
+    " volume =  128,\n"
+    " pages =   {211--217}\n"
+    "}\n\n"
+    "@Article{Gissinger20,\n"
+    " author = {J. R. Gissinger, B. D. Jensen, K. E. Wise},\n"
+    " title = {{REACTER}: A Heuristic Method for Reactive Molecular Dynamics},\n"
+    " journal = {Macromolecules},\n"
+    " year =    2020,\n"
+    " volume =  53,\n"
+    " number =  22,\n"
+    " pages =   {9953--9961}\n"
+    "}\n\n";
 
-#define BIG 1.0e20
-#define DELTA 16
-#define MAXGUESS 20 // max # of guesses allowed by superimpose algorithm
-#define MAXCONARGS 14 // max # of arguments for any type of constraint + rxnID
-#define NUMVARVALS 5 // max # of keyword values that have variables as input
+static constexpr double BIG = 1.0e20;
+static constexpr int DELTA = 16;
+static constexpr int MAXGUESS = 20;      // max # of guesses allowed by superimpose algorithm
+static constexpr int MAXCONARGS = 14;    // max # of arguments for any type of constraint + rxnID
+static constexpr int NUMVARVALS = 5;     // max # of keyword values that have variables as input
 
 // various statuses of superimpose algorithm:
 // ACCEPT: site successfully matched to pre-reacted template
@@ -90,24 +90,25 @@ static const char cite_fix_bond_react[] =
 // CONTINUE: a neighbor has been assigned, skip to next neighbor
 // GUESSFAIL: a guess has failed (if no more restore points, status = 'REJECT')
 // RESTORE: restore mode, load most recent restore point
-enum{ACCEPT,REJECT,PROCEED,CONTINUE,GUESSFAIL,RESTORE};
+enum { ACCEPT, REJECT, PROCEED, CONTINUE, GUESSFAIL, RESTORE };
 
 // types of available reaction constraints
-enum{DISTANCE,ANGLE,DIHEDRAL,ARRHENIUS,RMSD,CUSTOM};
+enum { DISTANCE, ANGLE, DIHEDRAL, ARRHENIUS, RMSD, CUSTOM };
 
 // ID type used by constraint
-enum{ATOM,FRAG};
+enum { ATOM, FRAG };
 
 // keyword values that accept variables as input
-enum{NEVERY,RMIN,RMAX,PROB,NRATE};
+enum { NEVERY, RMIN, RMAX, PROB, NRATE };
 
 // flag for one-proc vs shared reaction sites
-enum{LOCAL,GLOBAL};
+enum { LOCAL, GLOBAL };
 
 // values for molecule_keyword
-enum{OFF,INTER,INTRA};
+enum { OFF, INTER, INTRA };
 
 /* ---------------------------------------------------------------------- */
+// clang-format off
 
 FixBondReact::FixBondReact(LAMMPS *lmp, int narg, char **arg) :
   Fix(lmp, narg, arg)
@@ -670,15 +671,6 @@ FixBondReact::~FixBondReact()
   memory->destroy(ghostly_rxn_count);
   memory->destroy(reaction_count_total);
 
-  if (newton_bond == 0) {
-    memory->destroy(xspecial);
-    memory->destroy(nxspecial);
-    memory->destroy(onemol_xspecial);
-    memory->destroy(onemol_nxspecial);
-    memory->destroy(twomol_xspecial);
-    memory->destroy(twomol_nxspecial);
-  }
-
   if (attempted_rxn == 1) {
     memory->destroy(restore_pt);
     memory->destroy(restore);
@@ -827,11 +819,10 @@ void FixBondReact::init()
     nlevels_respa = (dynamic_cast<Respa *>(update->integrate))->nlevels;
 
   // check cutoff for iatomtype,jatomtype
-  for (int i = 0; i < nreacts; i++) {
-    if (!utils::strmatch(force->pair_style,"^hybrid"))
-      if (force->pair == nullptr || cutsq[i][1] > force->pair->cutsq[iatomtype[i]][jatomtype[i]])
+  if (!utils::strmatch(force->pair_style,"^hybrid"))
+    for (int i = 0; i < nreacts; i++)
+      if (force->pair == nullptr || (closeneigh[i] < 0 && cutsq[i][1] > force->pair->cutsq[iatomtype[i]][jatomtype[i]]))
         error->all(FLERR,"Fix bond/react: Fix bond/react cutoff is longer than pairwise cutoff");
-  }
 
   // need a half neighbor list, built every Nevery steps
   neighbor->add_request(this, NeighConst::REQ_OCCASIONAL);
@@ -931,29 +922,10 @@ void FixBondReact::post_integrate()
 
   neighbor->build_one(list,1);
 
-  // here we define a full special list, independent of Newton setting
-  if (newton_bond == 1) {
-    nxspecial = atom->nspecial;
-    xspecial = atom->special;
-  } else {
-    int nall = atom->nlocal + atom->nghost;
-    memory->destroy(nxspecial);
-    memory->destroy(xspecial);
-    memory->create(nxspecial,nall,3,"bond/react:nxspecial");
-    memory->create(xspecial,nall,atom->maxspecial,"bond/react:xspecial");
-    for (int i = 0; i < atom->nlocal; i++) {
-      nxspecial[i][0] = atom->num_bond[i];
-      for (int j = 0; j < nxspecial[i][0]; j++) {
-        xspecial[i][j] = atom->bond_atom[i][j];
-      }
-      nxspecial[i][1] = atom->nspecial[i][1];
-      nxspecial[i][2] = atom->nspecial[i][2];
-      int joffset = nxspecial[i][0] - atom->nspecial[i][0];
-      for (int j = nxspecial[i][0]; j < nxspecial[i][2]; j++) {
-        xspecial[i][j+joffset] = atom->special[i][j];
-      }
-    }
-  }
+  // here we define a full special list
+  // may need correction for unusual special bond settings
+  nxspecial = atom->nspecial;
+  xspecial = atom->special;
 
   int j;
   for (rxnID = 0; rxnID < nreacts; rxnID++) {
@@ -2541,49 +2513,15 @@ int FixBondReact::get_chirality(double four_coords[12])
 
 /* ----------------------------------------------------------------------
   Get xspecials for current molecule templates
+  may need correction when specials defined explicitly in molecule templates
 ------------------------------------------------------------------------- */
 
 void FixBondReact::get_molxspecials()
 {
-  if (newton_bond == 1) {
-    onemol_nxspecial = onemol->nspecial;
-    onemol_xspecial = onemol->special;
-    twomol_nxspecial = twomol->nspecial;
-    twomol_xspecial = twomol->special;
-  } else {
-    memory->destroy(onemol_nxspecial);
-    memory->destroy(onemol_xspecial);
-    memory->create(onemol_nxspecial,onemol->natoms,3,"bond/react:onemol_nxspecial");
-    memory->create(onemol_xspecial,onemol->natoms,atom->maxspecial,"bond/react:onemol_xspecial");
-    for (int i = 0; i < onemol->natoms; i++) {
-      onemol_nxspecial[i][0] = onemol->num_bond[i];
-      for (int j = 0; j < onemol_nxspecial[i][0]; j++) {
-        onemol_xspecial[i][j] = onemol->bond_atom[i][j];
-      }
-      onemol_nxspecial[i][1] = onemol->nspecial[i][1];
-      onemol_nxspecial[i][2] = onemol->nspecial[i][2];
-      int joffset = onemol_nxspecial[i][0] - onemol->nspecial[i][0];
-      for (int j = onemol_nxspecial[i][0]; j < onemol_nxspecial[i][2]; j++) {
-        onemol_xspecial[i][j+joffset] = onemol->special[i][j];
-      }
-    }
-    memory->destroy(twomol_nxspecial);
-    memory->destroy(twomol_xspecial);
-    memory->create(twomol_nxspecial,twomol->natoms,3,"bond/react:twomol_nxspecial");
-    memory->create(twomol_xspecial,twomol->natoms,atom->maxspecial,"bond/react:twomol_xspecial");
-    for (int i = 0; i < twomol->natoms; i++) {
-      twomol_nxspecial[i][0] = twomol->num_bond[i];
-      for (int j = 0; j < twomol_nxspecial[i][0]; j++) {
-        twomol_xspecial[i][j] = twomol->bond_atom[i][j];
-      }
-      twomol_nxspecial[i][1] = twomol->nspecial[i][1];
-      twomol_nxspecial[i][2] = twomol->nspecial[i][2];
-      int joffset = twomol_nxspecial[i][0] - twomol->nspecial[i][0];
-      for (int j = twomol_nxspecial[i][0]; j < twomol_nxspecial[i][2]; j++) {
-        twomol_xspecial[i][j+joffset] = twomol->special[i][j];
-      }
-    }
-  }
+  onemol_nxspecial = onemol->nspecial;
+  onemol_xspecial = onemol->special;
+  twomol_nxspecial = twomol->nspecial;
+  twomol_xspecial = twomol->special;
 }
 
 /* ----------------------------------------------------------------------
@@ -2682,16 +2620,43 @@ void FixBondReact::find_landlocked_atoms(int myrxn)
   }
 
   // also, if atoms change number of bonds, but aren't landlocked, that could be bad
+  int warnflag = 0;
   if (comm->me == 0)
     for (int i = 0; i < twomol->natoms; i++) {
       if ((create_atoms[i][myrxn] == 0) &&
           (twomol_nxspecial[i][0] != onemol_nxspecial[equivalences[i][1][myrxn]-1][0]) &&
-          (landlocked_atoms[i][myrxn] == 0))
-        error->warning(FLERR, "Fix bond/react: Atom affected by reaction {} is too close "
-                       "to template edge",rxn_name[myrxn]);
-          break;
+          (landlocked_atoms[i][myrxn] == 0)) {
+        warnflag = 1;
+        break;
+      }
     }
 
+  // also, if an atom changes any of its bonds, but is not landlocked, that could be bad
+  int thereflag;
+  if (comm->me == 0)
+    for (int i = 0; i < twomol->natoms; i++) {
+      if (landlocked_atoms[i][myrxn] == 1) continue;
+      for (int j = 0; j < twomol_nxspecial[i][0]; j++) {
+        int oneneighID = equivalences[twomol_xspecial[i][j]-1][1][myrxn];
+        int ii = equivalences[i][1][myrxn] - 1;
+        thereflag = 0;
+        for (int k = 0; k < onemol_nxspecial[ii][0]; k++) {
+          if (oneneighID == onemol_xspecial[ii][k]) {
+            thereflag = 1;
+            break;
+          }
+        }
+        if (thereflag == 0) {
+          warnflag = 1;
+          break;
+        }
+      }
+      if (warnflag == 1) break;
+    }
+
+  if (comm->me == 0 && warnflag == 1) error->warning(FLERR, "Fix bond/react: Atom affected "
+                       "by reaction {} is too close to template edge",rxn_name[myrxn]);
+
   // finally, if a created atom is not landlocked, bad!
   for (int i = 0; i < twomol->natoms; i++) {
     if (create_atoms[i][myrxn] == 1 && landlocked_atoms[i][myrxn] == 0) {
@@ -3349,7 +3314,7 @@ void FixBondReact::update_everything()
         dynamic_cast<FixBondHistory *>(ihistory)->clear_cache();
 
     // Angles! First let's delete all angle info:
-    if (force->angle && twomol->angleflag) {
+    if (force->angle) {
       int *num_angle = atom->num_angle;
       int **angle_type = atom->angle_type;
       tagint **angle_atom1 = atom->angle_atom1;
@@ -3390,33 +3355,35 @@ void FixBondReact::update_everything()
           }
         }
         // now let's add the new angle info.
-        for (int j = 0; j < twomol->natoms; j++) {
-          int jj = equivalences[j][1][rxnID]-1;
-          if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) {
-            if (landlocked_atoms[j][rxnID] == 1) {
-              num_angle[atom->map(update_mega_glove[jj+1][i])] = twomol->num_angle[j];
-              delta_angle += twomol->num_angle[j];
-              for (int p = 0; p < twomol->num_angle[j]; p++) {
-                angle_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->angle_type[j][p];
-                angle_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i];
-                angle_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i];
-                angle_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i];
+        if (twomol->angleflag) {
+          for (int j = 0; j < twomol->natoms; j++) {
+            int jj = equivalences[j][1][rxnID]-1;
+            if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) {
+              if (landlocked_atoms[j][rxnID] == 1) {
+                num_angle[atom->map(update_mega_glove[jj+1][i])] = twomol->num_angle[j];
+                delta_angle += twomol->num_angle[j];
+                for (int p = 0; p < twomol->num_angle[j]; p++) {
+                  angle_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->angle_type[j][p];
+                  angle_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i];
+                  angle_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i];
+                  angle_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i];
+                }
               }
-            }
-            if (landlocked_atoms[j][rxnID] == 0) {
-              for (int p = 0; p < twomol->num_angle[j]; p++) {
-                if (landlocked_atoms[twomol->angle_atom1[j][p]-1][rxnID] == 1 ||
-                    landlocked_atoms[twomol->angle_atom2[j][p]-1][rxnID] == 1 ||
-                    landlocked_atoms[twomol->angle_atom3[j][p]-1][rxnID] == 1) {
-                  insert_num = num_angle[atom->map(update_mega_glove[jj+1][i])];
-                  angle_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->angle_type[j][p];
-                  angle_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i];
-                  angle_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i];
-                  angle_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i];
-                  num_angle[atom->map(update_mega_glove[jj+1][i])]++;
-                  if (num_angle[atom->map(update_mega_glove[jj+1][i])] > atom->angle_per_atom)
-                    error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom");
-                  delta_angle++;
+              if (landlocked_atoms[j][rxnID] == 0) {
+                for (int p = 0; p < twomol->num_angle[j]; p++) {
+                  if (landlocked_atoms[twomol->angle_atom1[j][p]-1][rxnID] == 1 ||
+                      landlocked_atoms[twomol->angle_atom2[j][p]-1][rxnID] == 1 ||
+                      landlocked_atoms[twomol->angle_atom3[j][p]-1][rxnID] == 1) {
+                    insert_num = num_angle[atom->map(update_mega_glove[jj+1][i])];
+                    angle_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->angle_type[j][p];
+                    angle_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom1[j][p]-1][1][rxnID]][i];
+                    angle_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom2[j][p]-1][1][rxnID]][i];
+                    angle_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->angle_atom3[j][p]-1][1][rxnID]][i];
+                    num_angle[atom->map(update_mega_glove[jj+1][i])]++;
+                    if (num_angle[atom->map(update_mega_glove[jj+1][i])] > atom->angle_per_atom)
+                      error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom");
+                    delta_angle++;
+                  }
                 }
               }
             }
@@ -3426,7 +3393,7 @@ void FixBondReact::update_everything()
     }
 
     // Dihedrals! first let's delete all dihedral info for landlocked atoms
-    if (force->dihedral && twomol->dihedralflag) {
+    if (force->dihedral) {
       int *num_dihedral = atom->num_dihedral;
       int **dihedral_type = atom->dihedral_type;
       tagint **dihedral_atom1 = atom->dihedral_atom1;
@@ -3470,36 +3437,38 @@ void FixBondReact::update_everything()
           }
         }
         // now let's add new dihedral info
-        for (int j = 0; j < twomol->natoms; j++) {
-          int jj = equivalences[j][1][rxnID]-1;
-          if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) {
-            if (landlocked_atoms[j][rxnID] == 1) {
-              num_dihedral[atom->map(update_mega_glove[jj+1][i])] = twomol->num_dihedral[j];
-              delta_dihed += twomol->num_dihedral[j];
-              for (int p = 0; p < twomol->num_dihedral[j]; p++) {
-                dihedral_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->dihedral_type[j][p];
-                dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i];
-                dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i];
-                dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i];
-                dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i];
+        if (twomol->dihedralflag) {
+          for (int j = 0; j < twomol->natoms; j++) {
+            int jj = equivalences[j][1][rxnID]-1;
+            if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) {
+              if (landlocked_atoms[j][rxnID] == 1) {
+                num_dihedral[atom->map(update_mega_glove[jj+1][i])] = twomol->num_dihedral[j];
+                delta_dihed += twomol->num_dihedral[j];
+                for (int p = 0; p < twomol->num_dihedral[j]; p++) {
+                  dihedral_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->dihedral_type[j][p];
+                  dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i];
+                  dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i];
+                  dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i];
+                  dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i];
+                }
               }
-            }
-            if (landlocked_atoms[j][rxnID] == 0) {
-              for (int p = 0; p < twomol->num_dihedral[j]; p++) {
-                if (landlocked_atoms[twomol->dihedral_atom1[j][p]-1][rxnID] == 1 ||
-                    landlocked_atoms[twomol->dihedral_atom2[j][p]-1][rxnID] == 1 ||
-                    landlocked_atoms[twomol->dihedral_atom3[j][p]-1][rxnID] == 1 ||
-                    landlocked_atoms[twomol->dihedral_atom4[j][p]-1][rxnID] == 1) {
-                  insert_num = num_dihedral[atom->map(update_mega_glove[jj+1][i])];
-                  dihedral_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->dihedral_type[j][p];
-                  dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i];
-                  dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i];
-                  dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i];
-                  dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i];
-                  num_dihedral[atom->map(update_mega_glove[jj+1][i])]++;
-                  if (num_dihedral[atom->map(update_mega_glove[jj+1][i])] > atom->dihedral_per_atom)
-                    error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom");
-                  delta_dihed++;
+              if (landlocked_atoms[j][rxnID] == 0) {
+                for (int p = 0; p < twomol->num_dihedral[j]; p++) {
+                  if (landlocked_atoms[twomol->dihedral_atom1[j][p]-1][rxnID] == 1 ||
+                      landlocked_atoms[twomol->dihedral_atom2[j][p]-1][rxnID] == 1 ||
+                      landlocked_atoms[twomol->dihedral_atom3[j][p]-1][rxnID] == 1 ||
+                      landlocked_atoms[twomol->dihedral_atom4[j][p]-1][rxnID] == 1) {
+                    insert_num = num_dihedral[atom->map(update_mega_glove[jj+1][i])];
+                    dihedral_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->dihedral_type[j][p];
+                    dihedral_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom1[j][p]-1][1][rxnID]][i];
+                    dihedral_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom2[j][p]-1][1][rxnID]][i];
+                    dihedral_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom3[j][p]-1][1][rxnID]][i];
+                    dihedral_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->dihedral_atom4[j][p]-1][1][rxnID]][i];
+                    num_dihedral[atom->map(update_mega_glove[jj+1][i])]++;
+                    if (num_dihedral[atom->map(update_mega_glove[jj+1][i])] > atom->dihedral_per_atom)
+                      error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom");
+                    delta_dihed++;
+                  }
                 }
               }
             }
@@ -3509,7 +3478,7 @@ void FixBondReact::update_everything()
     }
 
     // finally IMPROPERS!!!! first let's delete all improper info for landlocked atoms
-    if (force->improper && twomol->improperflag) {
+    if (force->improper) {
       int *num_improper = atom->num_improper;
       int **improper_type = atom->improper_type;
       tagint **improper_atom1 = atom->improper_atom1;
@@ -3553,36 +3522,38 @@ void FixBondReact::update_everything()
           }
         }
         // now let's add new improper info
-        for (int j = 0; j < twomol->natoms; j++) {
-          int jj = equivalences[j][1][rxnID]-1;
-          if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) {
-            if (landlocked_atoms[j][rxnID] == 1) {
-              num_improper[atom->map(update_mega_glove[jj+1][i])] = twomol->num_improper[j];
-              delta_imprp += twomol->num_improper[j];
-              for (int p = 0; p < twomol->num_improper[j]; p++) {
-                improper_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->improper_type[j][p];
-                improper_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i];
-                improper_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i];
-                improper_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i];
-                improper_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i];
+        if (twomol->improperflag) {
+          for (int j = 0; j < twomol->natoms; j++) {
+            int jj = equivalences[j][1][rxnID]-1;
+            if (atom->map(update_mega_glove[jj+1][i]) < nlocal && atom->map(update_mega_glove[jj+1][i]) >= 0) {
+              if (landlocked_atoms[j][rxnID] == 1) {
+                num_improper[atom->map(update_mega_glove[jj+1][i])] = twomol->num_improper[j];
+                delta_imprp += twomol->num_improper[j];
+                for (int p = 0; p < twomol->num_improper[j]; p++) {
+                  improper_type[atom->map(update_mega_glove[jj+1][i])][p] = twomol->improper_type[j][p];
+                  improper_atom1[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i];
+                  improper_atom2[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i];
+                  improper_atom3[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i];
+                  improper_atom4[atom->map(update_mega_glove[jj+1][i])][p] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i];
+                }
               }
-            }
-            if (landlocked_atoms[j][rxnID] == 0) {
-              for (int p = 0; p < twomol->num_improper[j]; p++) {
-                if (landlocked_atoms[twomol->improper_atom1[j][p]-1][rxnID] == 1 ||
-                    landlocked_atoms[twomol->improper_atom2[j][p]-1][rxnID] == 1 ||
-                    landlocked_atoms[twomol->improper_atom3[j][p]-1][rxnID] == 1 ||
-                    landlocked_atoms[twomol->improper_atom4[j][p]-1][rxnID] == 1) {
-                  insert_num = num_improper[atom->map(update_mega_glove[jj+1][i])];
-                  improper_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->improper_type[j][p];
-                  improper_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i];
-                  improper_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i];
-                  improper_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i];
-                  improper_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i];
-                  num_improper[atom->map(update_mega_glove[jj+1][i])]++;
-                  if (num_improper[atom->map(update_mega_glove[jj+1][i])] > atom->improper_per_atom)
-                    error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom");
-                  delta_imprp++;
+              if (landlocked_atoms[j][rxnID] == 0) {
+                for (int p = 0; p < twomol->num_improper[j]; p++) {
+                  if (landlocked_atoms[twomol->improper_atom1[j][p]-1][rxnID] == 1 ||
+                      landlocked_atoms[twomol->improper_atom2[j][p]-1][rxnID] == 1 ||
+                      landlocked_atoms[twomol->improper_atom3[j][p]-1][rxnID] == 1 ||
+                      landlocked_atoms[twomol->improper_atom4[j][p]-1][rxnID] == 1) {
+                    insert_num = num_improper[atom->map(update_mega_glove[jj+1][i])];
+                    improper_type[atom->map(update_mega_glove[jj+1][i])][insert_num] = twomol->improper_type[j][p];
+                    improper_atom1[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom1[j][p]-1][1][rxnID]][i];
+                    improper_atom2[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom2[j][p]-1][1][rxnID]][i];
+                    improper_atom3[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom3[j][p]-1][1][rxnID]][i];
+                    improper_atom4[atom->map(update_mega_glove[jj+1][i])][insert_num] = update_mega_glove[equivalences[twomol->improper_atom4[j][p]-1][1][rxnID]][i];
+                    num_improper[atom->map(update_mega_glove[jj+1][i])]++;
+                    if (num_improper[atom->map(update_mega_glove[jj+1][i])] > atom->improper_per_atom)
+                      error->one(FLERR,"Fix bond/react topology/atom exceed system topology/atom");
+                    delta_imprp++;
+                  }
                 }
               }
             }
@@ -3895,7 +3866,8 @@ int FixBondReact::insert_atoms(tagint **my_update_mega_glove, int iupdate)
         // guess a somewhat reasonable initial velocity based on reaction site
         // further control is possible using bond_react_MASTER_group
         // compute |velocity| corresponding to a given temperature t, using specific atom's mass
-        double vtnorm = sqrt(t / (force->mvv2e / (dimension * force->boltz)) / atom->mass[twomol->type[m]]);
+        double mymass = atom->rmass ? atom->rmass[n] : atom->mass[twomol->type[m]];
+        double vtnorm = sqrt(t / (force->mvv2e / (dimension * force->boltz)) / mymass);
         v[n][0] = random[rxnID]->uniform();
         v[n][1] = random[rxnID]->uniform();
         v[n][2] = random[rxnID]->uniform();
@@ -3950,7 +3922,8 @@ read map file
 void FixBondReact::read_map_file(int myrxn)
 {
   int rv;
-  char line[MAXLINE],keyword[MAXLINE];
+  char line[MAXLINE] = {'\0'};
+  char keyword[MAXLINE] = {'\0'};
   char *eof,*ptr;
 
   // skip 1st line of file
diff --git a/src/REACTION/fix_bond_react.h b/src/REACTION/fix_bond_react.h
index 534261e11d..8c9fc9dce4 100644
--- a/src/REACTION/fix_bond_react.h
+++ b/src/REACTION/fix_bond_react.h
@@ -12,7 +12,7 @@
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-   Contributing Author: Jacob Gissinger (jacob.r.gissinger@gmail.com)
+   Contributing Author: Jacob Gissinger (jgissing@stevens.edu)
 ------------------------------------------------------------------------- */
 
 #ifdef FIX_CLASS
@@ -139,7 +139,7 @@ class FixBondReact : public Fix {
   int avail_guesses;     // num of restore points available
   int *guess_branch;     // used when there is more than two choices when guessing
   int **restore_pt;      // contains info about restore points
-  tagint **restore;      // contaings info about restore points
+  tagint **restore;      // contains info about restore points
   int *pioneer_count;    // counts pioneers
 
   int **edge;                // atoms in molecule templates with incorrect valences
diff --git a/src/REAXFF/compute_reaxff_atom.cpp b/src/REAXFF/compute_reaxff_atom.cpp
new file mode 100644
index 0000000000..1834de0b4b
--- /dev/null
+++ b/src/REAXFF/compute_reaxff_atom.cpp
@@ -0,0 +1,254 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Richard Berger (LANL)
+------------------------------------------------------------------------- */
+
+#include "compute_reaxff_atom.h"
+#include "atom.h"
+#include "molecule.h"
+#include "update.h"
+#include "force.h"
+#include "memory.h"
+#include "error.h"
+#include "neigh_list.h"
+
+#include "pair_reaxff.h"
+#include "reaxff_api.h"
+
+using namespace LAMMPS_NS;
+using namespace ReaxFF;
+
+/* ---------------------------------------------------------------------- */
+
+ComputeReaxFFAtom::ComputeReaxFFAtom(LAMMPS *lmp, int narg, char **arg) :
+    Compute(lmp, narg, arg), neighid(nullptr), abo(nullptr), bondcount(nullptr), reaxff(nullptr)
+{
+  if (atom->tag_consecutive() == 0)
+    error->all(FLERR, "Atom IDs must be consecutive for compute reaxff/atom");
+
+  peratom_flag = 1;
+
+  // initialize output
+
+  nlocal = -1;
+  nbonds = 0;
+  prev_nbonds = -1;
+
+  size_peratom_cols = 3;
+
+  size_local_rows = 0;
+  size_local_cols = 3;
+
+  invoked_bonds = -1;
+
+  store_bonds = false;
+  nsub = 0;
+
+  int iarg = 3;
+  while (iarg<narg) {
+    if (strcmp(arg[iarg], "pair") == 0) {
+      if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "compute reaxff/atom pair", error);
+      ++iarg;
+
+      if (isdigit(arg[iarg][0])) {
+        nsub = utils::inumeric(FLERR, arg[iarg], false, lmp);
+        ++iarg;
+        if (nsub > 0) continue;
+      }
+      error->all(FLERR, "Illegal compute reaxff/atom command");
+    } else if (strcmp(arg[iarg], "bonds") == 0) {
+      if (iarg+2 > narg) utils::missing_cmd_args(FLERR, "compute reaxff/atom bonds", error);
+      store_bonds = utils::logical(FLERR, arg[iarg+1], false, lmp);
+      iarg += 2;
+    } else error->all(FLERR,"Illegal compute reaxff/atom command");
+  }
+
+  local_flag = store_bonds;
+}
+
+/* ---------------------------------------------------------------------- */
+
+ComputeReaxFFAtom::~ComputeReaxFFAtom()
+{
+  memory->destroy(array_local);
+  memory->destroy(array_atom);
+  memory->destroy(abo);
+  memory->destroy(neighid);
+  memory->destroy(bondcount);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeReaxFFAtom::init()
+{
+  if (lmp->suffix_enable) {
+    if (lmp->suffix)
+      reaxff = dynamic_cast<PairReaxFF *>(force->pair_match(fmt::format("^reax../{}", lmp->suffix), 0, nsub));
+    if (!reaxff && lmp->suffix2)
+      reaxff = dynamic_cast<PairReaxFF *>(force->pair_match(fmt::format("^reax../{}", lmp->suffix2), 0, nsub));
+  }
+
+  if (!reaxff) reaxff = dynamic_cast<PairReaxFF *>(force->pair_match("^reax..", 0, nsub));
+
+  if (!reaxff) error->all(FLERR,"Cannot use compute reaxff/atom without "
+                                "pair_style reaxff or reaxff/omp");
+
+  if (reaxff->kokkosable && !kokkosable)
+    error->all(FLERR,"Cannot use compute reaxff/atom with pair_style reaxff/kk. Use reaxff/atom/kk.");
+}
+
+/* ---------------------------------------------------------------------- */
+
+int ComputeReaxFFAtom::FindBond()
+{
+  int *ilist, i, ii, inum;
+  int j, pj, nj;
+  tagint jtag;
+  double bo_tmp,bo_cut;
+
+  inum = reaxff->list->inum;
+  ilist = reaxff->list->ilist;
+  bond_data *bo_ij;
+  bo_cut = reaxff->api->control->bg_cut;
+
+  tagint *tag = atom->tag;
+  int * mask = atom->mask;
+  int numbonds = 0;
+
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    if (mask[i] & groupbit) {
+      nj = 0;
+
+      for (pj = Start_Index(i, reaxff->api->lists); pj < End_Index(i, reaxff->api->lists); ++pj) {
+        bo_ij = &(reaxff->api->lists->select.bond_list[pj]);
+        j = bo_ij->nbr;
+        if (mask[j] & groupbit) {
+          jtag = tag[j];
+          bo_tmp = bo_ij->bo_data.BO;
+
+          if (bo_tmp > bo_cut) {
+            if (store_bonds) {
+              neighid[i][nj] = jtag;
+              abo[i][nj] = bo_tmp;
+            }
+            nj++;
+          }
+        }
+      }
+      bondcount[i] = nj;
+      numbonds += nj;
+    }
+  }
+  return numbonds;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeReaxFFAtom::compute_bonds()
+{
+  invoked_bonds = update->ntimestep;
+
+  if (atom->nlocal > nlocal) {
+    memory->destroy(abo);
+    memory->destroy(neighid);
+    memory->destroy(bondcount);
+    memory->destroy(array_atom);
+    nlocal = atom->nlocal;
+    if (store_bonds) {
+      memory->create(abo, nlocal, MAXREAXBOND, "reaxff/atom:abo");
+      memory->create(neighid, nlocal, MAXREAXBOND, "reaxff/atom:neighid");
+    }
+    memory->create(bondcount, nlocal, "reaxff/atom:bondcount");
+    memory->create(array_atom, nlocal, 3, "reaxff/atom:array_atom");
+  }
+
+  for (int i = 0; i < nlocal; i++) {
+    bondcount[i] = 0;
+    for (int j = 0; store_bonds && j < MAXREAXBOND; j++) {
+      neighid[i][j] = 0;
+      abo[i][j] = 0.0;
+    }
+  }
+
+  nbonds = FindBond();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeReaxFFAtom::compute_local()
+{
+  invoked_local = update->ntimestep;
+
+  if (invoked_bonds < update->ntimestep)
+    compute_bonds();
+
+  if (nbonds > prev_nbonds) {
+    // grow array_local
+    memory->destroy(array_local);
+    memory->create(array_local, nbonds, 3, "reaxff/atom:array_local");
+    prev_nbonds = nbonds;
+  }
+
+  size_local_rows = nbonds;
+  auto tag = atom->tag;
+
+  int b = 0;
+
+  for (int i = 0; i < nlocal; ++i) {
+    const int numbonds = bondcount[i];
+
+    for (int k = 0; k < numbonds; k++) {
+      auto bond = array_local[b++];
+      bond[0] = tag[i];
+      bond[1] = neighid[i][k];
+      bond[2] = abo[i][k];
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeReaxFFAtom::compute_peratom()
+{
+  invoked_peratom = update->ntimestep;
+
+  if (invoked_bonds < update->ntimestep) {
+    compute_bonds();
+  }
+
+  for (int i = 0; i < nlocal; ++i) {
+    auto ptr = array_atom[i];
+    ptr[0] = reaxff->api->workspace->total_bond_order[i];
+    ptr[1] = reaxff->api->workspace->nlp[i];
+    ptr[2] = bondcount[i];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local data
+------------------------------------------------------------------------- */
+
+double ComputeReaxFFAtom::memory_usage()
+{
+  double bytes = (double)(nlocal*3) * sizeof(double);
+  bytes += (double)(nlocal) * sizeof(int);
+  if (store_bonds) {
+    bytes += (double)(2*nlocal*MAXREAXBOND) * sizeof(double);
+    bytes += (double)(nbonds*3) * sizeof(double);
+  }
+  return bytes;
+}
diff --git a/src/REAXFF/compute_reaxff_atom.h b/src/REAXFF/compute_reaxff_atom.h
new file mode 100644
index 0000000000..1f9aaec1ae
--- /dev/null
+++ b/src/REAXFF/compute_reaxff_atom.h
@@ -0,0 +1,61 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Richard Berger (LANL)
+------------------------------------------------------------------------- */
+
+#ifdef COMPUTE_CLASS
+// clang-format off
+ComputeStyle(reaxff/atom,ComputeReaxFFAtom);
+// clang-format on
+#else
+
+#ifndef LMP_COMPUTE_REAXFF_ATOM_H
+#define LMP_COMPUTE_REAXFF_ATOM_H
+
+#include "compute.h"
+
+namespace LAMMPS_NS {
+
+class ComputeReaxFFAtom : public Compute {
+ public:
+  ComputeReaxFFAtom(class LAMMPS *, int, char **);
+  ~ComputeReaxFFAtom() override;
+  void init() override;
+  void compute_local() override;
+  void compute_peratom() override;
+  virtual void compute_bonds();
+  double memory_usage() override;
+
+ protected:
+  bigint invoked_bonds;     // last timestep on which compute_bonds() was invoked
+  int nlocal;
+  int nbonds;
+  int prev_nbonds;
+  int nsub;
+  bool store_bonds;
+
+  tagint **neighid;
+  double **abo;
+  int *bondcount;
+  class PairReaxFF *reaxff;
+
+ private:
+  int FindBond();
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/REAXFF/fix_reaxff.cpp b/src/REAXFF/fix_reaxff.cpp
index 06941cd8a0..bec16b5d04 100644
--- a/src/REAXFF/fix_reaxff.cpp
+++ b/src/REAXFF/fix_reaxff.cpp
@@ -19,7 +19,7 @@
    Please cite the related publication:
    H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
    "Parallel Reactive Molecular Dynamics: Numerical Methods and
-   Algorithmic Techniques", Parallel Computing, in press.
+   Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 ------------------------------------------------------------------------- */
 
 #include "fix_reaxff.h"
diff --git a/src/REAXFF/reaxff_allocate.cpp b/src/REAXFF/reaxff_allocate.cpp
index ce56668a01..06ebc20f30 100644
--- a/src/REAXFF/reaxff_allocate.cpp
+++ b/src/REAXFF/reaxff_allocate.cpp
@@ -11,7 +11,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
diff --git a/src/REAXFF/reaxff_control.cpp b/src/REAXFF/reaxff_control.cpp
index d914765f45..99e498b428 100644
--- a/src/REAXFF/reaxff_control.cpp
+++ b/src/REAXFF/reaxff_control.cpp
@@ -11,7 +11,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
diff --git a/src/REAXFF/reaxff_ffield.cpp b/src/REAXFF/reaxff_ffield.cpp
index d5761eb343..6ca8dc6256 100644
--- a/src/REAXFF/reaxff_ffield.cpp
+++ b/src/REAXFF/reaxff_ffield.cpp
@@ -11,7 +11,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing,  38, 245-259 (2012).
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
@@ -30,6 +30,7 @@
 #include "error.h"
 #include "memory.h"
 #include "text_file_reader.h"
+#include "tokenizer.h"
 #include "utils.h"
 
 #include <cmath>
@@ -40,6 +41,8 @@
 using LAMMPS_NS::utils::open_potential;
 using LAMMPS_NS::utils::getsyserror;
 using LAMMPS_NS::utils::uppercase;
+using LAMMPS_NS::EOFException;
+using LAMMPS_NS::ValueTokenizer;
 
 namespace ReaxFF {
 
@@ -538,17 +541,20 @@ namespace ReaxFF {
           }
         }
 
-        // next line is number of hydrogen bond parameters
-
-        values = reader.next_values(0);
-        n = values.next_int();
-        ++lineno;
+        // next line is number of hydrogen bond parameters. that block may be missing
 
         for (i = 0; i < ntypes; ++i)
           for (j = 0; j < ntypes; ++j)
             for (k = 0; k < ntypes; ++k)
               hbp[i][j][k].r0_hb = -1.0;
 
+        auto thisline = reader.next_line();
+        if (!thisline) throw EOFException("ReaxFF parameter file has no hydrogen bond parameters");
+
+        values = ValueTokenizer(thisline);
+        n = values.next_int();
+        ++lineno;
+
         for (i = 0; i < n; ++i) {
           values = reader.next_values(0);
           ++lineno;
@@ -570,6 +576,8 @@ namespace ReaxFF {
         }
 
         memory->destroy(tor_flag);
+      } catch (EOFException &e) {
+        error->warning(FLERR, e.what());
       } catch (std::exception &e) {
         error->one(FLERR,e.what());
       }
diff --git a/src/REAXFF/reaxff_forces.cpp b/src/REAXFF/reaxff_forces.cpp
index a4edfeee5c..274799c30c 100644
--- a/src/REAXFF/reaxff_forces.cpp
+++ b/src/REAXFF/reaxff_forces.cpp
@@ -11,7 +11,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
diff --git a/src/REAXFF/reaxff_hydrogen_bonds.cpp b/src/REAXFF/reaxff_hydrogen_bonds.cpp
index 6a56675f19..0389db7832 100644
--- a/src/REAXFF/reaxff_hydrogen_bonds.cpp
+++ b/src/REAXFF/reaxff_hydrogen_bonds.cpp
@@ -11,7 +11,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
diff --git a/src/REAXFF/reaxff_list.cpp b/src/REAXFF/reaxff_list.cpp
index 0ff0852a04..2989f717d6 100644
--- a/src/REAXFF/reaxff_list.cpp
+++ b/src/REAXFF/reaxff_list.cpp
@@ -11,7 +11,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
diff --git a/src/REAXFF/reaxff_lookup.cpp b/src/REAXFF/reaxff_lookup.cpp
index c0e7bf2c54..d9ee471caf 100644
--- a/src/REAXFF/reaxff_lookup.cpp
+++ b/src/REAXFF/reaxff_lookup.cpp
@@ -11,7 +11,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
diff --git a/src/REAXFF/reaxff_multi_body.cpp b/src/REAXFF/reaxff_multi_body.cpp
index 2390b54474..855d82623f 100644
--- a/src/REAXFF/reaxff_multi_body.cpp
+++ b/src/REAXFF/reaxff_multi_body.cpp
@@ -11,7 +11,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
diff --git a/src/REAXFF/reaxff_nonbonded.cpp b/src/REAXFF/reaxff_nonbonded.cpp
index 75cbd79b29..e0a8d092b2 100644
--- a/src/REAXFF/reaxff_nonbonded.cpp
+++ b/src/REAXFF/reaxff_nonbonded.cpp
@@ -11,7 +11,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
diff --git a/src/REAXFF/reaxff_reset_tools.cpp b/src/REAXFF/reaxff_reset_tools.cpp
index bebb2e2cfc..9de917e142 100644
--- a/src/REAXFF/reaxff_reset_tools.cpp
+++ b/src/REAXFF/reaxff_reset_tools.cpp
@@ -11,7 +11,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
diff --git a/src/REAXFF/reaxff_tool_box.cpp b/src/REAXFF/reaxff_tool_box.cpp
index 22ef299b41..aa6f831e95 100644
--- a/src/REAXFF/reaxff_tool_box.cpp
+++ b/src/REAXFF/reaxff_tool_box.cpp
@@ -10,7 +10,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
diff --git a/src/REAXFF/reaxff_torsion_angles.cpp b/src/REAXFF/reaxff_torsion_angles.cpp
index e9b6bc618d..29233a56dc 100644
--- a/src/REAXFF/reaxff_torsion_angles.cpp
+++ b/src/REAXFF/reaxff_torsion_angles.cpp
@@ -11,7 +11,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
diff --git a/src/REAXFF/reaxff_valence_angles.cpp b/src/REAXFF/reaxff_valence_angles.cpp
index ac3e2dbd1e..b46f09d23a 100644
--- a/src/REAXFF/reaxff_valence_angles.cpp
+++ b/src/REAXFF/reaxff_valence_angles.cpp
@@ -11,7 +11,7 @@
   Please cite the related publication:
   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
   "Parallel Reactive Molecular Dynamics: Numerical Methods and
-  Algorithmic Techniques", Parallel Computing, in press.
+  Algorithmic Techniques", Parallel Computing, 38 (4-5), 245-259.
 
   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
diff --git a/src/REPLICA/fix_hyper_global.cpp b/src/REPLICA/fix_hyper_global.cpp
index d262c49fda..79d901893e 100644
--- a/src/REPLICA/fix_hyper_global.cpp
+++ b/src/REPLICA/fix_hyper_global.cpp
@@ -32,8 +32,8 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define DELTABOND 16384
-#define VECLEN 5
+static constexpr int DELTABOND = 16384;
+static constexpr int VECLEN = 5;
 
 // possible enhancements
 //   should there be a virial contribution from boosted bond?
diff --git a/src/REPLICA/fix_hyper_local.cpp b/src/REPLICA/fix_hyper_local.cpp
index d0cfc4bb01..dde4940403 100644
--- a/src/REPLICA/fix_hyper_local.cpp
+++ b/src/REPLICA/fix_hyper_local.cpp
@@ -35,11 +35,11 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define DELTABOND 16384
-#define DELTABIAS 16
-#define COEFFINIT 1.0
-#define FCCBONDS 12
-#define BIG 1.0e20
+static constexpr int DELTABOND = 16384;
+static constexpr int DELTABIAS = 16;
+static constexpr double COEFFINIT = 1.0;
+static constexpr int FCCBONDS = 12;
+static constexpr double BIG = 1.0e20;
 
 enum{STRAIN,STRAINDOMAIN,BIASFLAG,BIASCOEFF};
 enum{IGNORE,WARN,ERROR};
diff --git a/src/REPLICA/fix_neb.cpp b/src/REPLICA/fix_neb.cpp
index f2962d9b9d..9c920f26dc 100644
--- a/src/REPLICA/fix_neb.cpp
+++ b/src/REPLICA/fix_neb.cpp
@@ -40,7 +40,7 @@ using namespace MathConst;
 enum { SINGLE_PROC_DIRECT, SINGLE_PROC_MAP, MULTI_PROC };
 enum { NEIGHBOR, IDEAL, EQUAL };
 
-#define BUFSIZE 8
+static constexpr int BUFSIZE = 8;
 
 /* ---------------------------------------------------------------------- */
 
@@ -139,7 +139,15 @@ FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) :
 
   uworld = universe->uworld;
 
-  if ((neb_mode == IDEAL) || (neb_mode == EQUAL)) {
+  // set comm mode for inter-replica exchange of coords
+  // may change from SINGLE_PROC_MAP to SINGLE_PROC_DIRECT only in Fix::init()
+
+  if (nreplica == nprocs_universe)
+    cmode = SINGLE_PROC_MAP;
+  else
+    cmode = MULTI_PROC;
+
+  if (cmode == MULTI_PROC) {
     int *iroots = new int[nreplica];
     MPI_Group uworldgroup, rootgroup;
 
@@ -150,7 +158,7 @@ FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) :
     if (rootgroup != MPI_GROUP_NULL) MPI_Group_free(&rootgroup);
     if (uworldgroup != MPI_GROUP_NULL) MPI_Group_free(&uworldgroup);
     delete[] iroots;
-  }
+  } else rootworld = MPI_COMM_NULL;
 
   // create a new compute pe style
   // id = fix-ID + pe, compute group = all
@@ -193,8 +201,10 @@ FixNEB::~FixNEB()
   memory->destroy(counts);
   memory->destroy(displacements);
 
-  if ((neb_mode == IDEAL) || (neb_mode == EQUAL)) {
+  if (cmode == MULTI_PROC)
     if (rootworld != MPI_COMM_NULL) MPI_Comm_free(&rootworld);
+
+  if ((neb_mode == IDEAL) || (neb_mode == EQUAL)) {
     memory->destroy(nlenall);
   }
   if (neb_mode == EQUAL) memory->destroy(vengall);
@@ -227,14 +237,10 @@ void FixNEB::init()
   if (count > MAXSMALLINT) error->all(FLERR, "Too many active NEB atoms");
   nebatoms = count;
 
-  // comm mode for inter-replica exchange of coords
+  // change comm mode for inter-replica exchange of coords to direct if possible
 
-  if (nreplica == nprocs_universe && nebatoms == atom->natoms && atom->sortfreq == 0)
+  if ((cmode == SINGLE_PROC_MAP) && (nebatoms == atom->natoms) && (atom->sortfreq == 0))
     cmode = SINGLE_PROC_DIRECT;
-  else if (nreplica == nprocs_universe)
-    cmode = SINGLE_PROC_MAP;
-  else
-    cmode = MULTI_PROC;
 
   // ntotal = total # of atoms in system, NEB atoms or not
 
@@ -298,9 +304,8 @@ void FixNEB::min_post_force(int /*vflag*/)
       int procFirst;
       procFirst = universe->root_proc[0];
       MPI_Bcast(&vIni, 1, MPI_DOUBLE, procFirst, uworld);
-    } else {
+    } else { // cmode == MULTI_PROC
       if (me == 0) MPI_Bcast(&vIni, 1, MPI_DOUBLE, 0, rootworld);
-
       MPI_Bcast(&vIni, 1, MPI_DOUBLE, 0, world);
     }
   }
@@ -812,7 +817,7 @@ void FixNEB::calculate_ideal_positions()
   if ((neb_mode == EQUAL) && (rclimber > 0.0)) {
     if ((cmode == SINGLE_PROC_DIRECT) || (cmode == SINGLE_PROC_MAP)) {
       MPI_Allgather(&veng, 1, MPI_DOUBLE, &vengall[0], 1, MPI_DOUBLE, uworld);
-    } else {
+    } else { // cmode == MULTI_PROC
       if (me == 0) MPI_Allgather(&veng, 1, MPI_DOUBLE, &vengall[0], 1, MPI_DOUBLE, rootworld);
       MPI_Bcast(vengall, nreplica, MPI_DOUBLE, 0, world);
     }
@@ -823,7 +828,7 @@ void FixNEB::calculate_ideal_positions()
   } else if ((neb_mode == IDEAL) || (neb_mode == EQUAL)) {
     if ((cmode == SINGLE_PROC_DIRECT) || (cmode == SINGLE_PROC_MAP)) {
       MPI_Allgather(&nlen, 1, MPI_DOUBLE, &nlenall[0], 1, MPI_DOUBLE, uworld);
-    } else {
+    } else { // cmode == MULTI_PROC
       if (me == 0) MPI_Allgather(&nlen, 1, MPI_DOUBLE, &nlenall[0], 1, MPI_DOUBLE, rootworld);
       MPI_Bcast(nlenall, nreplica, MPI_DOUBLE, 0, world);
     }
diff --git a/src/REPLICA/neb.cpp b/src/REPLICA/neb.cpp
index 11933164eb..d9144a9489 100644
--- a/src/REPLICA/neb.cpp
+++ b/src/REPLICA/neb.cpp
@@ -36,32 +36,16 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define MAXLINE 256
-#define CHUNK 1024
-#define ATTRIBUTE_PERLINE 4
+static constexpr int MAXLINE = 256;
+static constexpr int CHUNK = 1024;
+static constexpr int ATTRIBUTE_PERLINE = 4;
 
 enum { DEFAULT, TERSE, VERBOSE };
 
 /* ---------------------------------------------------------------------- */
 
-NEB::NEB(LAMMPS *lmp) : Command(lmp), fp(nullptr), all(nullptr), rdist(nullptr) {}
-
-/* ----------------------------------------------------------------------
-   internal NEB constructor, called from TAD
-------------------------------------------------------------------------- */
-
-NEB::NEB(LAMMPS *lmp, double etol_in, double ftol_in, int n1steps_in, int n2steps_in, int nevery_in,
-         double *buf_init, double *buf_final) :
-    Command(lmp),
-    fp(nullptr), all(nullptr), rdist(nullptr)
+NEB::NEB(LAMMPS *lmp) : Command(lmp), fp(nullptr), all(nullptr), rdist(nullptr)
 {
-  double delx, dely, delz;
-
-  etol = etol_in;
-  ftol = ftol_in;
-  n1steps = n1steps_in;
-  n2steps = n2steps_in;
-  nevery = nevery_in;
   print_mode = DEFAULT;
 
   // replica info
@@ -71,8 +55,25 @@ NEB::NEB(LAMMPS *lmp, double etol_in, double ftol_in, int n1steps_in, int n2step
   me_universe = universe->me;
   uworld = universe->uworld;
   MPI_Comm_rank(world, &me);
+}
 
-  // generate linear interpolate replica
+/* ----------------------------------------------------------------------
+   internal NEB constructor, called from TAD
+------------------------------------------------------------------------- */
+
+NEB::NEB(LAMMPS *lmp, double etol_in, double ftol_in, int n1steps_in, int n2steps_in, int nevery_in,
+         double *buf_init, double *buf_final) :
+    NEB(lmp)
+{
+  double delx, dely, delz;
+
+  etol = etol_in;
+  ftol = ftol_in;
+  n1steps = n1steps_in;
+  n2steps = n2steps_in;
+  nevery = nevery_in;
+
+  // generate linear interpolated replica
   double fraction = ireplica / (nreplica - 1.0);
   double **x = atom->x;
   int nlocal = atom->nlocal;
@@ -129,19 +130,11 @@ void NEB::command(int narg, char **arg)
   if (nevery <= 0)
     error->universe_all(FLERR, fmt::format("Illegal NEB command every parameter: {}", nevery));
   if (n1steps % nevery)
-    error->universe_all(FLERR, fmt::format("NEB N1 value {} incompatible with every {}",
-                                           n1steps, nevery));
+    error->universe_all(FLERR,
+                        fmt::format("NEB N1 value {} incompatible with every {}", n1steps, nevery));
   if (n2steps % nevery)
-    error->universe_all(FLERR, fmt::format("NEB N2 value {} incompatible with every {}",
-                                           n2steps, nevery));
-
-  // replica info
-
-  nreplica = universe->nworlds;
-  ireplica = universe->iworld;
-  me_universe = universe->me;
-  uworld = universe->uworld;
-  MPI_Comm_rank(world, &me);
+    error->universe_all(FLERR,
+                        fmt::format("NEB N2 value {} incompatible with every {}", n2steps, nevery));
 
   // error checks
 
@@ -437,7 +430,7 @@ void NEB::readfile(char *file, int flag)
   int i, nchunk, eofflag, nlines;
   tagint tag;
   char *eof, *start, *next, *buf;
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
   double delx, dely, delz;
 
   if (me_universe == 0 && universe->uscreen)
diff --git a/src/RIGID/compute_rigid_local.cpp b/src/RIGID/compute_rigid_local.cpp
index bd0db29d20..ea45389e7b 100644
--- a/src/RIGID/compute_rigid_local.cpp
+++ b/src/RIGID/compute_rigid_local.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 enum{ID,MOL,MASS,X,Y,Z,XU,YU,ZU,VX,VY,VZ,FX,FY,FZ,IX,IY,IZ,
      TQX,TQY,TQZ,OMEGAX,OMEGAY,OMEGAZ,ANGMOMX,ANGMOMY,ANGMOMZ,
diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp
index 628abb240e..7a63c52220 100644
--- a/src/RIGID/fix_rigid.cpp
+++ b/src/RIGID/fix_rigid.cpp
@@ -1247,7 +1247,7 @@ void FixRigid::enforce2d()
    return total count of DOF
 ------------------------------------------------------------------------- */
 
-int FixRigid::dof(int tgroup)
+bigint FixRigid::dof(int tgroup)
 {
   // cannot count DOF correctly unless setup_bodies_static() has been called
 
@@ -1306,7 +1306,7 @@ int FixRigid::dof(int tgroup)
   // 3d body with any finite-size M should have 6 dof, remove (3N+6M) - 6
   // 2d body with any finite-size M should have 3 dof, remove (2N+3M) - 3
 
-  int n = 0;
+  bigint n = 0;
   nlinear = 0;
   if (domain->dimension == 3) {
     for (int ibody = 0; ibody < nbody; ibody++)
@@ -2300,7 +2300,7 @@ void FixRigid::readfile(int which, double *vec, double **array1, double **array2
   int nlines;
   FILE *fp;
   char *eof,*start,*next,*buf;
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
 
   // open file and read and parse first non-empty, non-comment line containing the number of bodies
   if (comm->me == 0) {
diff --git a/src/RIGID/fix_rigid.h b/src/RIGID/fix_rigid.h
index 361ddd2720..c2f04ecf1a 100644
--- a/src/RIGID/fix_rigid.h
+++ b/src/RIGID/fix_rigid.h
@@ -48,7 +48,7 @@ class FixRigid : public Fix {
 
   void setup_pre_neighbor() override;
   void pre_neighbor() override;
-  int dof(int) override;
+  bigint dof(int) override;
   void deform(int) override;
   void reset_dt() override;
   void zero_momentum() override;
diff --git a/src/RIGID/fix_rigid_nh_small.cpp b/src/RIGID/fix_rigid_nh_small.cpp
index 952dc29032..3ee11e28d2 100644
--- a/src/RIGID/fix_rigid_nh_small.cpp
+++ b/src/RIGID/fix_rigid_nh_small.cpp
@@ -219,7 +219,6 @@ void FixRigidNHSmall::init()
     }
   }
 
-  int icompute;
   if (tcomputeflag) {
     temperature = modify->get_compute_by_id(id_temp);
     if (!temperature)
diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp
index bd49834f15..9e185a4de2 100644
--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@@ -49,7 +49,7 @@ using namespace FixConst;
 using namespace MathConst;
 using namespace RigidConst;
 
-#define RVOUS 1   // 0 for irregular, 1 for all2all
+static constexpr int RVOUS = 1;   // 0 for irregular, 1 for all2all
 
 /* ---------------------------------------------------------------------- */
 
@@ -1123,7 +1123,7 @@ void FixRigidSmall::enforce2d()
    return total count of DOF
 ------------------------------------------------------------------------- */
 
-int FixRigidSmall::dof(int tgroup)
+bigint FixRigidSmall::dof(int tgroup)
 {
   int i,j;
 
@@ -1195,7 +1195,7 @@ int FixRigidSmall::dof(int tgroup)
 
   double *inertia;
 
-  int n = 0;
+  bigint n = 0;
   nlinear = 0;
   if (domain->dimension == 3) {
     for (int ibody = 0; ibody < nlocal_body; ibody++) {
@@ -1216,8 +1216,8 @@ int FixRigidSmall::dof(int tgroup)
 
   memory->destroy(counts);
 
-  int nall;
-  MPI_Allreduce(&n,&nall,1,MPI_INT,MPI_SUM,world);
+  bigint nall;
+  MPI_Allreduce(&n,&nall,1,MPI_LMP_BIGINT,MPI_SUM,world);
   return nall;
 }
 
@@ -2470,7 +2470,7 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody)
   int nchunk,eofflag,nlines,xbox,ybox,zbox;
   FILE *fp;
   char *eof,*start,*next,*buf;
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
 
   // create local hash with key/value pairs
   // key = mol ID of bodies my atoms own
diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h
index 0070d976df..0508063f05 100644
--- a/src/RIGID/fix_rigid_small.h
+++ b/src/RIGID/fix_rigid_small.h
@@ -54,7 +54,7 @@ class FixRigidSmall : public Fix {
 
   void setup_pre_neighbor() override;
   void pre_neighbor() override;
-  int dof(int) override;
+  bigint dof(int) override;
   void deform(int) override;
   void reset_dt() override;
   void zero_momentum() override;
diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp
index b2c65220bc..73c29d86bd 100644
--- a/src/RIGID/fix_shake.cpp
+++ b/src/RIGID/fix_shake.cpp
@@ -39,7 +39,7 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 
-#define RVOUS 1    // 0 for irregular, 1 for all2all
+static constexpr int RVOUS = 1;    // 0 for irregular, 1 for all2all
 
 static constexpr double BIG = 1.0e20;
 static constexpr double MASSDELTA = 0.1;
@@ -207,8 +207,8 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) :
 
   if (output_every) {
     int nb = atom->nbondtypes + 1;
-    b_count = new int[nb];
-    b_count_all = new int[nb];
+    b_count = new bigint[nb];
+    b_count_all = new bigint[nb];
     b_ave = new double[nb];
     b_ave_all = new double[nb];
     b_max = new double[nb];
@@ -217,8 +217,8 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) :
     b_min_all = new double[nb];
 
     int na = atom->nangletypes + 1;
-    a_count = new int[na];
-    a_count_all = new int[na];
+    a_count = new bigint[na];
+    a_count_all = new bigint[na];
     a_ave = new double[na];
     a_ave_all = new double[na];
     a_max = new double[na];
@@ -755,7 +755,7 @@ void FixShake::min_post_force(int vflag)
    count # of degrees-of-freedom removed by SHAKE for atoms in igroup
 ------------------------------------------------------------------------- */
 
-int FixShake::dof(int igroup)
+bigint FixShake::dof(int igroup)
 {
   int groupbit = group->bitmask[igroup];
 
@@ -766,7 +766,7 @@ int FixShake::dof(int igroup)
   // count dof in a cluster if and only if
   // the central atom is in group and atom i is the central atom
 
-  int n = 0;
+  bigint n = 0;
   for (int i = 0; i < nlocal; i++) {
     if (!(mask[i] & groupbit)) continue;
     if (shake_flag[i] == 0) continue;
@@ -777,8 +777,8 @@ int FixShake::dof(int igroup)
     else if (shake_flag[i] == 4) n += 3;
   }
 
-  int nall;
-  MPI_Allreduce(&n,&nall,1,MPI_INT,MPI_SUM,world);
+  bigint nall;
+  MPI_Allreduce(&n,&nall,1,MPI_LMP_BIGINT,MPI_SUM,world);
   return nall;
 }
 
@@ -1098,7 +1098,7 @@ void FixShake::find_clusters()
   // print info on SHAKE clusters
   // -----------------------------------------------------
 
-  int count1,count2,count3,count4;
+  bigint count1,count2,count3,count4;
   count1 = count2 = count3 = count4 = 0;
   for (i = 0; i < nlocal; i++) {
     if (shake_flag[i] == 1) count1++;
@@ -1107,15 +1107,15 @@ void FixShake::find_clusters()
     else if (shake_flag[i] == 4) count4++;
   }
 
-  int tmp;
+  bigint tmp;
   tmp = count1;
-  MPI_Allreduce(&tmp,&count1,1,MPI_INT,MPI_SUM,world);
+  MPI_Allreduce(&tmp,&count1,1,MPI_LMP_BIGINT,MPI_SUM,world);
   tmp = count2;
-  MPI_Allreduce(&tmp,&count2,1,MPI_INT,MPI_SUM,world);
+  MPI_Allreduce(&tmp,&count2,1,MPI_LMP_BIGINT,MPI_SUM,world);
   tmp = count3;
-  MPI_Allreduce(&tmp,&count3,1,MPI_INT,MPI_SUM,world);
+  MPI_Allreduce(&tmp,&count3,1,MPI_LMP_BIGINT,MPI_SUM,world);
   tmp = count4;
-  MPI_Allreduce(&tmp,&count4,1,MPI_INT,MPI_SUM,world);
+  MPI_Allreduce(&tmp,&count4,1,MPI_LMP_BIGINT,MPI_SUM,world);
 
   if (comm->me == 0) {
     utils::logmesg(lmp,"{:>8} = # of size 2 clusters\n"
@@ -2682,12 +2682,12 @@ void FixShake::stats()
 
   // sum across all procs
 
-  MPI_Allreduce(b_count,b_count_all,nb,MPI_INT,MPI_SUM,world);
+  MPI_Allreduce(b_count,b_count_all,nb,MPI_LMP_BIGINT,MPI_SUM,world);
   MPI_Allreduce(b_ave,b_ave_all,nb,MPI_DOUBLE,MPI_SUM,world);
   MPI_Allreduce(b_max,b_max_all,nb,MPI_DOUBLE,MPI_MAX,world);
   MPI_Allreduce(b_min,b_min_all,nb,MPI_DOUBLE,MPI_MIN,world);
 
-  MPI_Allreduce(a_count,a_count_all,na,MPI_INT,MPI_SUM,world);
+  MPI_Allreduce(a_count,a_count_all,na,MPI_LMP_BIGINT,MPI_SUM,world);
   MPI_Allreduce(a_ave,a_ave_all,na,MPI_DOUBLE,MPI_SUM,world);
   MPI_Allreduce(a_max,a_max_all,na,MPI_DOUBLE,MPI_MAX,world);
   MPI_Allreduce(a_min,a_min_all,na,MPI_DOUBLE,MPI_MIN,world);
diff --git a/src/RIGID/fix_shake.h b/src/RIGID/fix_shake.h
index 3b04560f09..d02fdd784a 100644
--- a/src/RIGID/fix_shake.h
+++ b/src/RIGID/fix_shake.h
@@ -59,7 +59,7 @@ class FixShake : public Fix {
   virtual void correct_coordinates(int vflag);
   virtual void correct_velocities();
 
-  int dof(int) override;
+  bigint dof(int) override;
   void reset_dt() override;
   void *extract(const char *, int &) override;
   double compute_scalar() override;
@@ -117,10 +117,10 @@ class FixShake : public Fix {
   int nlist, maxlist;    // size and max-size of list
 
   // stat quantities
-  int *b_count, *b_count_all;                   // counts for each bond type, atoms in bond cluster
+  bigint *b_count, *b_count_all;                // counts for each bond type, atoms in bond cluster
   double *b_ave, *b_max, *b_min;                // ave/max/min dist for each bond type
   double *b_ave_all, *b_max_all, *b_min_all;    // MPI summing arrays
-  int *a_count, *a_count_all;                   // ditto for angle types
+  bigint *a_count, *a_count_all;                // ditto for angle types
   double *a_ave, *a_max, *a_min;
   double *a_ave_all, *a_max_all, *a_min_all;
 
diff --git a/src/SHOCK/fix_append_atoms.cpp b/src/SHOCK/fix_append_atoms.cpp
index 9ab788b352..677b3b55fd 100644
--- a/src/SHOCK/fix_append_atoms.cpp
+++ b/src/SHOCK/fix_append_atoms.cpp
@@ -14,8 +14,6 @@
 
 #include "fix_append_atoms.h"
 
-#include <cmath>
-#include <cstring>
 #include "atom.h"
 #include "atom_vec.h"
 #include "comm.h"
@@ -27,11 +25,13 @@
 #include "error.h"
 #include "force.h"
 
+#include <cmath>
+#include <cstring>
+
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define BIG      1.0e30
-#define EPSILON  1.0e-6
+static constexpr double BIG = 1.0e30;
 
 /* ---------------------------------------------------------------------- */
 
@@ -43,7 +43,7 @@ FixAppendAtoms::FixAppendAtoms(LAMMPS *lmp, int narg, char **arg) :
   next_reneighbor = -1;
   time_depend = 1;
 
-  if (narg < 4) error->all(FLERR,"Illegal fix append/atoms command");
+  if (narg < 4) utils::missing_cmd_args(FLERR,"fix append/atoms", error);
 
   // default settings
 
@@ -121,8 +121,7 @@ FixAppendAtoms::FixAppendAtoms(LAMMPS *lmp, int narg, char **arg) :
     } else if (strcmp(arg[iarg],"spatial") == 0) {
       if (iarg+3 > narg) error->all(FLERR,"Illegal fix append/atoms command");
       if (strcmp(arg[iarg+1],"f_") == 0)
-        error->all(FLERR,
-                   "Bad fix ID in fix append/atoms command");
+        error->all(FLERR, "Bad fix ID in fix append/atoms command");
       spatflag = 1;
       spatialid = utils::strdup(arg[iarg+1]+2);
       spatlead = utils::numeric(FLERR,arg[iarg+2],false,lmp);
@@ -208,14 +207,14 @@ FixAppendAtoms::FixAppendAtoms(LAMMPS *lmp, int narg, char **arg) :
 
 FixAppendAtoms::~FixAppendAtoms()
 {
-  delete [] basistype;
+  delete[] basistype;
 
   if (ranflag) delete randomx;
   if (spatflag) delete[] spatialid;
   if (tempflag) {
     delete randomt;
-    delete [] gfactor1;
-    delete [] gfactor2;
+    delete[] gfactor1;
+    delete[] gfactor2;
   }
 }
 
@@ -239,22 +238,30 @@ void FixAppendAtoms::initial_integrate(int /*vflag*/)
 
 /* ---------------------------------------------------------------------- */
 
+void FixAppendAtoms::init()
+{
+  if (spatflag) {
+    Fix *ifix = modify->get_fix_by_id(spatialid);
+    if (!ifix) error->all(FLERR,"Fix ID {} for fix ave/chunk does not exist", spatialid);
+    if (!utils::strmatch(ifix->style, "^ave/chunk"))
+      error->all(FLERR,"Fix {} for spatial keyword is not fix style ave/chunk", spatialid);}
+}
+
+/* ---------------------------------------------------------------------- */
+
 void FixAppendAtoms::setup(int vflag)
 {
   /*** CALL TO CREATE GROUP?  SEE POST_FORCE ***/
   post_force(vflag);
 }
 
-
 /* ---------------------------------------------------------------------- */
 
 int FixAppendAtoms::get_spatial()
 {
   if (update->ntimestep % freq == 0) {
-    int ifix = modify->find_fix(spatialid);
-    if (ifix < 0)
-      error->all(FLERR,"Fix ID for fix ave/spatial does not exist");
-    Fix *fix = modify->fix[ifix];
+    Fix *fix = modify->get_fix_by_id(spatialid);
+    if (!fix) error->all(FLERR,"Fix ID {} for fix ave/chunk does not exist", spatialid);
 
     int failed = 0;
     int count = 0;
@@ -319,8 +326,8 @@ int FixAppendAtoms::get_spatial()
 
     if (domain->boxhi[2] - shockfront_loc < spatlead) advance = 1;
 
-    delete [] pos;
-    delete [] val;
+    delete[] pos;
+    delete[] val;
   }
 
   advance_sum = 0;
@@ -433,22 +440,14 @@ void FixAppendAtoms::pre_exchange()
       xmin = ymin = zmin = BIG;
       xmax = ymax = zmax = -BIG;
 
-      domain->lattice->bbox(1,bboxlo[0],bboxlo[1],bboxlo[2],
-                            xmin,ymin,zmin,xmax,ymax,zmax);
-      domain->lattice->bbox(1,bboxhi[0],bboxlo[1],bboxlo[2],
-                            xmin,ymin,zmin,xmax,ymax,zmax);
-      domain->lattice->bbox(1,bboxlo[0],bboxhi[1],bboxlo[2],
-                            xmin,ymin,zmin,xmax,ymax,zmax);
-      domain->lattice->bbox(1,bboxhi[0],bboxhi[1],bboxlo[2],
-                            xmin,ymin,zmin,xmax,ymax,zmax);
-      domain->lattice->bbox(1,bboxlo[0],bboxlo[1],bboxhi[2],
-                            xmin,ymin,zmin,xmax,ymax,zmax);
-      domain->lattice->bbox(1,bboxhi[0],bboxlo[1],bboxhi[2],
-                            xmin,ymin,zmin,xmax,ymax,zmax);
-      domain->lattice->bbox(1,bboxlo[0],bboxhi[1],bboxhi[2],
-                            xmin,ymin,zmin,xmax,ymax,zmax);
-      domain->lattice->bbox(1,bboxhi[0],bboxhi[1],bboxhi[2],
-                            xmin,ymin,zmin,xmax,ymax,zmax);
+      domain->lattice->bbox(1,bboxlo[0],bboxlo[1],bboxlo[2],xmin,ymin,zmin,xmax,ymax,zmax);
+      domain->lattice->bbox(1,bboxhi[0],bboxlo[1],bboxlo[2],xmin,ymin,zmin,xmax,ymax,zmax);
+      domain->lattice->bbox(1,bboxlo[0],bboxhi[1],bboxlo[2],xmin,ymin,zmin,xmax,ymax,zmax);
+      domain->lattice->bbox(1,bboxhi[0],bboxhi[1],bboxlo[2],xmin,ymin,zmin,xmax,ymax,zmax);
+      domain->lattice->bbox(1,bboxlo[0],bboxlo[1],bboxhi[2],xmin,ymin,zmin,xmax,ymax,zmax);
+      domain->lattice->bbox(1,bboxhi[0],bboxlo[1],bboxhi[2],xmin,ymin,zmin,xmax,ymax,zmax);
+      domain->lattice->bbox(1,bboxlo[0],bboxhi[1],bboxhi[2],xmin,ymin,zmin,xmax,ymax,zmax);
+      domain->lattice->bbox(1,bboxhi[0],bboxhi[1],bboxhi[2],xmin,ymin,zmin,xmax,ymax,zmax);
 
       int ilo,ihi,jlo,jhi,klo,khi;
       ilo = static_cast<int> (xmin);
diff --git a/src/SHOCK/fix_append_atoms.h b/src/SHOCK/fix_append_atoms.h
index 5fbe9e904b..a7e89a3976 100644
--- a/src/SHOCK/fix_append_atoms.h
+++ b/src/SHOCK/fix_append_atoms.h
@@ -29,6 +29,7 @@ class FixAppendAtoms : public Fix {
   FixAppendAtoms(class LAMMPS *, int, char **);
   ~FixAppendAtoms() override;
   int setmask() override;
+  void init() override;
   void setup(int) override;
   void pre_exchange() override;
   void initial_integrate(int) override;
diff --git a/src/SMTBQ/pair_smtbq.cpp b/src/SMTBQ/pair_smtbq.cpp
index 4f924a6fca..d262491912 100644
--- a/src/SMTBQ/pair_smtbq.cpp
+++ b/src/SMTBQ/pair_smtbq.cpp
@@ -70,11 +70,11 @@ using namespace MathConst;
 using namespace MathExtra;
 using namespace MathSpecial;
 
-#define MAXLINE 2048
-#define MAXTOKENS 2048
-#define DELTA 4
-#define PGDELTA 1
-#define MAXNEIGH 24
+static constexpr int MAXLINE = 2048;
+static constexpr int MAXTOKENS = 2048;
+static constexpr int DELTA = 4;
+static constexpr int PGDELTA = 1;
+static constexpr int MAXNEIGH = 24;
 
 static constexpr char SMTBQ_SEPARATORS[] = "' \t\n\r";
 
diff --git a/src/SPIN/fix_neb_spin.cpp b/src/SPIN/fix_neb_spin.cpp
index ea8ce9c1fc..0d7703d06f 100644
--- a/src/SPIN/fix_neb_spin.cpp
+++ b/src/SPIN/fix_neb_spin.cpp
@@ -42,7 +42,7 @@ using namespace FixConst;
 
 enum{SINGLE_PROC_DIRECT,SINGLE_PROC_MAP,MULTI_PROC};
 
-#define BUFSIZE 8
+static constexpr int BUFSIZE = 8;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/SPIN/min_spin.cpp b/src/SPIN/min_spin.cpp
index a2202cba32..99b3bd7145 100644
--- a/src/SPIN/min_spin.cpp
+++ b/src/SPIN/min_spin.cpp
@@ -38,7 +38,7 @@ using namespace MathConst;
 
 #define EPS_ENERGY 1.0e-8
 
-#define DELAYSTEP 5
+static constexpr int DELAYSTEP = 5;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/SPIN/min_spin_cg.cpp b/src/SPIN/min_spin_cg.cpp
index ee72609ed9..25ea83d6de 100644
--- a/src/SPIN/min_spin_cg.cpp
+++ b/src/SPIN/min_spin_cg.cpp
@@ -56,7 +56,7 @@ static const char cite_minstyle_spin_cg[] =
 
 #define EPS_ENERGY 1.0e-8
 
-#define DELAYSTEP 5
+static constexpr int DELAYSTEP = 5;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/SPIN/min_spin_lbfgs.cpp b/src/SPIN/min_spin_lbfgs.cpp
index ae9d33a705..357d8364e1 100644
--- a/src/SPIN/min_spin_lbfgs.cpp
+++ b/src/SPIN/min_spin_lbfgs.cpp
@@ -56,7 +56,7 @@ static const char cite_minstyle_spin_lbfgs[] =
 
 #define EPS_ENERGY 1.0e-8
 
-#define DELAYSTEP 5
+static constexpr int DELAYSTEP = 5;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/SPIN/neb_spin.cpp b/src/SPIN/neb_spin.cpp
index e7ef9ff7ea..ac54f069a9 100644
--- a/src/SPIN/neb_spin.cpp
+++ b/src/SPIN/neb_spin.cpp
@@ -62,10 +62,11 @@ static const char cite_neb_spin[] =
   "doi={10.1016/j.cpc.2015.07.001}\n"
   "}\n\n";
 
-#define MAXLINE 256
-#define CHUNK 1024
+static constexpr int MAXLINE = 256;
+static constexpr int CHUNK = 1024;
+
 // 8 attributes: tag, spin norm, position (3), spin direction (3)
-#define ATTRIBUTE_PERLINE 8
+static constexpr int ATTRIBUTE_PERLINE = 8;
 
 /* ---------------------------------------------------------------------- */
 
@@ -374,7 +375,7 @@ void NEBSpin::readfile(char *file, int flag)
   int i,nchunk,eofflag,nlines;
   tagint tag;
   char *eof,*start,*next,*buf;
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
   double musp,xx,yy,zz,spx,spy,spz;
 
   if (me_universe == 0 && universe->uscreen)
diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp
index e96321a7a3..9b153a1c28 100644
--- a/src/SRD/fix_srd.cpp
+++ b/src/SRD/fix_srd.cpp
@@ -52,13 +52,13 @@ enum { BIG_MOVE, SRD_MOVE, SRD_ROTATE };
 enum { CUBIC_ERROR, CUBIC_WARN };
 enum { SHIFT_NO, SHIFT_YES, SHIFT_POSSIBLE };
 
-#define EINERTIA 0.2    // moment of inertia prefactor for ellipsoid
+static constexpr double EINERTIA = 0.2;    // moment of inertia prefactor for ellipsoid
 
-#define ATOMPERBIN 30
-#define BIG 1.0e20
-#define VBINSIZE 5
-#define TOLERANCE 0.00001
-#define MAXITER 20
+static constexpr int ATOMPERBIN = 30;
+static constexpr double BIG = 1.0e20;
+static constexpr int VBINSIZE = 5;
+static constexpr double TOLERANCE = 0.00001;
+static constexpr int MAXITER = 20;
 
 static const char cite_fix_srd[] =
     "fix srd command: doi:10.1063/1.3419070\n\n"
diff --git a/src/UEF/dump_cfg_uef.cpp b/src/UEF/dump_cfg_uef.cpp
index d72225b238..34ef655af9 100644
--- a/src/UEF/dump_cfg_uef.cpp
+++ b/src/UEF/dump_cfg_uef.cpp
@@ -26,9 +26,9 @@
 
 using namespace LAMMPS_NS;
 
-#define UNWRAPEXPAND 10.0
-#define ONEFIELD 32
-#define DELTA 1048576
+static constexpr double UNWRAPEXPAND = 10.0;
+static constexpr int ONEFIELD = 32;
+static constexpr int DELTA = 1048576;
 
 /* ----------------------------------------------------------------------
  * base method is mostly fine, just need to find the FixNHUef
diff --git a/src/VORONOI/compute_voronoi_atom.cpp b/src/VORONOI/compute_voronoi_atom.cpp
index b4f1aa3055..ca4ad4a85c 100644
--- a/src/VORONOI/compute_voronoi_atom.cpp
+++ b/src/VORONOI/compute_voronoi_atom.cpp
@@ -35,7 +35,7 @@
 using namespace LAMMPS_NS;
 using namespace voro;
 
-#define FACESDELTA 10000
+static constexpr int FACESDELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/VTK/dump_vtk.cpp b/src/VTK/dump_vtk.cpp
index 172a092629..12fc4ad1ea 100644
--- a/src/VTK/dump_vtk.cpp
+++ b/src/VTK/dump_vtk.cpp
@@ -93,8 +93,8 @@ enum{X,Y,Z, // required for vtk, must come first
 enum{LT,LE,GT,GE,EQ,NEQ,XOR};
 enum{VTK,VTP,VTU,PVTP,PVTU}; // file formats
 
-#define ONEFIELD 32
-#define DELTA 1048576
+static constexpr int ONEFIELD = 32;
+static constexpr int DELTA = 1048576;
 
 #if (VTK_MAJOR_VERSION < 5) || (VTK_MAJOR_VERSION > 9)
 #error This code has only been tested with VTK 5, 6, 7, 8, and 9
@@ -2096,7 +2096,7 @@ int DumpVTK::modify_param(int narg, char **arg)
     if (refreshflag) error->all(FLERR,"Dump_modify can only have one refresh");
 
     refreshflag = 1;
-    refresh = argi.copy_name();
+    idrefresh = argi.copy_name();
     return 2;
   }
 
diff --git a/src/YAFF/angle_cross.cpp b/src/YAFF/angle_cross.cpp
index 46833b7f74..d3e127e935 100644
--- a/src/YAFF/angle_cross.cpp
+++ b/src/YAFF/angle_cross.cpp
@@ -32,7 +32,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/YAFF/angle_mm3.cpp b/src/YAFF/angle_mm3.cpp
index c75a0d8308..3ff7df1653 100644
--- a/src/YAFF/angle_mm3.cpp
+++ b/src/YAFF/angle_mm3.cpp
@@ -32,11 +32,14 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.001
+static constexpr double SMALL = 0.001;
 
 /* ---------------------------------------------------------------------- */
 
-AngleMM3::AngleMM3(LAMMPS *lmp) : Angle(lmp) {}
+AngleMM3::AngleMM3(LAMMPS *lmp) : Angle(lmp)
+{
+  born_matrix_enable = 1;
+}
 
 /* ---------------------------------------------------------------------- */
 
@@ -284,3 +287,43 @@ double AngleMM3::single(int type, int i1, int i2, int i3)
 
   return energy;
 }
+
+/* ---------------------------------------------------------------------- */
+
+void AngleMM3::born_matrix(int type, int i1, int i2, int i3, double &du, double &du2)
+{
+  double **x = atom->x;
+
+  double delx1 = x[i1][0] - x[i2][0];
+  double dely1 = x[i1][1] - x[i2][1];
+  double delz1 = x[i1][2] - x[i2][2];
+  domain->minimum_image(delx1,dely1,delz1);
+  double r1 = sqrt(delx1*delx1 + dely1*dely1 + delz1*delz1);
+
+  double delx2 = x[i3][0] - x[i2][0];
+  double dely2 = x[i3][1] - x[i2][1];
+  double delz2 = x[i3][2] - x[i2][2];
+  domain->minimum_image(delx2,dely2,delz2);
+  double r2 = sqrt(delx2*delx2 + dely2*dely2 + delz2*delz2);
+
+  double c = delx1*delx2 + dely1*dely2 + delz1*delz2;
+  c /= r1*r2;
+  if (c > 1.0) c = 1.0;
+  if (c < -1.0) c = -1.0;
+  double theta = acos(c);
+
+  double s = sqrt(1.0 - c*c);
+  if (s < SMALL) s = SMALL;
+  s = 1.0/s;
+
+  double dtheta = theta - theta0[type];
+  double dtheta2 = dtheta*dtheta;
+  double dtheta3 = dtheta2*dtheta;
+  double dtheta4 = dtheta3*dtheta;
+  double dtheta5 = dtheta4*dtheta;
+  double df = 2.0 * dtheta - 2.406423 * dtheta2 + 0.735348 * dtheta3 - 0.65832 * dtheta4 + 1.42254 * dtheta5;
+  double d2f = 2.0 - 4.812846 * dtheta + 2.206044 * dtheta2 - 2.63328 * dtheta3 + 7.1127 * dtheta4;
+
+  du = -k2[type] * df / s;
+  du2 = k2[type] * (d2f - df  * c / s) / (s * s) ;
+}
diff --git a/src/YAFF/angle_mm3.h b/src/YAFF/angle_mm3.h
index 95009a9cf6..22f5bd746c 100644
--- a/src/YAFF/angle_mm3.h
+++ b/src/YAFF/angle_mm3.h
@@ -35,6 +35,7 @@ class AngleMM3 : public Angle {
   void read_restart(FILE *) override;
   void write_data(FILE *) override;
   double single(int, int, int, int) override;
+  void born_matrix(int type, int i1, int i2, int i3, double &du, double &du2) override;
 
  protected:
   double *theta0, *k2;
diff --git a/src/YAFF/bond_mm3.cpp b/src/YAFF/bond_mm3.cpp
index a5ef6fb8bc..31ce2dad3e 100644
--- a/src/YAFF/bond_mm3.cpp
+++ b/src/YAFF/bond_mm3.cpp
@@ -31,7 +31,10 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-BondMM3::BondMM3(LAMMPS *lmp) : Bond(lmp) {}
+BondMM3::BondMM3(LAMMPS *lmp) : Bond(lmp)
+{
+  born_matrix_enable = 1;
+}
 
 /* ---------------------------------------------------------------------- */
 
@@ -219,3 +222,19 @@ double BondMM3::single(int type, double rsq,
   else fforce = 0.0;
   return k2[type]*dr2*(1.0+K3*dr+K4*dr2);
 }
+
+/* ---------------------------------------------------------------------- */
+
+void BondMM3::born_matrix(int type, double rsq, int /*i*/, int /*j*/, double &du, double &du2)
+{
+  double r = sqrt(rsq);
+  double dr = r - r0[type];
+  double dr2 = dr * dr;
+  double dr3 = dr2 * dr;
+
+  double K3 = -2.55 * k2[type] /force->angstrom;
+  double K4 = 7.0 * k2[type] * 2.55 * 2.55 / (12.0 * force->angstrom * force->angstrom);
+
+  du = 2.0 * k2[type] * dr + 3.0 * K3 * dr2 + 4.0 * K4 * dr3;
+  du2 = 2.0 * k2[type] + 6.0 * K3 * dr + 12.0 * K4 * dr2;
+}
diff --git a/src/YAFF/bond_mm3.h b/src/YAFF/bond_mm3.h
index 302c4052d0..ea89ac826d 100644
--- a/src/YAFF/bond_mm3.h
+++ b/src/YAFF/bond_mm3.h
@@ -35,6 +35,7 @@ class BondMM3 : public Bond {
   void read_restart(FILE *) override;
   void write_data(FILE *) override;
   double single(int, double, int, int, double &) override;
+  void born_matrix(int, double, int, int, double &, double &) override;
 
  protected:
   double *r0, *k2;
diff --git a/src/YAFF/improper_distharm.cpp b/src/YAFF/improper_distharm.cpp
index 27516fa416..53658a1377 100644
--- a/src/YAFF/improper_distharm.cpp
+++ b/src/YAFF/improper_distharm.cpp
@@ -31,8 +31,8 @@
 
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/YAFF/improper_sqdistharm.cpp b/src/YAFF/improper_sqdistharm.cpp
index 1cd8515d9a..e85f8e4c25 100644
--- a/src/YAFF/improper_sqdistharm.cpp
+++ b/src/YAFF/improper_sqdistharm.cpp
@@ -31,8 +31,8 @@
 
 using namespace LAMMPS_NS;
 
-#define TOLERANCE 0.05
-#define SMALL     0.001
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/angle.cpp b/src/angle.cpp
index 93d217237e..79893cc52f 100644
--- a/src/angle.cpp
+++ b/src/angle.cpp
@@ -24,7 +24,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define FOURTH 0.25
+static constexpr double FOURTH = 0.25;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/angle_hybrid.cpp b/src/angle_hybrid.cpp
index 7419139942..e79776b0d2 100644
--- a/src/angle_hybrid.cpp
+++ b/src/angle_hybrid.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EXTRA 1000
+static constexpr int EXTRA = 1000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/angle_write.cpp b/src/angle_write.cpp
index fb0e65ccf5..0a0c457000 100644
--- a/src/angle_write.cpp
+++ b/src/angle_write.cpp
@@ -35,7 +35,7 @@ using MathConst::DEG2RAD;
 using MathConst::RAD2DEG;
 
 static constexpr double epsilon = 6.5e-6;
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 /* ---------------------------------------------------------------------- */
 
 void AngleWrite::command(int narg, char **arg)
@@ -147,7 +147,7 @@ void AngleWrite::command(int narg, char **arg)
     writer->input->one("mass * 1.0");
     writer->input->one(fmt::format("angle_style {}", force->angle_style));
     FILE *coeffs;
-    char line[MAXLINE];
+    char line[MAXLINE] = {'\0'};
     coeffs = fopen(coeffs_file.c_str(), "r");
     for (int i = 0; i < atom->nangletypes; ++i) {
       fgets(line, MAXLINE, coeffs);
diff --git a/src/atom.cpp b/src/atom.cpp
index b604c54e6b..f7f61d6ced 100644
--- a/src/atom.cpp
+++ b/src/atom.cpp
@@ -26,6 +26,7 @@
 #include "input.h"
 #include "label_map.h"
 #include "math_const.h"
+#include "math_extra.h"
 #include "memory.h"
 #include "modify.h"
 #include "molecule.h"
@@ -47,9 +48,9 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define DELTA 1
-#define EPSILON 1.0e-6
-#define MAXLINE 256
+static constexpr int DELTA = 1;
+static constexpr double EPSILON = 1.0e-6;
+static constexpr int MAXLINE = 256;
 
 /* ----------------------------------------------------------------------
    one instance per AtomVec style in style_atom.h
@@ -234,6 +235,7 @@ Atom::Atom(LAMMPS *_lmp) : Pointers(_lmp), atom_style(nullptr), avec(nullptr), a
   darray = nullptr;
   icols = dcols = nullptr;
   ivname = dvname = ianame = daname = nullptr;
+  ivghost = dvghost = iaghost = daghost = nullptr;
 
   // initialize atom style and array existence flags
 
@@ -333,6 +335,10 @@ Atom::~Atom()
   memory->sfree(darray);
   memory->sfree(icols);
   memory->sfree(dcols);
+  memory->destroy(ivghost);
+  memory->destroy(dvghost);
+  memory->destroy(iaghost);
+  memory->destroy(daghost);
 
   // delete user-defined molecules
 
@@ -618,7 +624,7 @@ void Atom::set_atomflag_defaults()
   // identical list as 2nd customization in atom.h
 
   labelmapflag = 0;
-  sphere_flag = ellipsoid_flag = line_flag = tri_flag = body_flag = 0;
+  ellipsoid_flag = line_flag = tri_flag = body_flag = 0;
   quat_flag = 0;
   peri_flag = electron_flag = 0;
   wavepacket_flag = sph_flag = 0;
@@ -2112,6 +2118,15 @@ std::vector<Molecule *>Atom::get_molecule_by_id(const std::string &id)
 void Atom::add_molecule_atom(Molecule *onemol, int iatom, int ilocal, tagint offset)
 {
   if (onemol->qflag && q_flag) q[ilocal] = onemol->q[iatom];
+  if (onemol->muflag && mu_flag) {
+    double r[3], rotmat[3][3];
+    MathExtra::quat_to_mat(onemol->quat_external, rotmat);
+    MathExtra::matvec(rotmat, onemol->mu[iatom], r);
+    mu[ilocal][0] = r[0];
+    mu[ilocal][1] = r[1];
+    mu[ilocal][2] = r[2];
+    mu[ilocal][3] = sqrt(r[0] * r[0] + r[1] * r[1] + r[2] * r[2]);
+  }
   if (onemol->radiusflag && radius_flag) radius[ilocal] = onemol->radius[iatom];
   if (onemol->rmassflag && rmass_flag) rmass[ilocal] = onemol->rmass[iatom];
   else if (rmass_flag)
@@ -2599,6 +2614,18 @@ void Atom::update_callback(int ifix)
     if (extra_border[i] > ifix) extra_border[i]--;
 }
 
+/** \brief Find a custom per-atom property with given name
+\verbatim embed:rst
+
+This function returns the list index of a custom per-atom property
+with the name "name", also returning by reference its data type and
+number of values per atom.
+\endverbatim
+ * \param name Name of the property (w/o a "i_" or "d_" or "i2_" or "d2_" prefix)
+ * \param &flag Returns data type of property: 0 for int, 1 for double
+ * \param &cols Returns number of values: 0 for a single value, 1 or more for a vector of values
+ * \return index of property in the respective list of properties
+ */
 /* ----------------------------------------------------------------------
    find custom per-atom vector with name
    return index if found, -1 if not found
@@ -2642,6 +2669,33 @@ int Atom::find_custom(const char *name, int &flag, int &cols)
   return -1;
 }
 
+/** \brief Find a custom per-atom property with given name and retrieve ghost property
+\verbatim embed:rst
+
+This function returns the list index of a custom per-atom property
+with the name "name", also returning by reference its data type,
+number of values per atom, and if it is communicated to ghost particles.
+Classes rarely need to check on ghost communication and so `find_custom`
+is typically preferred to this function. See :doc:`pair amoeba <pair_amoeba>`
+for an example where checking ghost communication is necessary.
+\endverbatim
+ * \param name Name of the property (w/o a "i_" or "d_" or "i2_" or "d2_" prefix)
+ * \param &flag Returns data type of property: 0 for int, 1 for double
+ * \param &cols Returns number of values: 0 for a single value, 1 or more for a vector of values
+ * \param &ghost Returns whether property is communicated to ghost atoms: 0 for no, 1 for yes
+ * \return index of property in the respective list of properties
+ */
+int Atom::find_custom_ghost(const char *name, int &flag, int &cols, int &ghost)
+{
+  int i = find_custom(name, flag, cols);
+  if (i == -1) return i;
+  if ((flag == 0) && (cols == 0)) ghost = ivghost[i];
+  else if ((flag == 1) && (cols == 0)) ghost = dvghost[i];
+  else if ((flag == 0) && (cols == 1)) ghost = iaghost[i];
+  else if ((flag == 1) && (cols == 1)) ghost = daghost[i];
+  return i;
+}
+
 /** \brief Add a custom per-atom property with the given name and type and size
 \verbatim embed:rst
 
@@ -2652,9 +2706,10 @@ This function is called, e.g. from :doc:`fix property/atom <fix_property_atom>`.
  * \param name Name of the property (w/o a "i_" or "d_" or "i2_" or "d2_" prefix)
  * \param flag Data type of property: 0 for int, 1 for double
  * \param cols Number of values: 0 for a single value, 1 or more for a vector of values
+ * \param ghost Whether property is communicated to ghost atoms: 0 for no, 1 for yes
  * \return index of property in the respective list of properties
  */
-int Atom::add_custom(const char *name, int flag, int cols)
+int Atom::add_custom(const char *name, int flag, int cols, int ghost)
 {
   int index = -1;
 
@@ -2663,6 +2718,8 @@ int Atom::add_custom(const char *name, int flag, int cols)
     nivector++;
     ivname = (char **) memory->srealloc(ivname,nivector*sizeof(char *),"atom:ivname");
     ivname[index] = utils::strdup(name);
+    ivghost = (int *) memory->srealloc(ivghost,nivector*sizeof(int),"atom:ivghost");
+    ivghost[index] = ghost;
     ivector = (int **) memory->srealloc(ivector,nivector*sizeof(int *),"atom:ivector");
     memory->create(ivector[index],nmax,"atom:ivector");
 
@@ -2671,6 +2728,8 @@ int Atom::add_custom(const char *name, int flag, int cols)
     ndvector++;
     dvname = (char **) memory->srealloc(dvname,ndvector*sizeof(char *),"atom:dvname");
     dvname[index] = utils::strdup(name);
+    dvghost = (int *) memory->srealloc(dvghost,ndvector*sizeof(int),"atom:dvghost");
+    dvghost[index] = ghost;
     dvector = (double **) memory->srealloc(dvector,ndvector*sizeof(double *),"atom:dvector");
     memory->create(dvector[index],nmax,"atom:dvector");
 
@@ -2679,6 +2738,8 @@ int Atom::add_custom(const char *name, int flag, int cols)
     niarray++;
     ianame = (char **) memory->srealloc(ianame,niarray*sizeof(char *),"atom:ianame");
     ianame[index] = utils::strdup(name);
+    iaghost = (int *) memory->srealloc(iaghost,niarray*sizeof(int),"atom:iaghost");
+    iaghost[index] = ghost;
     iarray = (int ***) memory->srealloc(iarray,niarray*sizeof(int **),"atom:iarray");
     memory->create(iarray[index],nmax,cols,"atom:iarray");
     icols = (int *) memory->srealloc(icols,niarray*sizeof(int),"atom:icols");
@@ -2689,6 +2750,8 @@ int Atom::add_custom(const char *name, int flag, int cols)
     ndarray++;
     daname = (char **) memory->srealloc(daname,ndarray*sizeof(char *),"atom:daname");
     daname[index] = utils::strdup(name);
+    daghost = (int *) memory->srealloc(daghost,ndarray*sizeof(int),"atom:daghost");
+    daghost[index] = ghost;
     darray = (double ***) memory->srealloc(darray,ndarray*sizeof(double **),"atom:darray");
     memory->create(darray[index],nmax,cols,"atom:darray");
     dcols = (int *) memory->srealloc(dcols,ndarray*sizeof(int),"atom:dcols");
@@ -2697,6 +2760,7 @@ int Atom::add_custom(const char *name, int flag, int cols)
 
   if (index < 0)
     error->all(FLERR,"Invalid call to Atom::add_custom()");
+
   return index;
 }
 
diff --git a/src/atom.h b/src/atom.h
index 548168ac59..f238b2d5b1 100644
--- a/src/atom.h
+++ b/src/atom.h
@@ -180,7 +180,7 @@ class Atom : protected Pointers {
   // 1 if variable is used, 0 if not
 
   int labelmapflag, types_style;
-  int sphere_flag, ellipsoid_flag, line_flag, tri_flag, body_flag;
+  int ellipsoid_flag, line_flag, tri_flag, body_flag;
   int peri_flag, electron_flag;
   int wavepacket_flag, sph_flag;
 
@@ -242,6 +242,7 @@ class Atom : protected Pointers {
   int *icols, *dcols;
   char **ivname, **dvname, **ianame, **daname;
   int nivector, ndvector, niarray, ndarray;
+  int *ivghost, *dvghost, *iaghost, *daghost;
 
   // molecule templates
   // each template can be a set of consecutive molecules
@@ -363,7 +364,8 @@ class Atom : protected Pointers {
   void update_callback(int);
 
   int find_custom(const char *, int &, int &);
-  virtual int add_custom(const char *, int, int);
+  int find_custom_ghost(const char *, int &, int &, int &);
+  virtual int add_custom(const char *, int, int, int ghost = 0);
   virtual void remove_custom(int, int, int);
 
   void *extract(const char *);
diff --git a/src/atom_map.cpp b/src/atom_map.cpp
index c28b886335..37b46182c1 100644
--- a/src/atom_map.cpp
+++ b/src/atom_map.cpp
@@ -22,7 +22,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EXTRA 1000
+static constexpr int EXTRA = 1000;
 
 /* ----------------------------------------------------------------------
    allocate and initialize array or hash table for global -> local map
diff --git a/src/atom_vec_line.cpp b/src/atom_vec_line.cpp
index ff09bed6d0..cd747d0862 100644
--- a/src/atom_vec_line.cpp
+++ b/src/atom_vec_line.cpp
@@ -44,7 +44,6 @@ AtomVecLine::AtomVecLine(LAMMPS *lmp) : AtomVec(lmp)
   atom->line_flag = 1;
   atom->molecule_flag = atom->rmass_flag = 1;
   atom->radius_flag = atom->omega_flag = atom->torque_flag = 1;
-  atom->sphere_flag = 1;
 
   nlocal_bonus = nghost_bonus = nmax_bonus = 0;
   bonus = nullptr;
diff --git a/src/atom_vec_sphere.cpp b/src/atom_vec_sphere.cpp
index 8769c316d9..3e8c2fd2a3 100644
--- a/src/atom_vec_sphere.cpp
+++ b/src/atom_vec_sphere.cpp
@@ -32,7 +32,6 @@ AtomVecSphere::AtomVecSphere(LAMMPS *lmp) : AtomVec(lmp)
   molecular = Atom::ATOMIC;
   radvary = 0;
 
-  atom->sphere_flag = 1;
   atom->radius_flag = atom->rmass_flag = atom->omega_flag = atom->torque_flag = 1;
 
   // strings with peratom variables to include in each AtomVec method
@@ -60,13 +59,10 @@ AtomVecSphere::AtomVecSphere(LAMMPS *lmp) : AtomVec(lmp)
 
 void AtomVecSphere::process_args(int narg, char **arg)
 {
-  if (narg != 0 && narg != 1) error->all(FLERR, "Illegal atom_style sphere command");
+  if (narg > 1) error->all(FLERR, "Illegal atom_style sphere command");
 
   radvary = 0;
-  if (narg == 1) {
-    radvary = utils::numeric(FLERR, arg[0], true, lmp);
-    if (radvary < 0 || radvary > 1) error->all(FLERR, "Illegal atom_style sphere command");
-  }
+  if (narg == 1) radvary = utils::logical(FLERR, arg[0], true, lmp);
 
   // dynamic particle radius and mass must be communicated every step
 
diff --git a/src/atom_vec_tri.cpp b/src/atom_vec_tri.cpp
index a46609b02c..205e94d792 100644
--- a/src/atom_vec_tri.cpp
+++ b/src/atom_vec_tri.cpp
@@ -47,7 +47,6 @@ AtomVecTri::AtomVecTri(LAMMPS *lmp) : AtomVec(lmp)
   atom->molecule_flag = atom->rmass_flag = 1;
   atom->radius_flag = atom->omega_flag = atom->angmom_flag = 1;
   atom->torque_flag = 1;
-  atom->sphere_flag = 1;
 
   nlocal_bonus = nghost_bonus = nmax_bonus = 0;
   bonus = nullptr;
diff --git a/src/bond_hybrid.cpp b/src/bond_hybrid.cpp
index 4e477ab3a6..401358dda0 100644
--- a/src/bond_hybrid.cpp
+++ b/src/bond_hybrid.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EXTRA 1000
+static constexpr int EXTRA = 1000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/comm.cpp b/src/comm.cpp
index a6ac1c4bc8..1293dd3d6d 100644
--- a/src/comm.cpp
+++ b/src/comm.cpp
@@ -42,7 +42,7 @@
 
 using namespace LAMMPS_NS;
 
-#define BUFEXTRA 1024
+static constexpr int BUFEXTRA = 1024;
 
 enum{ONELEVEL,TWOLEVEL,NUMA,CUSTOM};
 enum{CART,CARTREORDER,XYZ};
diff --git a/src/comm_brick.cpp b/src/comm_brick.cpp
index 08d372187e..cf38271029 100644
--- a/src/comm_brick.cpp
+++ b/src/comm_brick.cpp
@@ -35,9 +35,9 @@
 
 using namespace LAMMPS_NS;
 
-#define BUFFACTOR 1.5
-#define BUFMIN 1024
-#define BIG 1.0e20
+static constexpr double BUFFACTOR = 1.5;
+static constexpr int BUFMIN = 1024;
+static constexpr double BIG = 1.0e20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/comm_tiled.cpp b/src/comm_tiled.cpp
index f2d91c07fa..6c44af686f 100644
--- a/src/comm_tiled.cpp
+++ b/src/comm_tiled.cpp
@@ -36,10 +36,9 @@
 
 using namespace LAMMPS_NS;
 
-#define BUFFACTOR 1.5
-#define BUFFACTOR 1.5
-#define BUFMIN 1024
-#define EPSILON 1.0e-6
+static constexpr double BUFFACTOR = 1.5;
+static constexpr int BUFMIN = 1024;
+static constexpr double EPSILON = 1.0e-6;
 
 #define DELTA_PROCS 16
 
diff --git a/src/compute.cpp b/src/compute.cpp
index 2bd1544fd7..d6ac382151 100644
--- a/src/compute.cpp
+++ b/src/compute.cpp
@@ -26,8 +26,8 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 4
-#define BIG MAXTAGINT
+static constexpr int DELTA = 4;
+static constexpr double BIG = MAXTAGINT;
 
 // allocate space for static class instance variable and initialize it
 
@@ -83,7 +83,7 @@ Compute::Compute(LAMMPS *lmp, int narg, char **arg) :
 
   extra_dof = domain->dimension;
   dynamic_user = 0;
-  fix_dof = 0;
+  fix_dof = 0.0;
 
   // setup list of timesteps
 
diff --git a/src/compute.h b/src/compute.h
index 8ae01a4469..6956c3ae99 100644
--- a/src/compute.h
+++ b/src/compute.h
@@ -178,7 +178,7 @@ class Compute : protected Pointers {
 
   double natoms_temp;    // # of atoms used for temperature calculation
   double extra_dof;      // extra DOF for temperature computes
-  int fix_dof;           // DOF due to fixes
+  double fix_dof;        // DOF due to fixes
   int dynamic;           // recount atoms for temperature computes
   int dynamic_user;      // user request for temp compute to be dynamic
 
diff --git a/src/compute_angle_local.cpp b/src/compute_angle_local.cpp
index 3600562664..3e8b15fd64 100644
--- a/src/compute_angle_local.cpp
+++ b/src/compute_angle_local.cpp
@@ -31,7 +31,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 enum{THETA,ENG,VARIABLE};
 
diff --git a/src/compute_bond_local.cpp b/src/compute_bond_local.cpp
index f2603e8cdd..143539d435 100644
--- a/src/compute_bond_local.cpp
+++ b/src/compute_bond_local.cpp
@@ -31,8 +31,8 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
-#define EPSILON 1.0e-12
+static constexpr int DELTA = 10000;
+static constexpr double EPSILON = 1.0e-12;
 
 enum{DIST,DX,DY,DZ,VELVIB,OMEGA,ENGTRANS,ENGVIB,ENGROT,ENGPOT,FORCE,FX,FY,FZ,VARIABLE,BN};
 
diff --git a/src/compute_centroid_stress_atom.cpp b/src/compute_centroid_stress_atom.cpp
index 5226af1998..c6854737a8 100644
--- a/src/compute_centroid_stress_atom.cpp
+++ b/src/compute_centroid_stress_atom.cpp
@@ -303,8 +303,8 @@ void ComputeCentroidStressAtom::compute_peratom()
   // add in per-atom contributions from relevant fixes
   // skip if vatom = nullptr
   // possible during setup phase if fix has not initialized its vatom yet
-  // e.g. fix ave/spatial defined before fix shake,
-  //   and fix ave/spatial uses a per-atom stress from this compute as input
+  // e.g. fix ave/chunk defined before fix shake,
+  //   and fix ave/chunk uses a per-atom stress from this compute as input
   // fix styles are CENTROID_SAME, CENTROID_AVAIL or CENTROID_NOTAVAIL
 
   if (fixflag) {
diff --git a/src/compute_chunk_atom.cpp b/src/compute_chunk_atom.cpp
index fc70a3246f..a1d595a086 100644
--- a/src/compute_chunk_atom.cpp
+++ b/src/compute_chunk_atom.cpp
@@ -46,7 +46,7 @@ enum { NODISCARD, MIXED, YESDISCARD };
 enum { ONCE, NFREQ, EVERY };    // used in several files
 enum { LIMITMAX, LIMITEXACT };
 
-#define IDMAX (1024 * 1024)
+static constexpr int IDMAX = (1024 * 1024);
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/compute_dihedral_local.cpp b/src/compute_dihedral_local.cpp
index a6bcbccf00..6543c06d20 100644
--- a/src/compute_dihedral_local.cpp
+++ b/src/compute_dihedral_local.cpp
@@ -30,8 +30,8 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define DELTA 10000
-#define SMALL 0.001
+static constexpr int DELTA = 10000;
+static constexpr double SMALL = 0.001;
 
 enum{PHI,VARIABLE};
 
diff --git a/src/compute_erotate_sphere.cpp b/src/compute_erotate_sphere.cpp
index b020fc4e0e..cb92b73731 100644
--- a/src/compute_erotate_sphere.cpp
+++ b/src/compute_erotate_sphere.cpp
@@ -20,7 +20,7 @@
 
 using namespace LAMMPS_NS;
 
-#define INERTIA 0.4    // moment of inertia prefactor for sphere
+static constexpr double INERTIA = 0.4;    // moment of inertia prefactor for sphere
 
 /* ---------------------------------------------------------------------- */
 
@@ -34,7 +34,7 @@ ComputeERotateSphere::ComputeERotateSphere(LAMMPS *lmp, int narg, char **arg) :
 
   // error check
 
-  if (!atom->sphere_flag) error->all(FLERR, "Compute erotate/sphere requires atom style sphere");
+  if (!atom->omega_flag) error->all(FLERR, "Compute erotate/sphere requires atom attribute omega");
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/compute_erotate_sphere_atom.cpp b/src/compute_erotate_sphere_atom.cpp
index 3ec0f402a8..b1aca68614 100644
--- a/src/compute_erotate_sphere_atom.cpp
+++ b/src/compute_erotate_sphere_atom.cpp
@@ -23,7 +23,7 @@
 
 using namespace LAMMPS_NS;
 
-#define INERTIA 0.4          // moment of inertia prefactor for sphere
+static constexpr double INERTIA = 0.4;          // moment of inertia prefactor for sphere
 
 /* ---------------------------------------------------------------------- */
 
@@ -40,8 +40,10 @@ ComputeErotateSphereAtom(LAMMPS *lmp, int narg, char **arg) :
 
   // error check
 
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Compute erotate/sphere/atom requires atom style sphere");
+  if (!atom->omega_flag)
+    error->all(FLERR,"Compute erotate/sphere/atom requires atom attribute omega");
+  if (!atom->radius_flag)
+    error->all(FLERR,"Compute erotate/sphere/atom requires atom attribute radius");
 
   nmax = 0;
 }
diff --git a/src/compute_group_group.cpp b/src/compute_group_group.cpp
index 31bbc81597..afc825ac3c 100644
--- a/src/compute_group_group.cpp
+++ b/src/compute_group_group.cpp
@@ -37,7 +37,7 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 enum { OFF, INTER, INTRA };
 
diff --git a/src/compute_heat_flux.cpp b/src/compute_heat_flux.cpp
index 55fa27cafe..1b6fd27086 100644
--- a/src/compute_heat_flux.cpp
+++ b/src/compute_heat_flux.cpp
@@ -28,7 +28,6 @@
 
 using namespace LAMMPS_NS;
 
-
 /* ---------------------------------------------------------------------- */
 
 ComputeHeatFlux::ComputeHeatFlux(LAMMPS *lmp, int narg, char **arg) :
@@ -45,22 +44,24 @@ ComputeHeatFlux::ComputeHeatFlux(LAMMPS *lmp, int narg, char **arg) :
   // ensure they are valid for these computations
 
   id_ke = utils::strdup(arg[3]);
-  id_pe = utils::strdup(arg[4]);
-  id_stress = utils::strdup(arg[5]);
+  auto ike = modify->get_compute_by_id(id_ke);
+  if (!ike) error->all(FLERR,"Could not find compute heat/flux compute ID {}", id_ke);
+  if (!utils::strmatch(ike->style,"^ke/atom"))
+    error->all(FLERR,"Compute heat/flux compute ID {} does not compute ke/atom", id_ke);
 
-  int ike = modify->find_compute(id_ke);
-  int ipe = modify->find_compute(id_pe);
-  int istress = modify->find_compute(id_stress);
-  if (ike < 0 || ipe < 0 || istress < 0)
-    error->all(FLERR,"Could not find compute heat/flux compute ID");
-  if (strcmp(modify->compute[ike]->style,"ke/atom") != 0)
-    error->all(FLERR,"Compute heat/flux compute ID does not compute ke/atom");
-  if (modify->compute[ipe]->peatomflag == 0)
-    error->all(FLERR,"Compute heat/flux compute ID does not compute pe/atom");
-  if (modify->compute[istress]->pressatomflag != 1
-      && modify->compute[istress]->pressatomflag != 2)
+  id_pe = utils::strdup(arg[4]);
+  auto ipe = modify->get_compute_by_id(id_pe);
+  if (!ipe) error->all(FLERR,"Could not find compute heat/flux compute ID {}", id_pe);
+  if (ipe->peatomflag == 0)
+    error->all(FLERR,"Compute heat/flux compute ID {} does not compute pe/atom", id_pe);
+
+  id_stress = utils::strdup(arg[5]);
+  auto istress = modify->get_compute_by_id(id_stress);
+  if (!istress) error->all(FLERR,"Could not find compute heat/flux compute ID {}", id_stress);
+  if ((istress->pressatomflag != 1) && (istress->pressatomflag != 2))
     error->all(FLERR,
-               "Compute heat/flux compute ID does not compute stress/atom or centroid/stress/atom");
+               "Compute heat/flux compute ID {} does not compute stress/atom or "
+               "centroid/stress/atom", id_stress);
 
   vector = new double[size_vector];
 }
@@ -69,10 +70,10 @@ ComputeHeatFlux::ComputeHeatFlux(LAMMPS *lmp, int narg, char **arg) :
 
 ComputeHeatFlux::~ComputeHeatFlux()
 {
-  delete [] id_ke;
-  delete [] id_pe;
-  delete [] id_stress;
-  delete [] vector;
+  delete[] id_ke;
+  delete[] id_pe;
+  delete[] id_stress;
+  delete[] vector;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -81,15 +82,12 @@ void ComputeHeatFlux::init()
 {
   // error checks
 
-  int ike = modify->find_compute(id_ke);
-  int ipe = modify->find_compute(id_pe);
-  int istress = modify->find_compute(id_stress);
-  if (ike < 0 || ipe < 0 || istress < 0)
-    error->all(FLERR,"Could not find compute heat/flux compute ID");
-
-  c_ke = modify->compute[ike];
-  c_pe = modify->compute[ipe];
-  c_stress = modify->compute[istress];
+  c_ke = modify->get_compute_by_id(id_ke);
+  if (!c_ke) error->all(FLERR,"Could not find compute heat/flux compute ID {}", id_ke);
+  c_pe = modify->get_compute_by_id(id_pe);
+  if (!c_pe) error->all(FLERR,"Could not find compute heat/flux compute ID {}", id_pe);
+  c_stress = modify->get_compute_by_id(id_stress);
+  if (!c_stress) error->all(FLERR,"Could not find compute heat/flux compute ID {}", id_stress);
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/compute_improper_local.cpp b/src/compute_improper_local.cpp
index 48070a4f7c..a58f4f4d0d 100644
--- a/src/compute_improper_local.cpp
+++ b/src/compute_improper_local.cpp
@@ -28,9 +28,9 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
-#define SMALL     0.001
+static constexpr double SMALL =     0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/compute_omega_chunk.cpp b/src/compute_omega_chunk.cpp
index 3c345ab7a4..1a8852b144 100644
--- a/src/compute_omega_chunk.cpp
+++ b/src/compute_omega_chunk.cpp
@@ -23,7 +23,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSILON 1.0e-6
+static constexpr double EPSILON = 1.0e-6;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/compute_pair.cpp b/src/compute_pair.cpp
index 2788b632d2..1cb22a006f 100644
--- a/src/compute_pair.cpp
+++ b/src/compute_pair.cpp
@@ -30,7 +30,7 @@ enum { EPAIR, EVDWL, ECOUL };
 ComputePair::ComputePair(LAMMPS *lmp, int narg, char **arg) :
     Compute(lmp, narg, arg), pstyle(nullptr), pair(nullptr), one(nullptr)
 {
-  if (narg < 4) error->all(FLERR, "Illegal compute pair command");
+  if (narg < 4) utils::missing_cmd_args(FLERR, "compute pair", error);
 
   scalar_flag = 1;
   extscalar = 1;
@@ -63,7 +63,7 @@ ComputePair::ComputePair(LAMMPS *lmp, int narg, char **arg) :
     else if (strcmp(arg[iarg], "ecoul") == 0)
       evalue = ECOUL;
     else
-      error->all(FLERR, "Illegal compute pair command");
+      error->all(FLERR, "Unknown compute pair keyword {}", arg[iarg]);
     ++iarg;
   }
 
@@ -75,7 +75,7 @@ ComputePair::ComputePair(LAMMPS *lmp, int narg, char **arg) :
     pair = force->pair_match(pstyle, 1, nsub);
   }
 
-  if (!pair) error->all(FLERR, "Unrecognized pair style in compute pair command");
+  if (!pair) error->all(FLERR, "Unused pair style {} in compute pair command", pstyle);
   npair = pair->nextra;
 
   if (npair) {
@@ -104,7 +104,7 @@ void ComputePair::init()
   // recheck for pair style in case it has been deleted
 
   pair = force->pair_match(pstyle, 1, nsub);
-  if (!pair) error->all(FLERR, "Unrecognized pair style in compute pair command");
+  if (!pair) error->all(FLERR, "Unrecognized pair style {} in compute pair command", pstyle);
 }
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/compute_pair_local.cpp b/src/compute_pair_local.cpp
index 5dad405bc3..88991f7481 100644
--- a/src/compute_pair_local.cpp
+++ b/src/compute_pair_local.cpp
@@ -28,7 +28,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 enum { DIST, ENG, FORCE, FX, FY, FZ, PN, DX, DY, DZ };
 enum { TYPE, RADIUS };
diff --git a/src/compute_property_grid.cpp b/src/compute_property_grid.cpp
index 17f9689bf6..6e316de3f8 100644
--- a/src/compute_property_grid.cpp
+++ b/src/compute_property_grid.cpp
@@ -28,7 +28,7 @@ using namespace LAMMPS_NS;
 enum { LOW, CTR };
 enum { UNSCALED, SCALED };
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/compute_property_local.cpp b/src/compute_property_local.cpp
index 87517a3e05..64f3859117 100644
--- a/src/compute_property_local.cpp
+++ b/src/compute_property_local.cpp
@@ -31,7 +31,7 @@ using namespace LAMMPS_NS;
 enum { NONE, NEIGH, PAIR, BOND, ANGLE, DIHEDRAL, IMPROPER };
 enum { TYPE, RADIUS };
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp
index b296804248..ee94c2d9a7 100644
--- a/src/compute_reduce.cpp
+++ b/src/compute_reduce.cpp
@@ -31,7 +31,7 @@
 
 using namespace LAMMPS_NS;
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 //----------------------------------------------------------------
 
diff --git a/src/compute_reduce_chunk.cpp b/src/compute_reduce_chunk.cpp
index 51781eac7b..9ba0e30a8d 100644
--- a/src/compute_reduce_chunk.cpp
+++ b/src/compute_reduce_chunk.cpp
@@ -31,7 +31,7 @@ using namespace LAMMPS_NS;
 
 enum { SUM, MINN, MAXX };
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/compute_stress_atom.cpp b/src/compute_stress_atom.cpp
index 55824b1ce5..3560570163 100644
--- a/src/compute_stress_atom.cpp
+++ b/src/compute_stress_atom.cpp
@@ -54,11 +54,11 @@ ComputeStressAtom::ComputeStressAtom(LAMMPS *lmp, int narg, char **arg) :
     id_temp = nullptr;
   else {
     id_temp = utils::strdup(arg[3]);
-
-    int icompute = modify->find_compute(id_temp);
-    if (icompute < 0) error->all(FLERR, "Could not find compute stress/atom temperature ID");
-    if (modify->compute[icompute]->tempflag == 0)
-      error->all(FLERR, "Compute stress/atom temperature ID does not compute temperature");
+    auto icompute = modify->get_compute_by_id(id_temp);
+    if (!icompute)
+      error->all(FLERR, "Could not find compute stress/atom temperature compute {}", id_temp);
+    if (icompute->tempflag == 0)
+      error->all(FLERR, "Compute stress/atom compute {} does not compute temperature", id_temp);
   }
 
   // process optional args
@@ -122,9 +122,9 @@ void ComputeStressAtom::init()
   // fixes could have changed or compute_modify could have changed it
 
   if (id_temp) {
-    int icompute = modify->find_compute(id_temp);
-    if (icompute < 0) error->all(FLERR, "Could not find compute stress/atom temperature ID");
-    temperature = modify->compute[icompute];
+    temperature = modify->get_compute_by_id(id_temp);
+    if (!temperature)
+      error->all(FLERR, "Could not find compute stress/atom temperature compute {}", id_temp);
     if (temperature->tempbias)
       biasflag = BIAS;
     else
@@ -216,8 +216,8 @@ void ComputeStressAtom::compute_peratom()
   // add in per-atom contributions from relevant fixes
   // skip if vatom = nullptr
   // possible during setup phase if fix has not initialized its vatom yet
-  // e.g. fix ave/spatial defined before fix shake,
-  //   and fix ave/spatial uses a per-atom stress from this compute as input
+  // e.g. fix ave/chunk defined before fix shake,
+  //   and fix ave/chunk uses a per-atom stress from this compute as input
 
   if (fixflag) {
     for (auto &ifix : modify->get_fix_list())
diff --git a/src/compute_temp_sphere.cpp b/src/compute_temp_sphere.cpp
index 0cfc9a93ba..2294177e6f 100644
--- a/src/compute_temp_sphere.cpp
+++ b/src/compute_temp_sphere.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    https://www.lammps.org/, Sandia National Laboratories
@@ -14,28 +13,28 @@
 
 #include "compute_temp_sphere.h"
 
-#include <cstring>
 #include "atom.h"
-#include "update.h"
-#include "force.h"
 #include "domain.h"
-#include "modify.h"
-#include "group.h"
 #include "error.h"
+#include "force.h"
+#include "group.h"
+#include "modify.h"
+#include "update.h"
+
+#include <cstring>
 
 using namespace LAMMPS_NS;
 
-enum{ROTATE,ALL};
+enum { ROTATE, ALL };
 
-#define INERTIA 0.4          // moment of inertia prefactor for sphere
+static constexpr double INERTIA = 0.4;    // moment of inertia prefactor for sphere
 
 /* ---------------------------------------------------------------------- */
 
 ComputeTempSphere::ComputeTempSphere(LAMMPS *lmp, int narg, char **arg) :
-  Compute(lmp, narg, arg),
-  id_bias(nullptr)
+    Compute(lmp, narg, arg), id_bias(nullptr)
 {
-  if (narg < 3) error->all(FLERR,"Illegal compute temp/sphere command");
+  if (narg < 3) utils::missing_cmd_args(FLERR, "compute temp/sphere", error);
 
   scalar_flag = vector_flag = 1;
   size_vector = 6;
@@ -48,20 +47,22 @@ ComputeTempSphere::ComputeTempSphere(LAMMPS *lmp, int narg, char **arg) :
 
   int iarg = 3;
   while (iarg < narg) {
-    if (strcmp(arg[iarg],"bias") == 0) {
-      if (iarg+2 > narg)
-        error->all(FLERR,"Illegal compute temp/sphere command");
+    if (strcmp(arg[iarg], "bias") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "compute temp/sphere bias", error);
       tempbias = 1;
-      id_bias = utils::strdup(arg[iarg+1]);
+      id_bias = utils::strdup(arg[iarg + 1]);
       iarg += 2;
-    } else if (strcmp(arg[iarg],"dof") == 0) {
-      if (iarg+2 > narg)
-        error->all(FLERR,"Illegal compute temp/sphere command");
-      if (strcmp(arg[iarg+1],"rotate") == 0) mode = ROTATE;
-      else if (strcmp(arg[iarg+1],"all") == 0) mode = ALL;
-      else error->all(FLERR,"Illegal compute temp/sphere command");
+    } else if (strcmp(arg[iarg], "dof") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "compute temp/sphere dof", error);
+      if (strcmp(arg[iarg + 1], "rotate") == 0)
+        mode = ROTATE;
+      else if (strcmp(arg[iarg + 1], "all") == 0)
+        mode = ALL;
+      else
+        error->all(FLERR, "Unknown compute temp/sphere dof keyword {}", arg[iarg + 1]);
       iarg += 2;
-    } else error->all(FLERR,"Illegal compute temp/sphere command");
+    } else
+      error->all(FLERR, "Unknown compute temp/sphere keyword {}", arg[iarg]);
   }
 
   // when computing only the rotational temperature,
@@ -73,16 +74,18 @@ ComputeTempSphere::ComputeTempSphere(LAMMPS *lmp, int narg, char **arg) :
 
   // error checks
 
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Compute temp/sphere requires atom style sphere");
+  if (!atom->omega_flag)
+    error->all(FLERR,"Compute temp/sphere requires atom attribute omega");
+  if (!atom->radius_flag)
+    error->all(FLERR,"Compute temp/sphere requires atom attribute radius");
 }
 
 /* ---------------------------------------------------------------------- */
 
 ComputeTempSphere::~ComputeTempSphere()
 {
-  delete [] id_bias;
-  delete [] vector;
+  delete[] id_bias;
+  delete[] vector;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -90,18 +93,16 @@ ComputeTempSphere::~ComputeTempSphere()
 void ComputeTempSphere::init()
 {
   if (tempbias) {
-    int i = modify->find_compute(id_bias);
-    if (i < 0)
-      error->all(FLERR,"Could not find compute ID for temperature bias");
-    tbias = modify->compute[i];
-    if (tbias->tempflag == 0)
-      error->all(FLERR,"Bias compute does not calculate temperature");
-    if (tbias->tempbias == 0)
-      error->all(FLERR,"Bias compute does not calculate a velocity bias");
+    tbias = modify->get_compute_by_id(id_bias);
+    if (!tbias) error->all(FLERR, "Could not find compute {} for temperature bias", id_bias);
+    if (tbias->tempflag == 0) error->all(FLERR, "Bias compute does not calculate temperature");
+    if (tbias->tempbias == 0) error->all(FLERR, "Bias compute does not calculate a velocity bias");
     if (tbias->igroup != igroup)
-      error->all(FLERR,"Bias compute group does not match compute group");
-    if (strcmp(tbias->style,"temp/region") == 0) tempbias = 2;
-    else tempbias = 1;
+      error->all(FLERR, "Bias compute group does not match compute group");
+    if (strcmp(tbias->style, "temp/region") == 0)
+      tempbias = 2;
+    else
+      tempbias = 1;
 
     // init and setup bias compute because
     // this compute's setup()->dof_compute() may be called first
@@ -124,7 +125,7 @@ void ComputeTempSphere::setup()
 
 void ComputeTempSphere::dof_compute()
 {
-  int count,count_all;
+  int count, count_all;
 
   adjust_dof_fix();
   natoms_temp = group->count(igroup);
@@ -146,8 +147,10 @@ void ComputeTempSphere::dof_compute()
         if (radius[i] == 0.0) {
           if (mode == ALL) count += 3;
         } else {
-          if (mode == ALL) count += 6;
-          else count += 3;
+          if (mode == ALL)
+            count += 6;
+          else
+            count += 3;
         }
       }
   } else {
@@ -156,13 +159,15 @@ void ComputeTempSphere::dof_compute()
         if (radius[i] == 0.0) {
           if (mode == ALL) count += 2;
         } else {
-          if (mode == ALL) count += 3;
-          else count += 1;
+          if (mode == ALL)
+            count += 3;
+          else
+            count += 1;
         }
       }
   }
 
-  MPI_Allreduce(&count,&count_all,1,MPI_INT,MPI_SUM,world);
+  MPI_Allreduce(&count, &count_all, 1, MPI_INT, MPI_SUM, world);
   dof = count_all;
 
   // additional adjustments to dof
@@ -181,8 +186,10 @@ void ComputeTempSphere::dof_compute()
             if (radius[i] == 0.0) {
               if (mode == ALL) count += 3;
             } else {
-              if (mode == ALL) count += 6;
-              else count += 3;
+              if (mode == ALL)
+                count += 6;
+              else
+                count += 3;
             }
           }
         }
@@ -193,20 +200,24 @@ void ComputeTempSphere::dof_compute()
             if (radius[i] == 0.0) {
               if (mode == ALL) count += 2;
             } else {
-              if (mode == ALL) count += 3;
-              else count += 1;
+              if (mode == ALL)
+                count += 3;
+              else
+                count += 1;
             }
           }
         }
     }
 
-    MPI_Allreduce(&count,&count_all,1,MPI_INT,MPI_SUM,world);
+    MPI_Allreduce(&count, &count_all, 1, MPI_INT, MPI_SUM, world);
     dof -= count_all;
   }
 
   dof -= extra_dof + fix_dof;
-  if (dof > 0) tfactor = force->mvv2e / (dof * force->boltz);
-  else tfactor = 0.0;
+  if (dof > 0)
+    tfactor = force->mvv2e / (dof * force->boltz);
+  else
+    tfactor = 0.0;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -231,6 +242,8 @@ double ComputeTempSphere::compute_scalar()
 
   double t = 0.0;
 
+  // clang-format off
+
   if (mode == ALL) {
     for (int i = 0; i < nlocal; i++)
       if (mask[i] & groupbit) {
@@ -244,13 +257,14 @@ double ComputeTempSphere::compute_scalar()
         t += (omega[i][0]*omega[i][0] + omega[i][1]*omega[i][1] +
               omega[i][2]*omega[i][2]) * INERTIA*rmass[i]*radius[i]*radius[i];
   }
+  // clang-format on
 
   if (tempbias) tbias->restore_bias_all();
 
-  MPI_Allreduce(&t,&scalar,1,MPI_DOUBLE,MPI_SUM,world);
+  MPI_Allreduce(&t, &scalar, 1, MPI_DOUBLE, MPI_SUM, world);
   if (dynamic || tempbias == 2) dof_compute();
   if (dof < 0.0 && natoms_temp > 0.0)
-    error->all(FLERR,"Temperature compute degrees of freedom < 0");
+    error->all(FLERR, "Temperature compute degrees of freedom < 0");
   scalar *= tfactor;
   return scalar;
 }
@@ -275,44 +289,44 @@ void ComputeTempSphere::compute_vector()
 
   // point particles will not contribute rotation due to radius = 0
 
-  double massone,inertiaone,t[6];
+  double massone, inertiaone, t[6];
   for (auto &ti : t) ti = 0.0;
 
   if (mode == ALL) {
     for (int i = 0; i < nlocal; i++)
       if (mask[i] & groupbit) {
         massone = rmass[i];
-        t[0] += massone * v[i][0]*v[i][0];
-        t[1] += massone * v[i][1]*v[i][1];
-        t[2] += massone * v[i][2]*v[i][2];
-        t[3] += massone * v[i][0]*v[i][1];
-        t[4] += massone * v[i][0]*v[i][2];
-        t[5] += massone * v[i][1]*v[i][2];
+        t[0] += massone * v[i][0] * v[i][0];
+        t[1] += massone * v[i][1] * v[i][1];
+        t[2] += massone * v[i][2] * v[i][2];
+        t[3] += massone * v[i][0] * v[i][1];
+        t[4] += massone * v[i][0] * v[i][2];
+        t[5] += massone * v[i][1] * v[i][2];
 
-        inertiaone = INERTIA*rmass[i]*radius[i]*radius[i];
-        t[0] += inertiaone * omega[i][0]*omega[i][0];
-        t[1] += inertiaone * omega[i][1]*omega[i][1];
-        t[2] += inertiaone * omega[i][2]*omega[i][2];
-        t[3] += inertiaone * omega[i][0]*omega[i][1];
-        t[4] += inertiaone * omega[i][0]*omega[i][2];
-        t[5] += inertiaone * omega[i][1]*omega[i][2];
+        inertiaone = INERTIA * rmass[i] * radius[i] * radius[i];
+        t[0] += inertiaone * omega[i][0] * omega[i][0];
+        t[1] += inertiaone * omega[i][1] * omega[i][1];
+        t[2] += inertiaone * omega[i][2] * omega[i][2];
+        t[3] += inertiaone * omega[i][0] * omega[i][1];
+        t[4] += inertiaone * omega[i][0] * omega[i][2];
+        t[5] += inertiaone * omega[i][1] * omega[i][2];
       }
   } else {
     for (int i = 0; i < nlocal; i++)
       if (mask[i] & groupbit) {
-        inertiaone = INERTIA*rmass[i]*radius[i]*radius[i];
-        t[0] += inertiaone * omega[i][0]*omega[i][0];
-        t[1] += inertiaone * omega[i][1]*omega[i][1];
-        t[2] += inertiaone * omega[i][2]*omega[i][2];
-        t[3] += inertiaone * omega[i][0]*omega[i][1];
-        t[4] += inertiaone * omega[i][0]*omega[i][2];
-        t[5] += inertiaone * omega[i][1]*omega[i][2];
+        inertiaone = INERTIA * rmass[i] * radius[i] * radius[i];
+        t[0] += inertiaone * omega[i][0] * omega[i][0];
+        t[1] += inertiaone * omega[i][1] * omega[i][1];
+        t[2] += inertiaone * omega[i][2] * omega[i][2];
+        t[3] += inertiaone * omega[i][0] * omega[i][1];
+        t[4] += inertiaone * omega[i][0] * omega[i][2];
+        t[5] += inertiaone * omega[i][1] * omega[i][2];
       }
   }
 
   if (tempbias) tbias->restore_bias_all();
 
-  MPI_Allreduce(t,vector,6,MPI_DOUBLE,MPI_SUM,world);
+  MPI_Allreduce(t, vector, 6, MPI_DOUBLE, MPI_SUM, world);
   for (int i = 0; i < 6; i++) vector[i] *= force->mvv2e;
 }
 
@@ -322,7 +336,7 @@ void ComputeTempSphere::compute_vector()
 
 void ComputeTempSphere::remove_bias(int i, double *v)
 {
-  tbias->remove_bias(i,v);
+  tbias->remove_bias(i, v);
 }
 
 /* ----------------------------------------------------------------------
@@ -331,7 +345,7 @@ void ComputeTempSphere::remove_bias(int i, double *v)
 
 void ComputeTempSphere::remove_bias_thr(int i, double *v, double *b)
 {
-  tbias->remove_bias_thr(i,v,b);
+  tbias->remove_bias_thr(i, v, b);
 }
 
 /* ----------------------------------------------------------------------
@@ -341,7 +355,7 @@ void ComputeTempSphere::remove_bias_thr(int i, double *v, double *b)
 
 void ComputeTempSphere::restore_bias(int i, double *v)
 {
-  tbias->restore_bias(i,v);
+  tbias->restore_bias(i, v);
 }
 
 /* ----------------------------------------------------------------------
@@ -351,5 +365,5 @@ void ComputeTempSphere::restore_bias(int i, double *v)
 
 void ComputeTempSphere::restore_bias_thr(int i, double *v, double *b)
 {
-  tbias->restore_bias_thr(i,v,b);
+  tbias->restore_bias_thr(i, v, b);
 }
diff --git a/src/dihedral_hybrid.cpp b/src/dihedral_hybrid.cpp
index d38ccf5d52..9da4df1f68 100644
--- a/src/dihedral_hybrid.cpp
+++ b/src/dihedral_hybrid.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EXTRA 1000
+static constexpr int EXTRA = 1000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/dihedral_write.cpp b/src/dihedral_write.cpp
index 3d87591bcc..375373523b 100644
--- a/src/dihedral_write.cpp
+++ b/src/dihedral_write.cpp
@@ -35,7 +35,7 @@ using MathConst::DEG2RAD;
 using MathConst::RAD2DEG;
 
 static constexpr double epsilon = 6.5e-6;
-#define MAXLINE 1024
+static constexpr int MAXLINE = 1024;
 /* ---------------------------------------------------------------------- */
 
 void DihedralWrite::command(int narg, char **arg)
@@ -148,7 +148,7 @@ void DihedralWrite::command(int narg, char **arg)
     writer->input->one("mass * 1.0");
     writer->input->one(fmt::format("dihedral_style {}", force->dihedral_style));
     FILE *coeffs;
-    char line[MAXLINE];
+    char line[MAXLINE] = {'\0'};
     coeffs = fopen(coeffs_file.c_str(), "r");
     for (int i = 0; i < atom->ndihedraltypes; ++i) {
       fgets(line, MAXLINE, coeffs);
diff --git a/src/displace_atoms.cpp b/src/displace_atoms.cpp
index fa333f1bc2..5ecf5a2c9e 100644
--- a/src/displace_atoms.cpp
+++ b/src/displace_atoms.cpp
@@ -160,7 +160,7 @@ void DisplaceAtoms::command(int narg, char **arg)
     int *mask = atom->mask;
     int nlocal = atom->nlocal;
 
-    double fraction,dramp;
+    double fraction, dramp;
 
     for (i = 0; i < nlocal; i++) {
       if (mask[i] & groupbit) {
@@ -255,11 +255,12 @@ void DisplaceAtoms::command(int narg, char **arg)
     int line_flag = atom->line_flag;
     int tri_flag = atom->tri_flag;
     int body_flag = atom->body_flag;
+    int quat_atom_flag = atom->quat_flag;
 
     int theta_flag = 0;
     int quat_flag = 0;
     if (line_flag) theta_flag = 1;
-    if (ellipsoid_flag || tri_flag || body_flag) quat_flag = 1;
+    if (ellipsoid_flag || tri_flag || body_flag || quat_atom_flag) quat_flag = 1;
 
     // AtomVec pointers to retrieve per-atom storage of extra quantities
 
@@ -269,6 +270,7 @@ void DisplaceAtoms::command(int narg, char **arg)
     auto avec_body = dynamic_cast<AtomVecBody *>(atom->style_match("body"));
 
     double **x = atom->x;
+    double **quat_atom = atom->quat;
     int *ellipsoid = atom->ellipsoid;
     int *line = atom->line;
     int *tri = atom->tri;
@@ -313,7 +315,7 @@ void DisplaceAtoms::command(int narg, char **arg)
 
         // quats for ellipsoids, tris, and bodies
 
-        if (quat_flag) {
+        if (quat_flag && !quat_atom_flag) {
           quat = nullptr;
           if (ellipsoid_flag && ellipsoid[i] >= 0)
             quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
@@ -322,12 +324,18 @@ void DisplaceAtoms::command(int narg, char **arg)
           else if (body_flag && body[i] >= 0)
             quat = avec_body->bonus[body[i]].quat;
           if (quat) {
-            MathExtra::quatquat(qrotate,quat,qnew);
+            MathExtra::quatquat(qrotate, quat, qnew);
             quat[0] = qnew[0];
             quat[1] = qnew[1];
             quat[2] = qnew[2];
             quat[3] = qnew[3];
           }
+        } else if (quat_atom_flag) {
+          MathExtra::quatquat(qrotate, quat_atom[i], qnew);
+          quat_atom[i][0] = qnew[0];
+          quat_atom[i][1] = qnew[1];
+          quat_atom[i][2] = qnew[2];
+          quat_atom[i][3] = qnew[3];
         }
       }
     }
diff --git a/src/domain.cpp b/src/domain.cpp
index 3627af26cf..7513d384e2 100644
--- a/src/domain.cpp
+++ b/src/domain.cpp
@@ -42,10 +42,9 @@
 
 using namespace LAMMPS_NS;
 
-#define BIG   1.0e20
-#define SMALL 1.0e-4
-#define DELTAREGION 4
-#define BONDSTRETCH 1.1
+static constexpr double BIG =   1.0e20;
+static constexpr double SMALL = 1.0e-4;
+static constexpr double BONDSTRETCH = 1.1;
 
 /* ----------------------------------------------------------------------
    one instance per region style in style_region.h
diff --git a/src/dump.cpp b/src/dump.cpp
index a231b367d5..c735a4a60d 100644
--- a/src/dump.cpp
+++ b/src/dump.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    https://www.lammps.org/, Sandia National Laboratories
@@ -38,16 +37,16 @@ using namespace LAMMPS_NS;
 Dump *Dump::dumpptr;
 #endif
 
-#define BIG 1.0e20
-#define EPSILON 1.0e-6
+static constexpr double BIG = 1.0e20;
+static constexpr double EPSILON = 1.0e-6;
 
 enum { ASCEND, DESCEND };
 
 /* ---------------------------------------------------------------------- */
 
 Dump::Dump(LAMMPS *lmp, int /*narg*/, char **arg) :
-    Pointers(lmp), multiname(nullptr), refresh(nullptr), skipvar(nullptr), format(nullptr),
-    format_default(nullptr), format_line_user(nullptr), format_float_user(nullptr),
+    Pointers(lmp), multiname(nullptr), idrefresh(nullptr), irefresh(nullptr), skipvar(nullptr),
+    format(nullptr), format_default(nullptr), format_line_user(nullptr), format_float_user(nullptr),
     format_int_user(nullptr), format_bigint_user(nullptr), format_column_user(nullptr), fp(nullptr),
     nameslist(nullptr), buf(nullptr), sbuf(nullptr), ids(nullptr), bufsort(nullptr),
     idsort(nullptr), index(nullptr), proclist(nullptr), xpbc(nullptr), vpbc(nullptr),
@@ -119,21 +118,21 @@ Dump::Dump(LAMMPS *lmp, int /*narg*/, char **arg) :
   fileproc = 0;
 
   char *ptr;
-  if ((ptr = strchr(filename,'%'))) {
+  if ((ptr = strchr(filename, '%'))) {
     multiproc = 1;
     nclusterprocs = 1;
     filewriter = 1;
     fileproc = me;
-    MPI_Comm_split(world,me,0,&clustercomm);
+    MPI_Comm_split(world, me, 0, &clustercomm);
     *ptr = '\0';
-    multiname = utils::strdup(fmt::format("{}{}{}", filename, me, ptr+1));
+    multiname = utils::strdup(fmt::format("{}{}{}", filename, me, ptr + 1));
     *ptr = '%';
   }
 
-  if (strchr(filename,'*')) multifile = 1;
+  if (strchr(filename, '*')) multifile = 1;
 
-  if (utils::strmatch(filename, "\\.bin$")
-      || utils::strmatch(filename, "\\.lammpsbin$")) binary = 1;
+  if (utils::strmatch(filename, "\\.bin$") || utils::strmatch(filename, "\\.lammpsbin$"))
+    binary = 1;
   if (platform::has_compress_extension(filename)) compressed = 1;
 }
 
@@ -153,7 +152,7 @@ Dump::~Dump()
   delete[] format_int_user;
   delete[] format_bigint_user;
 
-  delete[] refresh;
+  delete[] idrefresh;
   delete[] skipvar;
 
   // format_column_user is deallocated by child classes that use it
@@ -179,8 +178,7 @@ Dump::~Dump()
   // delete storage for caching file names
 
   if (maxfiles > 0) {
-    for (int idx=0; idx < numfiles; ++idx)
-      delete[] nameslist[idx];
+    for (int idx = 0; idx < numfiles; ++idx) delete[] nameslist[idx];
     delete[] nameslist;
   }
 
@@ -196,6 +194,8 @@ Dump::~Dump()
   }
 }
 
+// clang-format off
+
 /* ---------------------------------------------------------------------- */
 
 void Dump::init()
@@ -216,21 +216,21 @@ void Dump::init()
     index = proclist = nullptr;
     irregular = nullptr;
     if ((has_id == 0) && (me == 0))
-      error->warning(FLERR,"Dump {} includes no atom IDs and is not sorted by ID. This may complicate "
-                     "post-processing tasks or visualization", id);
+      error->warning(FLERR,"Dump {} includes no atom IDs and is not sorted by ID. "
+                     "This may complicate post-processing tasks or visualization", id);
   }
 
   if (sort_flag) {
     if (multiproc > 1)
       error->all(FLERR,
-                 "Cannot sort dump when 'nfile' or 'fileper' keywords are set to non-default values");
+                 "Cannot sort dump when 'nfile' or 'fileper' keywords have non-default values");
     if (sortcol == 0 && atom->tag_enable == 0)
       error->all(FLERR,"Cannot sort dump on atom IDs with no atom IDs defined");
     if (sortcol && sortcol > size_one)
-      error->all(FLERR,"Dump sort column is invalid");
+      error->all(FLERR,"Dump sort column index {} is invalid", sortcol);
     if ((sortcol != 0) && (has_id == 0) && (me == 0))
-      error->warning(FLERR,"Dump {} includes no atom IDs and is not sorted by ID. This may complicate "
-                     "post-processing tasks or visualization", id);
+      error->warning(FLERR,"Dump {} includes no atom IDs and is not sorted by ID. "
+                     "This may complicate post-processing tasks or visualization", id);
     if (nprocs > 1 && irregular == nullptr)
       irregular = new Irregular(lmp);
 
@@ -288,11 +288,8 @@ void Dump::init()
   // search for refresh compute specified by dump_modify refresh
 
   if (refreshflag) {
-    int icompute;
-    for (icompute = 0; icompute < modify->ncompute; icompute++)
-      if (strcmp(refresh,modify->compute[icompute]->id) == 0) break;
-    if (icompute < modify->ncompute) irefresh = icompute;
-    else error->all(FLERR,"Dump could not find refresh compute ID");
+    irefresh = modify->get_compute_by_id(idrefresh);
+    if (!irefresh) error->all(FLERR,"Dump could not find refresh compute ID {}", idrefresh);
   }
 
   // if skipflag, check skip variable
@@ -531,7 +528,7 @@ void Dump::write()
   // trigger post-dump refresh by specified compute
   // currently used for incremental dump files
 
-  if (refreshflag) modify->compute[irefresh]->refresh();
+  if (refreshflag) irefresh->refresh();
 
   if (filewriter && fp != nullptr) write_footer();
 
diff --git a/src/dump.h b/src/dump.h
index bae7dbd8c8..43baf96ccf 100644
--- a/src/dump.h
+++ b/src/dump.h
@@ -19,6 +19,7 @@
 #include <map>
 
 namespace LAMMPS_NS {
+class Compute;
 
 class Dump : protected Pointers {
   friend class Output;
@@ -45,15 +46,9 @@ class Dump : protected Pointers {
   void init();
   virtual void write();
 
-  virtual int pack_forward_comm(int, int *, double *, int, int *)
-  {
-    return 0;
-  }
+  virtual int pack_forward_comm(int, int *, double *, int, int *) { return 0; }
   virtual void unpack_forward_comm(int, int, double *) {}
-  virtual int pack_reverse_comm(int, int, double *)
-  {
-    return 0;
-  }
+  virtual int pack_reverse_comm(int, int, double *) { return 0; }
   virtual void unpack_reverse_comm(int, int *, double *) {}
 
   void modify_params(int, char **);
@@ -94,9 +89,9 @@ class Dump : protected Pointers {
 
   bigint delaystep;
 
-  int refreshflag;    // 1 if dump_modify refresh specified
-  char *refresh;      // compute ID to invoke refresh() on
-  int irefresh;       // index of compute
+  int refreshflag;      // 1 if dump_modify refresh specified
+  char *idrefresh;      // compute ID to invoke refresh() on
+  Compute *irefresh;    // index of compute
 
   int skipflag;     // 1 if skip condition defined
   char *skipvar;    // name of variable to check for skip condition
@@ -158,17 +153,11 @@ class Dump : protected Pointers {
 
   virtual void init_style() = 0;
   virtual void openfile();
-  virtual int modify_param(int, char **)
-  {
-    return 0;
-  }
+  virtual int modify_param(int, char **) { return 0; }
   virtual void write_header(bigint) = 0;
   virtual int count();
   virtual void pack(tagint *) = 0;
-  virtual int convert_string(int, double *)
-  {
-    return 0;
-  }
+  virtual int convert_string(int, double *) { return 0; }
   virtual void write_data(int, double *) = 0;
   virtual void write_footer() {}
 
diff --git a/src/dump_atom.cpp b/src/dump_atom.cpp
index 2d047dc0a0..fb3f58042c 100644
--- a/src/dump_atom.cpp
+++ b/src/dump_atom.cpp
@@ -24,8 +24,8 @@
 
 using namespace LAMMPS_NS;
 
-#define ONELINE 256
-#define DELTA 1048576
+static constexpr int ONELINE = 256;
+static constexpr int DELTA = 1048576;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/dump_cfg.cpp b/src/dump_cfg.cpp
index a64507dc16..e5af83a3c6 100644
--- a/src/dump_cfg.cpp
+++ b/src/dump_cfg.cpp
@@ -29,9 +29,9 @@
 
 using namespace LAMMPS_NS;
 
-#define UNWRAPEXPAND 10.0
-#define ONEFIELD 32
-#define DELTA 1048576
+static constexpr double UNWRAPEXPAND = 10.0;
+static constexpr int ONEFIELD = 32;
+static constexpr int DELTA = 1048576;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/dump_custom.cpp b/src/dump_custom.cpp
index 1e60295bbe..ed70c7413d 100644
--- a/src/dump_custom.cpp
+++ b/src/dump_custom.cpp
@@ -47,8 +47,8 @@ enum{ID,MOL,PROC,PROCP1,TYPE,ELEMENT,MASS,
      COMPUTE,FIX,VARIABLE,IVEC,DVEC,IARRAY,DARRAY};
 enum{LT,LE,GT,GE,EQ,NEQ,XOR};
 
-#define ONEFIELD 32
-#define DELTA 1048576
+static constexpr int ONEFIELD = 32;
+static constexpr int DELTA = 1048576;
 
 /* ---------------------------------------------------------------------- */
 
@@ -1768,7 +1768,7 @@ int DumpCustom::modify_param(int narg, char **arg)
     if (refreshflag) error->all(FLERR,"Dump_modify can only have one refresh");
 
     refreshflag = 1;
-    refresh = argi.copy_name();
+    idrefresh = argi.copy_name();
     return 2;
   }
 
diff --git a/src/dump_grid.cpp b/src/dump_grid.cpp
index 8e3a2977cf..4c89b05739 100644
--- a/src/dump_grid.cpp
+++ b/src/dump_grid.cpp
@@ -35,8 +35,8 @@ using namespace LAMMPS_NS;
 
 enum {COMPUTE,FIX};
 
-#define ONEFIELD 32
-#define DELTA 1048576
+static constexpr int ONEFIELD = 32;
+static constexpr int DELTA = 1048576;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/dump_image.cpp b/src/dump_image.cpp
index ba7e36eb0b..ed4fc8dff4 100644
--- a/src/dump_image.cpp
+++ b/src/dump_image.cpp
@@ -49,7 +49,7 @@
 using namespace LAMMPS_NS;
 using MathConst::DEG2RAD;
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 enum{NUMERIC,ATOM,TYPE,ELEMENT,ATTRIBUTE};
 enum{SPHERE,LINE,TRI};           // also in some Body and Fix child classes
diff --git a/src/dump_local.cpp b/src/dump_local.cpp
index 9695e152b2..8d546634b6 100644
--- a/src/dump_local.cpp
+++ b/src/dump_local.cpp
@@ -27,8 +27,8 @@
 
 using namespace LAMMPS_NS;
 
-#define ONEFIELD 32
-#define DELTA 1048576
+static constexpr int ONEFIELD = 32;
+static constexpr int DELTA = 1048576;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/dump_xyz.cpp b/src/dump_xyz.cpp
index 241ec1c059..f7ab77b2bf 100644
--- a/src/dump_xyz.cpp
+++ b/src/dump_xyz.cpp
@@ -23,8 +23,8 @@
 
 using namespace LAMMPS_NS;
 
-#define ONELINE 128
-#define DELTA 1048576
+static constexpr int ONELINE = 128;
+static constexpr int DELTA = 1048576;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/fix.h b/src/fix.h
index 9b595f0c60..ca0a1ef84b 100644
--- a/src/fix.h
+++ b/src/fix.h
@@ -99,8 +99,8 @@ class Fix : protected Pointers {
   int size_local_cols;    // 0 = vector, N = columns in local array
   int local_freq;         // frequency local data is available at
 
-  int pergrid_flag;       // 0/1 if per-grid data is stored
-  int pergrid_freq;       // frequency per-grid data is available at
+  int pergrid_flag;    // 0/1 if per-grid data is stored
+  int pergrid_freq;    // frequency per-grid data is available at
 
   int extscalar;    // 0/1 if global scalar is intensive/extensive
   int extvector;    // 0/1/-1 if global vector is all int/ext/extlist
@@ -129,11 +129,11 @@ class Fix : protected Pointers {
 
   // KOKKOS flags and variables
 
-  int kokkosable;             // 1 if Kokkos fix
-  int forward_comm_device;    // 1 if forward comm on Device
-  int exchange_comm_device;   // 1 if exchange comm on Device
-  int fuse_integrate_flag;    // 1 if can fuse initial integrate with final integrate
-  int sort_device;            // 1 if sort on Device
+  int kokkosable;              // 1 if Kokkos fix
+  int forward_comm_device;     // 1 if forward comm on Device
+  int exchange_comm_device;    // 1 if exchange comm on Device
+  int fuse_integrate_flag;     // 1 if can fuse initial integrate with final integrate
+  int sort_device;             // 1 if sort on Device
   ExecutionSpace execution_space;
   unsigned int datamask_read, datamask_modify;
 
@@ -223,7 +223,7 @@ class Fix : protected Pointers {
   virtual void unpack_reverse_grid(int, void *, int, int *){};
   virtual void pack_remap_grid(int, void *, int, int *){};
   virtual void unpack_remap_grid(int, void *, int, int *){};
-  virtual int unpack_read_grid(int, char *) {return 0;};
+  virtual int unpack_read_grid(int, char *) { return 0; };
   virtual void pack_write_grid(int, void *){};
   virtual void unpack_write_grid(int, void *, int *){};
 
@@ -236,7 +236,7 @@ class Fix : protected Pointers {
   virtual double compute_vector(int) { return 0.0; }
   virtual double compute_array(int, int) { return 0.0; }
 
-  virtual int dof(int) { return 0; }
+  virtual bigint dof(int) { return 0; }
   virtual void deform(int) {}
   virtual void reset_target(double) {}
   virtual void reset_dt() {}
diff --git a/src/fix_ave_chunk.cpp b/src/fix_ave_chunk.cpp
index 7c37bbaaff..8ed518efc2 100644
--- a/src/fix_ave_chunk.cpp
+++ b/src/fix_ave_chunk.cpp
@@ -485,7 +485,7 @@ void FixAveChunk::init()
 
 /* ----------------------------------------------------------------------
    only does averaging if nvalid = current timestep
-   do not call setup_chunks(), even though fix ave/spatial called setup_bins()
+   do not call setup_chunks(), even though fix ave/chunk called setup_bins()
    b/c could cause nchunk to change if Nfreq epoch crosses 2 runs
    does mean that if change_box is used between runs to change box size,
      that nchunk may not track it
diff --git a/src/fix_ave_histo.cpp b/src/fix_ave_histo.cpp
index 4503ad56f4..a92efcdacd 100644
--- a/src/fix_ave_histo.cpp
+++ b/src/fix_ave_histo.cpp
@@ -35,7 +35,7 @@ enum { SCALAR, VECTOR, WINDOW };
 enum { DEFAULT, GLOBAL, PERATOM, LOCAL };
 enum { IGNORE, END, EXTRA };
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 /* ---------------------------------------------------------------------- */
 
 FixAveHisto::FixAveHisto(LAMMPS *lmp, int narg, char **arg) :
diff --git a/src/fix_ave_histo_weight.cpp b/src/fix_ave_histo_weight.cpp
index 181aa2a79d..7a5458bd3d 100644
--- a/src/fix_ave_histo_weight.cpp
+++ b/src/fix_ave_histo_weight.cpp
@@ -38,7 +38,7 @@ enum { DEFAULT, GLOBAL, PERATOM, LOCAL };
 enum { IGNORE, END, EXTRA };
 enum { SINGLE, VALUE };
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/fix_ave_time.cpp b/src/fix_ave_time.cpp
index 833d5f4207..f6ba0ad0e6 100644
--- a/src/fix_ave_time.cpp
+++ b/src/fix_ave_time.cpp
@@ -1081,17 +1081,17 @@ void FixAveTime::options(int iarg, int narg, char **arg)
       format = format_user;
       iarg += 2;
     } else if (strcmp(arg[iarg],"title1") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix ave/spatial command");
+      if (iarg+2 > narg) error->all(FLERR,"Illegal fix ave/time command");
       delete[] title1;
       title1 = utils::strdup(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"title2") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix ave/spatial command");
+      if (iarg+2 > narg) error->all(FLERR,"Illegal fix ave/time command");
       delete[] title2;
       title2 = utils::strdup(arg[iarg+1]);
       iarg += 2;
     } else if (strcmp(arg[iarg],"title3") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix ave/spatial command");
+      if (iarg+2 > narg) error->all(FLERR,"Illegal fix ave/time command");
       delete[] title3;
       title3 = utils::strdup(arg[iarg+1]);
       iarg += 2;
diff --git a/src/fix_balance.cpp b/src/fix_balance.cpp
index 7174765f52..23a56c0a9d 100644
--- a/src/fix_balance.cpp
+++ b/src/fix_balance.cpp
@@ -83,7 +83,7 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
   // error checks
 
   if (lbstyle == SHIFT) {
-    int blen = bstr.size();
+    const int blen = bstr.size();
     for (int i = 0; i < blen; i++) {
       if (bstr[i] != 'x' && bstr[i] != 'y' && bstr[i] != 'z')
         error->all(FLERR,"Fix balance shift string is invalid");
diff --git a/src/fix_bond_history.cpp b/src/fix_bond_history.cpp
index cae9dc744d..2d344e24fc 100644
--- a/src/fix_bond_history.cpp
+++ b/src/fix_bond_history.cpp
@@ -27,8 +27,8 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define LB_FACTOR 1.5
-#define DELTA 8192
+static constexpr double LB_FACTOR = 1.5;
+static constexpr int DELTA = 8192;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/fix_deposit.cpp b/src/fix_deposit.cpp
index 4a9c5aa9da..d8ea665149 100644
--- a/src/fix_deposit.cpp
+++ b/src/fix_deposit.cpp
@@ -42,7 +42,7 @@ using namespace MathConst;
 enum{ATOM,MOLECULE};
 enum{DIST_UNIFORM,DIST_GAUSSIAN};
 
-#define EPSILON 1.0e6
+static constexpr double EPSILON = 1.0e6;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/fix_dt_reset.cpp b/src/fix_dt_reset.cpp
index ba69d17718..ea364657c6 100644
--- a/src/fix_dt_reset.cpp
+++ b/src/fix_dt_reset.cpp
@@ -31,7 +31,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/fix_halt.cpp b/src/fix_halt.cpp
index fcfefe102d..b34c79867f 100644
--- a/src/fix_halt.cpp
+++ b/src/fix_halt.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /* ----------------------------------------------------------------------
    LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
    https://www.lammps.org/, Sandia National Laboratories
@@ -31,49 +30,49 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-enum{BONDMAX,TLIMIT,DISKFREE,VARIABLE};
-enum{LT,LE,GT,GE,EQ,NEQ,XOR};
-enum{HARD,SOFT,CONTINUE};
-enum{NOMSG=0,YESMSG=1};
+enum { BONDMAX, TLIMIT, DISKFREE, VARIABLE };
+enum { LT, LE, GT, GE, EQ, NEQ, XOR };
+enum { HARD, SOFT, CONTINUE };
+enum { NOMSG = 0, YESMSG = 1 };
 
 /* ---------------------------------------------------------------------- */
 
 FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
-  Fix(lmp, narg, arg), idvar(nullptr), dlimit_path(nullptr)
+    Fix(lmp, narg, arg), idvar(nullptr), dlimit_path(nullptr)
 {
-  if (narg < 7) error->all(FLERR,"Illegal fix halt command");
-  nevery = utils::inumeric(FLERR,arg[3],false,lmp);
-  if (nevery <= 0) error->all(FLERR,"Illegal fix halt command");
+  if (narg < 7) utils::missing_cmd_args(FLERR, "fix halt", error);
+  nevery = utils::inumeric(FLERR, arg[3], false, lmp);
+  if (nevery <= 0) error->all(FLERR, "Illegal fix halt command: nevery must be > 0");
 
   // comparison args
 
   idvar = nullptr;
   int iarg = 4;
 
-  if (strcmp(arg[iarg],"tlimit") == 0) {
+  if (strcmp(arg[iarg], "tlimit") == 0) {
     attribute = TLIMIT;
-  } else if (strcmp(arg[iarg],"diskfree") == 0) {
+  } else if (strcmp(arg[iarg], "diskfree") == 0) {
     attribute = DISKFREE;
     dlimit_path = utils::strdup(".");
-  } else if (strcmp(arg[iarg],"bondmax") == 0) {
+  } else if (strcmp(arg[iarg], "bondmax") == 0) {
     attribute = BONDMAX;
   } else {
-    ArgInfo argi(arg[iarg],ArgInfo::VARIABLE);
+    ArgInfo argi(arg[iarg], ArgInfo::VARIABLE);
 
-    if ((argi.get_type() == ArgInfo::UNKNOWN)
-        || (argi.get_type() == ArgInfo::NONE)
-        || (argi.get_dim() != 0))
-      error->all(FLERR,"Invalid fix halt attribute");
+    if ((argi.get_type() == ArgInfo::UNKNOWN) || (argi.get_type() == ArgInfo::NONE) ||
+        (argi.get_dim() != 0))
+      error->all(FLERR, "Invalid fix halt attribute {}", arg[iarg]);
 
     attribute = VARIABLE;
     idvar = argi.copy_name();
     ivar = input->variable->find(idvar);
 
-    if (ivar < 0) error->all(FLERR,"Could not find fix halt variable name");
+    if (ivar < 0) error->all(FLERR, "Could not find fix halt variable name");
     if (input->variable->equalstyle(ivar) == 0)
-      error->all(FLERR,"Fix halt variable is not equal-style variable");
+      error->all(FLERR, "Fix halt variable is not equal-style variable");
   }
 
+  // clang-format off
   ++iarg;
   if (strcmp(arg[iarg],"<") == 0) operation = LT;
   else if (strcmp(arg[iarg],"<=") == 0) operation = LE;
@@ -85,7 +84,7 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
   else error->all(FLERR,"Invalid fix halt operator");
 
   ++iarg;
-  value = utils::numeric(FLERR,arg[iarg],false,lmp);
+  value = utils::numeric(FLERR, arg[iarg], false, lmp);
 
   // parse optional args
 
@@ -93,38 +92,40 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
   msgflag = YESMSG;
   ++iarg;
   while (iarg < narg) {
-    if (strcmp(arg[iarg],"error") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix halt command");
-      if (strcmp(arg[iarg+1],"hard") == 0) eflag = HARD;
-      else if (strcmp(arg[iarg+1],"soft") == 0) eflag = SOFT;
-      else if (strcmp(arg[iarg+1],"continue") == 0) eflag = CONTINUE;
-      else error->all(FLERR,"Illegal fix halt command");
+    if (strcmp(arg[iarg], "error") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt error", error);
+      if (strcmp(arg[iarg + 1], "hard") == 0) eflag = HARD;
+      else if (strcmp(arg[iarg + 1], "soft") == 0) eflag = SOFT;
+      else if (strcmp(arg[iarg + 1], "continue") == 0) eflag = CONTINUE;
+      else error->all(FLERR, "Unknown fix halt error condition {}", arg[iarg]);
       iarg += 2;
-    } else if (strcmp(arg[iarg],"message") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix halt command");
-      msgflag = utils::logical(FLERR,arg[iarg+1],false,lmp);
+    } else if (strcmp(arg[iarg], "message") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt message", error);
+      msgflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
       iarg += 2;
-    } else if (strcmp(arg[iarg],"path") == 0) {
-      if (iarg+2 > narg) error->all(FLERR,"Illegal fix halt command");
+    } else if (strcmp(arg[iarg], "path") == 0) {
+      if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt error", error);
       ++iarg;
       delete[] dlimit_path;
       // strip off outer quotes, if present
-      int len = strlen(arg[iarg])+1;
-      if ( ((arg[iarg][0] == '"') || (arg[iarg][0] == '\''))
-           && (arg[iarg][0] == arg[iarg][len-2])) {
-        arg[iarg][len-2] = '\0';
-        dlimit_path = utils::strdup(arg[iarg]+1);
-      } else dlimit_path = utils::strdup(arg[iarg]);
+      int len = strlen(arg[iarg]) + 1;
+      if (((arg[iarg][0] == '"') || (arg[iarg][0] == '\'')) &&
+          (arg[iarg][0] == arg[iarg][len - 2])) {
+        arg[iarg][len - 2] = '\0';
+        dlimit_path = utils::strdup(arg[iarg] + 1);
+      } else
+        dlimit_path = utils::strdup(arg[iarg]);
       ++iarg;
-    } else error->all(FLERR,"Illegal fix halt command");
+    } else error->all(FLERR, "Unknown fix halt keyword {}", arg[iarg]);
   }
+  // clang-format on
 
   // add nfirst to all computes that store invocation times
   // since don't know a priori which are invoked via variables by this fix
   // once in end_of_step() can set timestep for ones actually invoked
 
   if (attribute == VARIABLE) {
-    const bigint nfirst = (update->ntimestep/nevery)*nevery + nevery;
+    const bigint nfirst = (update->ntimestep / nevery) * nevery + nevery;
     modify->addstep_compute_all(nfirst);
   }
 }
@@ -133,8 +134,8 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
 
 FixHalt::~FixHalt()
 {
-  delete [] idvar;
-  delete [] dlimit_path;
+  delete[] idvar;
+  delete[] dlimit_path;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -156,22 +157,22 @@ void FixHalt::init()
 
   if (attribute == VARIABLE) {
     ivar = input->variable->find(idvar);
-    if (ivar < 0) error->all(FLERR,"Could not find fix halt variable name");
+    if (ivar < 0) error->all(FLERR, "Could not find fix halt variable {}", idvar);
     if (input->variable->equalstyle(ivar) == 0)
-      error->all(FLERR,"Fix halt variable is not equal-style variable");
+      error->all(FLERR, "Fix halt variable {} is not equal-style variable", idvar);
   }
 
   // settings used by TLIMIT
 
-  nextstep = (update->ntimestep/nevery)*nevery + nevery;
+  nextstep = (update->ntimestep / nevery) * nevery + nevery;
   thisstep = -1;
   tratio = 0.5;
 
   // check if disk limit is supported
 
   if (attribute == DISKFREE) {
-    if (diskfree() < 0.0)
-      error->all(FLERR,"Disk limit not supported by OS or illegal path");
+    if (!dlimit_path || platform::disk_free(dlimit_path) < 0.0)
+      error->all(FLERR, "Disk limit not supported by OS or illegal path");
   }
 }
 
@@ -196,7 +197,7 @@ void FixHalt::end_of_step()
     if (update->ntimestep != nextstep) return;
     attvalue = tlimit();
   } else if (attribute == DISKFREE) {
-    attvalue = diskfree();
+    attvalue = platform::disk_free(dlimit_path) / 1048576.0;    // MBytes
   } else if (attribute == BONDMAX) {
     attvalue = bondmax();
   } else {
@@ -205,6 +206,10 @@ void FixHalt::end_of_step()
     modify->addstep_compute(update->ntimestep + nevery);
   }
 
+  // ensure that the attribute is *exactly* the same on all ranks
+
+  MPI_Bcast(&attvalue, 1, MPI_DOUBLE, 0, world);
+
   // check if halt is triggered, else just return
 
   if (operation == LT) {
@@ -220,21 +225,19 @@ void FixHalt::end_of_step()
   } else if (operation == NEQ) {
     if (attvalue == value) return;
   } else if (operation == XOR) {
-    if ((attvalue == 0.0 && value == 0.0) ||
-        (attvalue != 0.0 && value != 0.0)) return;
+    if ((attvalue == 0.0 && value == 0.0) || (attvalue != 0.0 && value != 0.0)) return;
   }
 
   // hard halt -> exit LAMMPS
   // soft/continue halt -> trigger timer to break from run loop
   // print message with ID of fix halt in case multiple instances
 
-  std::string message = fmt::format("Fix halt condition for fix-id {} met on "
-                                    "step {} with value {}",
+  std::string message = fmt::format("Fix halt condition for fix-id {} met on step {} with value {}",
                                     id, update->ntimestep, attvalue);
   if (eflag == HARD) {
-    error->all(FLERR,message);
-  } else if (eflag == SOFT || eflag == CONTINUE) {
-    if (comm->me == 0 && msgflag == YESMSG) error->message(FLERR,message);
+    error->all(FLERR, message);
+  } else if ((eflag == SOFT) || (eflag == CONTINUE)) {
+    if ((comm->me == 0) && (msgflag == YESMSG)) error->message(FLERR, message);
     timer->force_timeout();
   }
 }
@@ -260,8 +263,8 @@ double FixHalt::bondmax()
   int **bondlist = neighbor->bondlist;
   int nbondlist = neighbor->nbondlist;
 
-  int i1,i2;
-  double delx,dely,delz,rsq;
+  int i1, i2;
+  double delx, dely, delz, rsq;
   double maxone = 0.0;
 
   for (int n = 0; n < nbondlist; n++) {
@@ -272,12 +275,12 @@ double FixHalt::bondmax()
     dely = x[i1][1] - x[i2][1];
     delz = x[i1][2] - x[i2][2];
 
-    rsq = delx*delx + dely*dely + delz*delz;
-    maxone = MAX(rsq,maxone);
+    rsq = delx * delx + dely * dely + delz * delz;
+    maxone = MAX(rsq, maxone);
   }
 
   double maxall;
-  MPI_Allreduce(&maxone,&maxall,1,MPI_DOUBLE,MPI_MAX,world);
+  MPI_Allreduce(&maxone, &maxall, 1, MPI_DOUBLE, MPI_MAX, world);
 
   return sqrt(maxall);
 }
@@ -291,48 +294,15 @@ double FixHalt::bondmax()
 double FixHalt::tlimit()
 {
   double cpu = timer->elapsed(Timer::TOTAL);
-  MPI_Bcast(&cpu,1,MPI_DOUBLE,0,world);
+  MPI_Bcast(&cpu, 1, MPI_DOUBLE, 0, world);
 
   if (cpu < value) {
     bigint elapsed = update->ntimestep - update->firststep;
-    bigint final = update->firststep +
-      static_cast<bigint> (tratio*value/cpu * elapsed);
-    nextstep = (final/nevery)*nevery + nevery;
+    bigint final = update->firststep + static_cast<bigint>(tratio * value / cpu * elapsed);
+    nextstep = (final / nevery) * nevery + nevery;
     if (nextstep == update->ntimestep) nextstep += nevery;
     tratio = 1.0;
   }
 
   return cpu;
 }
-
-/* ----------------------------------------------------------------------
-   determine available disk space, if supported. Return -1 if not.
-------------------------------------------------------------------------- */
-#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__) || defined(__NetBSD__)
-#include <sys/statvfs.h>
-#endif
-double FixHalt::diskfree()
-{
-#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__) || defined(__NetBSD__)
-  struct statvfs fs;
-  double disk_free = -1.0;
-
-  if (dlimit_path) {
-    disk_free = 1.0e100;
-    int rv = statvfs(dlimit_path,&fs);
-    if (rv == 0) {
-#if defined(__linux__)
-      disk_free = fs.f_bavail*fs.f_bsize/1048576.0;
-#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__) || defined(__NetBSD__)
-      disk_free = fs.f_bavail*fs.f_frsize/1048576.0;
-#endif
-    } else
-      disk_free = -1.0;
-
-    MPI_Bcast(&disk_free,1,MPI_DOUBLE,0,world);
-  }
-  return disk_free;
-#else
-  return -1.0;
-#endif
-}
diff --git a/src/fix_langevin.cpp b/src/fix_langevin.cpp
index 35bffb24fa..077e064871 100644
--- a/src/fix_langevin.cpp
+++ b/src/fix_langevin.cpp
@@ -46,8 +46,8 @@ using namespace FixConst;
 enum { NOBIAS, BIAS };
 enum { CONSTANT, EQUAL, ATOM };
 
-#define SINERTIA 0.4    // moment of inertia prefactor for sphere
-#define EINERTIA 0.2    // moment of inertia prefactor for ellipsoid
+static constexpr double SINERTIA = 0.4;    // moment of inertia prefactor for sphere
+static constexpr double EINERTIA = 0.2;    // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
@@ -240,8 +240,10 @@ void FixLangevin::init()
     if (flag) error->all(FLERR, "Fix langevin gjf should come before fix nve");
   }
 
-  if (oflag && !atom->sphere_flag)
-    error->all(FLERR, "Fix langevin omega requires atom style sphere");
+  if (oflag && !atom->omega_flag)
+    error->all(FLERR, "Fix langevin omega requires atom attribute omega");
+  if (oflag && !atom->radius_flag)
+    error->all(FLERR, "Fix langevin omega requires atom attribute radius");
   if (ascale && !atom->ellipsoid_flag)
     error->all(FLERR, "Fix langevin angmom requires atom style ellipsoid");
 
@@ -789,7 +791,7 @@ void FixLangevin::compute_target()
     if (tstyle == EQUAL) {
       t_target = input->variable->compute_equal(tvar);
       if (t_target < 0.0)
-        error->one(FLERR,"Fix langevin variable returned negative temperature");
+        error->one(FLERR, "Fix langevin variable returned negative temperature");
       tsqrt = sqrt(t_target);
     } else {
       if (atom->nmax > maxatom2) {
@@ -801,8 +803,7 @@ void FixLangevin::compute_target()
       for (int i = 0; i < nlocal; i++)
         if (mask[i] & groupbit)
             if (tforce[i] < 0.0)
-              error->one(FLERR,
-                         "Fix langevin variable returned negative temperature");
+              error->one(FLERR, "Fix langevin variable returned negative temperature");
     }
     modify->addstep_compute(update->ntimestep + 1);
   }
diff --git a/src/fix_move.cpp b/src/fix_move.cpp
index 36bba410fc..80e10c4d3d 100644
--- a/src/fix_move.cpp
+++ b/src/fix_move.cpp
@@ -42,7 +42,7 @@ using namespace MathConst;
 enum { LINEAR, WIGGLE, ROTATE, VARIABLE, TRANSROT };
 enum { EQUAL, ATOM };
 
-#define INERTIA 0.2    // moment of inertia prefactor for ellipsoid
+static constexpr double INERTIA = 0.2;    // moment of inertia prefactor for ellipsoid
 
 /* ---------------------------------------------------------------------- */
 
@@ -276,10 +276,11 @@ FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) :
   line_flag = atom->line_flag;
   tri_flag = atom->tri_flag;
   body_flag = atom->body_flag;
+  quat_atom_flag = atom->quat_flag;
 
   theta_flag = quat_flag = 0;
   if (line_flag) theta_flag = 1;
-  if (ellipsoid_flag || tri_flag || body_flag) quat_flag = 1;
+  if (ellipsoid_flag || tri_flag || body_flag || quat_atom_flag) quat_flag = 1;
 
   extra_flag = 0;
   if (omega_flag || angmom_flag || theta_flag || quat_flag) extra_flag = 1;
@@ -329,7 +330,7 @@ FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) :
     }
   }
 
-  if (quat_flag) {
+  if (quat_flag && !quat_atom_flag) {
     double *quat;
     for (int i = 0; i < nlocal; i++) {
       quat = nullptr;
@@ -349,6 +350,16 @@ FixMove::FixMove(LAMMPS *lmp, int narg, char **arg) :
       } else
         qoriginal[i][0] = qoriginal[i][1] = qoriginal[i][2] = qoriginal[i][3] = 0.0;
     }
+  } else if (quat_atom_flag) {
+    double **quat_atom = atom->quat;
+    for (int i = 0; i < nlocal; i++) {
+      if (mask[i] & groupbit) {
+        qoriginal[i][0] = quat_atom[i][0];
+        qoriginal[i][1] = quat_atom[i][1];
+        qoriginal[i][2] = quat_atom[i][2];
+        qoriginal[i][3] = quat_atom[i][3];
+      }
+    }
   }
 
   // nrestart = size of per-atom restart data
@@ -521,6 +532,7 @@ void FixMove::initial_integrate(int /*vflag*/)
   double *radius = atom->radius;
   double *rmass = atom->rmass;
   double *mass = atom->mass;
+  double **quat_atom = atom->quat;
   int *type = atom->type;
   int *ellipsoid = atom->ellipsoid;
   int *line = atom->line;
@@ -749,9 +761,9 @@ void FixMove::initial_integrate(int /*vflag*/)
             avec_line->bonus[atom->line[i]].theta = theta_new;
           }
 
-          // quats for ellipsoids, tris, and bodies
+          // quats for ellipsoids, tris, bodies, and bpm/sphere
 
-          if (quat_flag) {
+          if (quat_flag && !quat_atom_flag) {
             quat = nullptr;
             if (ellipsoid_flag && ellipsoid[i] >= 0)
               quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
@@ -760,6 +772,8 @@ void FixMove::initial_integrate(int /*vflag*/)
             else if (body_flag && body[i] >= 0)
               quat = avec_body->bonus[body[i]].quat;
             if (quat) MathExtra::quatquat(qrotate, qoriginal[i], quat);
+          } else if (quat_atom_flag) {
+            MathExtra::quatquat(qrotate, qoriginal[i], quat_atom[i]);
           }
         }
 
@@ -880,9 +894,9 @@ void FixMove::initial_integrate(int /*vflag*/)
             avec_line->bonus[atom->line[i]].theta = theta_new;
           }
 
-          // quats for ellipsoids, tris, and bodies
+          // quats for ellipsoids, tris, bodies, and bpm/sphere
 
-          if (quat_flag) {
+          if (quat_flag && !quat_atom_flag) {
             quat = nullptr;
             if (ellipsoid_flag && ellipsoid[i] >= 0)
               quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
@@ -891,6 +905,8 @@ void FixMove::initial_integrate(int /*vflag*/)
             else if (body_flag && body[i] >= 0)
               quat = avec_body->bonus[body[i]].quat;
             if (quat) MathExtra::quatquat(qrotate, qoriginal[i], quat);
+          } else if (quat_atom_flag) {
+            MathExtra::quatquat(qrotate, qoriginal[i], quat_atom[i]);
           }
         }
 
@@ -1341,9 +1357,9 @@ void FixMove::set_arrays(int i)
         toriginal[i] = theta - 0.0;    // NOTE: edit this line
       }
 
-      // quats for ellipsoids, tris, and bodies
+      // quats for ellipsoids, tris, bodies, and bpm/sphere
 
-      if (quat_flag) {
+      if (quat_flag & !quat_atom_flag) {
         quat = nullptr;
         if (ellipsoid_flag && ellipsoid[i] >= 0)
           quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
@@ -1354,6 +1370,12 @@ void FixMove::set_arrays(int i)
         if (quat) {
           // qoriginal = f(quat,-delta);   // NOTE: edit this line
         }
+      } else if (quat_atom_flag) {
+        // double **quat_atom = atom->quat;
+        // qoriginal[0] = quat_atom[i][0]; // NOTE: edit this line
+        // qoriginal[1] = quat_atom[i][1]; // NOTE: edit this line
+        // qoriginal[2] = quat_atom[i][2]; // NOTE: edit this line
+        // qoriginal[3] = quat_atom[i][3]; // NOTE: edit this line
       }
     }
     xoriginal[i][0] -= vx * delta;
@@ -1400,7 +1422,7 @@ void FixMove::set_arrays(int i)
 
       // quats for ellipsoids, tris, and bodies
 
-      if (quat_flag) {
+      if (quat_flag && !quat_atom_flag) {
         quat = nullptr;
         if (ellipsoid_flag && ellipsoid[i] >= 0)
           quat = avec_ellipsoid->bonus[ellipsoid[i]].quat;
@@ -1411,6 +1433,12 @@ void FixMove::set_arrays(int i)
         if (quat) {
           // qoriginal = f(quat,-delta);   // NOTE: edit this line
         }
+      } else if (quat_atom_flag) {
+        // double **quat_atom = atom->quat;
+        // qoriginal[0] = quat_atom[i][0]; // NOTE: edit this line
+        // qoriginal[1] = quat_atom[i][1]; // NOTE: edit this line
+        // qoriginal[2] = quat_atom[i][2]; // NOTE: edit this line
+        // qoriginal[3] = quat_atom[i][3]; // NOTE: edit this line
       }
     }
   }
diff --git a/src/fix_move.h b/src/fix_move.h
index e3c018f54d..244a9d704a 100644
--- a/src/fix_move.h
+++ b/src/fix_move.h
@@ -61,7 +61,7 @@ class FixMove : public Fix {
   int xvar, yvar, zvar, vxvar, vyvar, vzvar;
   int xvarstyle, yvarstyle, zvarstyle, vxvarstyle, vyvarstyle, vzvarstyle;
   int extra_flag, omega_flag, angmom_flag;
-  int radius_flag, ellipsoid_flag, line_flag, tri_flag, body_flag;
+  int radius_flag, ellipsoid_flag, line_flag, tri_flag, body_flag, quat_atom_flag;
   int theta_flag, quat_flag;
   int nlevels_respa, nrestart;
   int time_origin;
diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp
index 562ca51c29..a7536800cb 100644
--- a/src/fix_nh.cpp
+++ b/src/fix_nh.cpp
@@ -40,9 +40,9 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define DELTAFLIP 0.1
-#define TILTMAX 1.5
-#define EPSILON 1.0e-6
+static constexpr double DELTAFLIP = 0.1;
+static constexpr double TILTMAX = 1.5;
+static constexpr double EPSILON = 1.0e-6;
 
 enum{NOBIAS,BIAS};
 enum{NONE,XYZ,XY,YZ,XZ};
@@ -442,10 +442,16 @@ FixNH::FixNH(LAMMPS *lmp, int narg, char **arg) :
     error->all(FLERR,"Invalid fix {} pressure settings", style);
 
   if (dipole_flag) {
-    if (!atom->sphere_flag)
-      error->all(FLERR,"Using update dipole flag requires atom style sphere");
-    if (!atom->mu_flag)
-      error->all(FLERR,"Using update dipole flag requires atom attribute mu");
+    if (strstr(style, "/sphere")) {
+      if (!atom->omega_flag)
+        error->all(FLERR,"Using update dipole flag requires atom attribute omega");
+      if (!atom->radius_flag)
+        error->all(FLERR,"Using update dipole flag requires atom attribute radius");
+      if (!atom->mu_flag)
+        error->all(FLERR,"Using update dipole flag requires atom attribute mu");
+    } else {
+      error->all(FLERR, "Must use a '/sphere' Nose-Hoover fix style for updating dipoles");
+    }
   }
 
   if ((tstat_flag && t_period <= 0.0) ||
diff --git a/src/fix_nh_sphere.cpp b/src/fix_nh_sphere.cpp
index f39de6c656..0e427763e7 100644
--- a/src/fix_nh_sphere.cpp
+++ b/src/fix_nh_sphere.cpp
@@ -36,8 +36,10 @@ using namespace MathExtra;
 FixNHSphere::FixNHSphere(LAMMPS *lmp, int narg, char **arg) :
   FixNH(lmp, narg, arg)
 {
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Fix nvt/nph/npt sphere requires atom style sphere");
+  if (!atom->omega_flag)
+    error->all(FLERR,"Fix {} requires atom attribute omega", style);
+  if (!atom->radius_flag)
+    error->all(FLERR,"Fix {} requires atom attribute radius", style);
 
   // inertia = moment of inertia prefactor for sphere or disc
 
diff --git a/src/fix_nve_sphere.cpp b/src/fix_nve_sphere.cpp
index ea57028af4..21520d7a69 100644
--- a/src/fix_nve_sphere.cpp
+++ b/src/fix_nve_sphere.cpp
@@ -68,8 +68,8 @@ FixNVESphere::FixNVESphere(LAMMPS *lmp, int narg, char **arg) :
 
   // error checks
 
-  if (!atom->sphere_flag)
-    error->all(FLERR,"Fix nve/sphere requires atom style sphere");
+  if (!atom->omega_flag)
+    error->all(FLERR,"Fix nve/sphere requires atom attribute omega");
   if (extra == DIPOLE && !atom->mu_flag)
     error->all(FLERR,"Fix nve/sphere update dipole requires atom attribute mu");
 }
diff --git a/src/fix_press_langevin.cpp b/src/fix_press_langevin.cpp
index 752f826dfe..d8d2a3b04a 100644
--- a/src/fix_press_langevin.cpp
+++ b/src/fix_press_langevin.cpp
@@ -37,8 +37,8 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define DELTAFLIP 0.1
-#define TILTMAX 1.5
+static constexpr double DELTAFLIP = 0.1;
+static constexpr double TILTMAX = 1.5;
 
 enum { NONE, XYZ, XY, YZ, XZ };
 enum { ISO, ANISO, TRICLINIC };
diff --git a/src/fix_property_atom.cpp b/src/fix_property_atom.cpp
index 9613523059..de96b5c39d 100644
--- a/src/fix_property_atom.cpp
+++ b/src/fix_property_atom.cpp
@@ -51,6 +51,19 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) :
   nvalue = 0;
   values_peratom = 0;
 
+  // check for ghost keyword to use as add_custom() arg
+
+  border = 0;
+  while (iarg < narg) {
+    if (strcmp(arg[iarg], "ghost") == 0) {
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix property/atom command");
+      border = utils::logical(FLERR, arg[iarg + 1], false, lmp);
+      iarg += 2;
+    } else iarg++;
+  }
+
+  iarg = 3;
+
   while (iarg < narg) {
     if (strcmp(arg[iarg], "mol") == 0) {
       if (atom->molecule_flag)
@@ -112,7 +125,7 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) :
       if (index[nvalue] >= 0) error->all(FLERR, "Fix property/atom vector name already exists");
       if (ReadData::is_data_section(id))
         error->all(FLERR, "Fix property/atom fix ID must not be a data file section name");
-      index[nvalue] = atom->add_custom(&arg[iarg][2], 0, 0);
+      index[nvalue] = atom->add_custom(&arg[iarg][2], 0, 0, border);
       cols[nvalue] = 0;
       values_peratom++;
       nvalue++;
@@ -125,7 +138,7 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) :
       if (index[nvalue] >= 0) error->all(FLERR, "Fix property/atom vector name already exists");
       if (ReadData::is_data_section(id))
         error->all(FLERR, "Fix property/atom fix ID must not be a data file section name");
-      index[nvalue] = atom->add_custom(&arg[iarg][2], 1, 0);
+      index[nvalue] = atom->add_custom(&arg[iarg][2], 1, 0, border);
       cols[nvalue] = 0;
       values_peratom++;
       nvalue++;
@@ -154,7 +167,7 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) :
         which = 1;
         styles[nvalue] = DARRAY;
       }
-      index[nvalue] = atom->add_custom(&arg[iarg][3], which, ncols);
+      index[nvalue] = atom->add_custom(&arg[iarg][3], which, ncols, border);
       cols[nvalue] = ncols;
       values_peratom += ncols;
       nvalue++;
@@ -168,11 +181,8 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) :
 
   // optional args
 
-  border = 0;
   while (iarg < narg) {
-    if (strcmp(arg[iarg], "ghost") == 0) {
-      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix property/atom command");
-      border = utils::logical(FLERR, arg[iarg + 1], false, lmp);
+    if (strcmp(arg[iarg], "ghost") == 0) { // skip here, since handled earlier
       iarg += 2;
     } else if (strcmp(arg[iarg], "writedata") == 0) {
       if (iarg + 2 > narg) error->all(FLERR, "Illegal fix property/atom command");
diff --git a/src/fix_recenter.cpp b/src/fix_recenter.cpp
index bee7f55823..4da8c4787b 100644
--- a/src/fix_recenter.cpp
+++ b/src/fix_recenter.cpp
@@ -125,13 +125,12 @@ void FixRecenter::init()
 
   int after = 0;
   int flag = 0;
-  for (int i = 0; i < modify->nfix; i++) {
-    if (strcmp(id,modify->fix[i]->id) == 0) after = 1;
-    else if ((modify->fmask[i] & INITIAL_INTEGRATE) && after) flag = 1;
+  for (const auto &ifix : modify->get_fix_list()) {
+    if (strcmp(id, ifix->id) == 0) after = 1;
+    else if ((modify->get_fix_mask(ifix) & INITIAL_INTEGRATE) && after) flag = 1;
   }
   if (flag && comm->me == 0)
-    error->warning(FLERR,"Fix recenter should come after all other "
-                   "integration fixes");
+    error->warning(FLERR,"Fix recenter should come after all other integration fixes");
 
   masstotal = group->mass(igroup);
 
diff --git a/src/fix_restrain.cpp b/src/fix_restrain.cpp
index f252134aa9..cc95fc93f3 100644
--- a/src/fix_restrain.cpp
+++ b/src/fix_restrain.cpp
@@ -38,9 +38,9 @@ using MathConst::DEG2RAD;
 
 enum{BOND,LBOUND,ANGLE,DIHEDRAL};
 
-#define TOLERANCE 0.05
-#define SMALL 0.001
-#define DELTA 1
+static constexpr double TOLERANCE = 0.05;
+static constexpr double SMALL = 0.001;
+static constexpr int DELTA = 1;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/fix_spring.cpp b/src/fix_spring.cpp
index 3a14ec8de1..93c1f7867d 100644
--- a/src/fix_spring.cpp
+++ b/src/fix_spring.cpp
@@ -30,7 +30,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define SMALL 1.0e-10
+static constexpr double SMALL = 1.0e-10;
 
 enum{TETHER,COUPLE};
 
diff --git a/src/fix_spring_chunk.cpp b/src/fix_spring_chunk.cpp
index 3deedcffac..f42572b190 100644
--- a/src/fix_spring_chunk.cpp
+++ b/src/fix_spring_chunk.cpp
@@ -30,7 +30,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define SMALL 1.0e-10
+static constexpr double SMALL = 1.0e-10;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/fix_store_local.cpp b/src/fix_store_local.cpp
index d32f0e8178..8deac03d79 100644
--- a/src/fix_store_local.cpp
+++ b/src/fix_store_local.cpp
@@ -21,7 +21,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define DELTA 1024
+static constexpr int DELTA = 1024;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/fix_temp_berendsen.h b/src/fix_temp_berendsen.h
index 78ece2af22..f137830508 100644
--- a/src/fix_temp_berendsen.h
+++ b/src/fix_temp_berendsen.h
@@ -38,7 +38,7 @@ class FixTempBerendsen : public Fix {
   void restart(char *buf) override;
   void *extract(const char *, int &) override;
 
- private:
+ protected:
   int which;
   double t_start, t_stop, t_period, t_target;
   double energy;
diff --git a/src/fix_thermal_conductivity.cpp b/src/fix_thermal_conductivity.cpp
index 5fcb59d276..dd674a0506 100644
--- a/src/fix_thermal_conductivity.cpp
+++ b/src/fix_thermal_conductivity.cpp
@@ -29,7 +29,7 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define BIG 1.0e10
+static constexpr double BIG = 1.0e10;
 
 /* ---------------------------------------------------------------------- */
 
@@ -108,15 +108,14 @@ int FixThermalConductivity::setmask()
 
 void FixThermalConductivity::init()
 {
-  // warn if any fix ave/spatial comes after this fix
+  // warn if any fix ave/chunk comes after this fix
   // can cause glitch in averaging since ave will happen after swap
 
   int foundme = 0;
-  for (int i = 0; i < modify->nfix; i++) {
-    if (modify->fix[i] == this) foundme = 1;
-    if (foundme && strcmp(modify->fix[i]->style,"ave/spatial") == 0 && me == 0)
-      error->warning(FLERR,
-                     "Fix thermal/conductivity comes before fix ave/spatial");
+  for (const auto &ifix : modify->get_fix_list()) {
+    if (ifix == this) foundme = 1;
+    if (foundme && utils::strmatch(ifix->style,"^ave/chunk") && (me == 0))
+      error->warning(FLERR, "Fix thermal/conductivity comes before fix ave/chunk");
   }
 
   // set bounds of 2 slabs in edim
diff --git a/src/fix_wall.cpp b/src/fix_wall.cpp
index c4f3219622..50289d0f69 100644
--- a/src/fix_wall.cpp
+++ b/src/fix_wall.cpp
@@ -28,7 +28,6 @@ using namespace LAMMPS_NS;
 using namespace FixConst;
 
 enum { XLO = 0, XHI = 1, YLO = 2, YHI = 3, ZLO = 4, ZHI = 5 };
-enum { NONE = 0, EDGE, CONSTANT, VARIABLE };
 
 static const char *wallpos[] = {"xlo", "xhi", "ylo", "yhi", "zlo", "zhi"};
 
diff --git a/src/fix_wall.h b/src/fix_wall.h
index 12ceb17b49..81abfab8ea 100644
--- a/src/fix_wall.h
+++ b/src/fix_wall.h
@@ -27,6 +27,7 @@ class FixWall : public Fix {
   int xstyle[6];
   int xindex[6];
   char *xstr[6];
+  enum { NONE = 0, EDGE, CONSTANT, VARIABLE };
 
   FixWall(class LAMMPS *, int, char **);
   ~FixWall() override;
diff --git a/src/fix_wall_region.cpp b/src/fix_wall_region.cpp
index da9ee757d9..d6fc63f55c 100644
--- a/src/fix_wall_region.cpp
+++ b/src/fix_wall_region.cpp
@@ -38,7 +38,7 @@ enum { LJ93, LJ126, LJ1043, COLLOID, HARMONIC, MORSE };
 FixWallRegion::FixWallRegion(LAMMPS *lmp, int narg, char **arg) :
     Fix(lmp, narg, arg), idregion(nullptr), region(nullptr)
 {
-  if (narg < 8) error->all(FLERR, "Illegal fix wall/region command");
+  if (narg < 8) utils::missing_cmd_args(FLERR, "fix wall/region", error);
 
   scalar_flag = 1;
   vector_flag = 1;
@@ -70,12 +70,12 @@ FixWallRegion::FixWallRegion(LAMMPS *lmp, int narg, char **arg) :
   else if (strcmp(arg[4], "morse") == 0)
     style = MORSE;
   else
-    error->all(FLERR, "Illegal fix wall/region command");
+    error->all(FLERR, "Unknown fix wall/region style {}", arg[4]);
 
   if (style != COLLOID) dynamic_group_allow = 1;
 
   if (style == MORSE) {
-    if (narg != 9) error->all(FLERR, "Illegal fix wall/region command");
+    if (narg != 9) error->all(FLERR, "Illegal fix wall/region morse command");
 
     epsilon = utils::numeric(FLERR, arg[5], false, lmp);
     alpha = utils::numeric(FLERR, arg[6], false, lmp);
@@ -127,7 +127,7 @@ void FixWallRegion::init()
   // ensure all particles in group are extended particles
 
   if (style == COLLOID) {
-    if (!atom->sphere_flag) error->all(FLERR, "Fix wall/region colloid requires atom style sphere");
+    if (!atom->radius_flag) error->all(FLERR, "Fix wall/region colloid requires atom attribute radius");
 
     double *radius = atom->radius;
     int *mask = atom->mask;
@@ -140,7 +140,7 @@ void FixWallRegion::init()
 
     int flagall;
     MPI_Allreduce(&flag, &flagall, 1, MPI_INT, MPI_SUM, world);
-    if (flagall) error->all(FLERR, "Fix wall/region colloid requires extended particles");
+    if (flagall) error->all(FLERR, "Fix wall/region colloid requires only extended particles");
   }
 
   // setup coefficients for each style
diff --git a/src/fmt/args.h b/src/fmt/args.h
index 2d684e7cc1..b77a2d0661 100644
--- a/src/fmt/args.h
+++ b/src/fmt/args.h
@@ -12,7 +12,7 @@
 #include <memory>      // std::unique_ptr
 #include <vector>
 
-#include "core.h"
+#include "format.h"  // std_string_view
 
 FMT_BEGIN_NAMESPACE
 
@@ -22,8 +22,9 @@ template <typename T> struct is_reference_wrapper : std::false_type {};
 template <typename T>
 struct is_reference_wrapper<std::reference_wrapper<T>> : std::true_type {};
 
-template <typename T> const T& unwrap(const T& v) { return v; }
-template <typename T> const T& unwrap(const std::reference_wrapper<T>& v) {
+template <typename T> auto unwrap(const T& v) -> const T& { return v; }
+template <typename T>
+auto unwrap(const std::reference_wrapper<T>& v) -> const T& {
   return static_cast<const T&>(v);
 }
 
@@ -50,7 +51,7 @@ class dynamic_arg_list {
   std::unique_ptr<node<>> head_;
 
  public:
-  template <typename T, typename Arg> const T& push(const Arg& arg) {
+  template <typename T, typename Arg> auto push(const Arg& arg) -> const T& {
     auto new_node = std::unique_ptr<typed_node<T>>(new typed_node<T>(arg));
     auto& value = new_node->value;
     new_node->next = std::move(head_);
@@ -110,14 +111,14 @@ class dynamic_format_arg_store
 
   friend class basic_format_args<Context>;
 
-  unsigned long long get_types() const {
+  auto get_types() const -> unsigned long long {
     return detail::is_unpacked_bit | data_.size() |
            (named_info_.empty()
                 ? 0ULL
                 : static_cast<unsigned long long>(detail::has_named_args_bit));
   }
 
-  const basic_format_arg<Context>* data() const {
+  auto data() const -> const basic_format_arg<Context>* {
     return named_info_.empty() ? data_.data() : data_.data() + 1;
   }
 
diff --git a/src/fmt/chrono.h b/src/fmt/chrono.h
index ff3e1445b9..9d54574e16 100644
--- a/src/fmt/chrono.h
+++ b/src/fmt/chrono.h
@@ -18,7 +18,7 @@
 #include <ostream>
 #include <type_traits>
 
-#include "format.h"
+#include "ostream.h"  // formatbuf
 
 FMT_BEGIN_NAMESPACE
 
@@ -72,7 +72,8 @@ template <typename To, typename From,
           FMT_ENABLE_IF(!std::is_same<From, To>::value &&
                         std::numeric_limits<From>::is_signed ==
                             std::numeric_limits<To>::is_signed)>
-FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) {
+FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec)
+    -> To {
   ec = 0;
   using F = std::numeric_limits<From>;
   using T = std::numeric_limits<To>;
@@ -101,7 +102,8 @@ template <typename To, typename From,
           FMT_ENABLE_IF(!std::is_same<From, To>::value &&
                         std::numeric_limits<From>::is_signed !=
                             std::numeric_limits<To>::is_signed)>
-FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) {
+FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec)
+    -> To {
   ec = 0;
   using F = std::numeric_limits<From>;
   using T = std::numeric_limits<To>;
@@ -133,7 +135,8 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) {
 
 template <typename To, typename From,
           FMT_ENABLE_IF(std::is_same<From, To>::value)>
-FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) {
+FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec)
+    -> To {
   ec = 0;
   return from;
 }  // function
@@ -154,7 +157,7 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) {
 // clang-format on
 template <typename To, typename From,
           FMT_ENABLE_IF(!std::is_same<From, To>::value)>
-FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) {
+FMT_CONSTEXPR auto safe_float_conversion(const From from, int& ec) -> To {
   ec = 0;
   using T = std::numeric_limits<To>;
   static_assert(std::is_floating_point<From>::value, "From must be floating");
@@ -176,7 +179,7 @@ FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) {
 
 template <typename To, typename From,
           FMT_ENABLE_IF(std::is_same<From, To>::value)>
-FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) {
+FMT_CONSTEXPR auto safe_float_conversion(const From from, int& ec) -> To {
   ec = 0;
   static_assert(std::is_floating_point<From>::value, "From must be floating");
   return from;
@@ -188,8 +191,8 @@ FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) {
 template <typename To, typename FromRep, typename FromPeriod,
           FMT_ENABLE_IF(std::is_integral<FromRep>::value),
           FMT_ENABLE_IF(std::is_integral<typename To::rep>::value)>
-To safe_duration_cast(std::chrono::duration<FromRep, FromPeriod> from,
-                      int& ec) {
+auto safe_duration_cast(std::chrono::duration<FromRep, FromPeriod> from,
+                        int& ec) -> To {
   using From = std::chrono::duration<FromRep, FromPeriod>;
   ec = 0;
   // the basic idea is that we need to convert from count() in the from type
@@ -240,8 +243,8 @@ To safe_duration_cast(std::chrono::duration<FromRep, FromPeriod> from,
 template <typename To, typename FromRep, typename FromPeriod,
           FMT_ENABLE_IF(std::is_floating_point<FromRep>::value),
           FMT_ENABLE_IF(std::is_floating_point<typename To::rep>::value)>
-To safe_duration_cast(std::chrono::duration<FromRep, FromPeriod> from,
-                      int& ec) {
+auto safe_duration_cast(std::chrono::duration<FromRep, FromPeriod> from,
+                        int& ec) -> To {
   using From = std::chrono::duration<FromRep, FromPeriod>;
   ec = 0;
   if (std::isnan(from.count())) {
@@ -321,12 +324,12 @@ To safe_duration_cast(std::chrono::duration<FromRep, FromPeriod> from,
 
 namespace detail {
 template <typename T = void> struct null {};
-inline null<> localtime_r FMT_NOMACRO(...) { return null<>(); }
-inline null<> localtime_s(...) { return null<>(); }
-inline null<> gmtime_r(...) { return null<>(); }
-inline null<> gmtime_s(...) { return null<>(); }
+inline auto localtime_r FMT_NOMACRO(...) -> null<> { return null<>(); }
+inline auto localtime_s(...) -> null<> { return null<>(); }
+inline auto gmtime_r(...) -> null<> { return null<>(); }
+inline auto gmtime_s(...) -> null<> { return null<>(); }
 
-inline const std::locale& get_classic_locale() {
+inline auto get_classic_locale() -> const std::locale& {
   static const auto& locale = std::locale::classic();
   return locale;
 }
@@ -336,8 +339,6 @@ template <typename CodeUnit> struct codecvt_result {
   CodeUnit buf[max_size];
   CodeUnit* end;
 };
-template <typename CodeUnit>
-constexpr const size_t codecvt_result<CodeUnit>::max_size;
 
 template <typename CodeUnit>
 void write_codecvt(codecvt_result<CodeUnit>& out, string_view in_buf,
@@ -408,8 +409,7 @@ inline void do_write(buffer<Char>& buf, const std::tm& time,
   auto&& format_buf = formatbuf<std::basic_streambuf<Char>>(buf);
   auto&& os = std::basic_ostream<Char>(&format_buf);
   os.imbue(loc);
-  using iterator = std::ostreambuf_iterator<Char>;
-  const auto& facet = std::use_facet<std::time_put<Char, iterator>>(loc);
+  const auto& facet = std::use_facet<std::time_put<Char>>(loc);
   auto end = facet.put(os, os, Char(' '), &time, format, modifier);
   if (end.failed()) FMT_THROW(format_error("failed to format time"));
 }
@@ -432,6 +432,51 @@ auto write(OutputIt out, const std::tm& time, const std::locale& loc,
   return write_encoded_tm_str(out, string_view(buf.data(), buf.size()), loc);
 }
 
+template <typename Rep1, typename Rep2>
+struct is_same_arithmetic_type
+    : public std::integral_constant<bool,
+                                    (std::is_integral<Rep1>::value &&
+                                     std::is_integral<Rep2>::value) ||
+                                        (std::is_floating_point<Rep1>::value &&
+                                         std::is_floating_point<Rep2>::value)> {
+};
+
+template <
+    typename To, typename FromRep, typename FromPeriod,
+    FMT_ENABLE_IF(is_same_arithmetic_type<FromRep, typename To::rep>::value)>
+auto fmt_duration_cast(std::chrono::duration<FromRep, FromPeriod> from) -> To {
+#if FMT_SAFE_DURATION_CAST
+  // Throwing version of safe_duration_cast is only available for
+  // integer to integer or float to float casts.
+  int ec;
+  To to = safe_duration_cast::safe_duration_cast<To>(from, ec);
+  if (ec) FMT_THROW(format_error("cannot format duration"));
+  return to;
+#else
+  // Standard duration cast, may overflow.
+  return std::chrono::duration_cast<To>(from);
+#endif
+}
+
+template <
+    typename To, typename FromRep, typename FromPeriod,
+    FMT_ENABLE_IF(!is_same_arithmetic_type<FromRep, typename To::rep>::value)>
+auto fmt_duration_cast(std::chrono::duration<FromRep, FromPeriod> from) -> To {
+  // Mixed integer <-> float cast is not supported by safe_duration_cast.
+  return std::chrono::duration_cast<To>(from);
+}
+
+template <typename Duration>
+auto to_time_t(
+    std::chrono::time_point<std::chrono::system_clock, Duration> time_point)
+    -> std::time_t {
+  // Cannot use std::chrono::system_clock::to_time_t since this would first
+  // require a cast to std::chrono::system_clock::time_point, which could
+  // overflow.
+  return fmt_duration_cast<std::chrono::duration<std::time_t>>(
+             time_point.time_since_epoch())
+      .count();
+}
 }  // namespace detail
 
 FMT_BEGIN_EXPORT
@@ -441,29 +486,29 @@ FMT_BEGIN_EXPORT
   expressed in local time. Unlike ``std::localtime``, this function is
   thread-safe on most platforms.
  */
-inline std::tm localtime(std::time_t time) {
+inline auto localtime(std::time_t time) -> std::tm {
   struct dispatcher {
     std::time_t time_;
     std::tm tm_;
 
     dispatcher(std::time_t t) : time_(t) {}
 
-    bool run() {
+    auto run() -> bool {
       using namespace fmt::detail;
       return handle(localtime_r(&time_, &tm_));
     }
 
-    bool handle(std::tm* tm) { return tm != nullptr; }
+    auto handle(std::tm* tm) -> bool { return tm != nullptr; }
 
-    bool handle(detail::null<>) {
+    auto handle(detail::null<>) -> bool {
       using namespace fmt::detail;
       return fallback(localtime_s(&tm_, &time_));
     }
 
-    bool fallback(int res) { return res == 0; }
+    auto fallback(int res) -> bool { return res == 0; }
 
 #if !FMT_MSC_VERSION
-    bool fallback(detail::null<>) {
+    auto fallback(detail::null<>) -> bool {
       using namespace fmt::detail;
       std::tm* tm = std::localtime(&time_);
       if (tm) tm_ = *tm;
@@ -480,8 +525,8 @@ inline std::tm localtime(std::time_t time) {
 #if FMT_USE_LOCAL_TIME
 template <typename Duration>
 inline auto localtime(std::chrono::local_time<Duration> time) -> std::tm {
-  return localtime(std::chrono::system_clock::to_time_t(
-      std::chrono::current_zone()->to_sys(time)));
+  return localtime(
+      detail::to_time_t(std::chrono::current_zone()->to_sys(time)));
 }
 #endif
 
@@ -490,29 +535,29 @@ inline auto localtime(std::chrono::local_time<Duration> time) -> std::tm {
   expressed in Coordinated Universal Time (UTC). Unlike ``std::gmtime``, this
   function is thread-safe on most platforms.
  */
-inline std::tm gmtime(std::time_t time) {
+inline auto gmtime(std::time_t time) -> std::tm {
   struct dispatcher {
     std::time_t time_;
     std::tm tm_;
 
     dispatcher(std::time_t t) : time_(t) {}
 
-    bool run() {
+    auto run() -> bool {
       using namespace fmt::detail;
       return handle(gmtime_r(&time_, &tm_));
     }
 
-    bool handle(std::tm* tm) { return tm != nullptr; }
+    auto handle(std::tm* tm) -> bool { return tm != nullptr; }
 
-    bool handle(detail::null<>) {
+    auto handle(detail::null<>) -> bool {
       using namespace fmt::detail;
       return fallback(gmtime_s(&tm_, &time_));
     }
 
-    bool fallback(int res) { return res == 0; }
+    auto fallback(int res) -> bool { return res == 0; }
 
 #if !FMT_MSC_VERSION
-    bool fallback(detail::null<>) {
+    auto fallback(detail::null<>) -> bool {
       std::tm* tm = std::gmtime(&time_);
       if (tm) tm_ = *tm;
       return tm != nullptr;
@@ -525,9 +570,11 @@ inline std::tm gmtime(std::time_t time) {
   return gt.tm_;
 }
 
-inline std::tm gmtime(
-    std::chrono::time_point<std::chrono::system_clock> time_point) {
-  return gmtime(std::chrono::system_clock::to_time_t(time_point));
+template <typename Duration>
+inline auto gmtime(
+    std::chrono::time_point<std::chrono::system_clock, Duration> time_point)
+    -> std::tm {
+  return gmtime(detail::to_time_t(time_point));
 }
 
 namespace detail {
@@ -566,7 +613,8 @@ inline void write_digit2_separated(char* buf, unsigned a, unsigned b,
   }
 }
 
-template <typename Period> FMT_CONSTEXPR inline const char* get_units() {
+template <typename Period>
+FMT_CONSTEXPR inline auto get_units() -> const char* {
   if (std::is_same<Period, std::atto>::value) return "as";
   if (std::is_same<Period, std::femto>::value) return "fs";
   if (std::is_same<Period, std::pico>::value) return "ps";
@@ -584,8 +632,9 @@ template <typename Period> FMT_CONSTEXPR inline const char* get_units() {
   if (std::is_same<Period, std::tera>::value) return "Ts";
   if (std::is_same<Period, std::peta>::value) return "Ps";
   if (std::is_same<Period, std::exa>::value) return "Es";
-  if (std::is_same<Period, std::ratio<60>>::value) return "m";
+  if (std::is_same<Period, std::ratio<60>>::value) return "min";
   if (std::is_same<Period, std::ratio<3600>>::value) return "h";
+  if (std::is_same<Period, std::ratio<86400>>::value) return "d";
   return nullptr;
 }
 
@@ -621,9 +670,8 @@ auto write_padding(OutputIt out, pad_type pad) -> OutputIt {
 
 // Parses a put_time-like format string and invokes handler actions.
 template <typename Char, typename Handler>
-FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin,
-                                              const Char* end,
-                                              Handler&& handler) {
+FMT_CONSTEXPR auto parse_chrono_format(const Char* begin, const Char* end,
+                                       Handler&& handler) -> const Char* {
   if (begin == end || *begin == '}') return begin;
   if (*begin != '%') FMT_THROW(format_error("invalid format"));
   auto ptr = begin;
@@ -954,25 +1002,25 @@ struct tm_format_checker : null_chrono_spec_handler<tm_format_checker> {
   FMT_CONSTEXPR void on_tz_name() {}
 };
 
-inline const char* tm_wday_full_name(int wday) {
+inline auto tm_wday_full_name(int wday) -> const char* {
   static constexpr const char* full_name_list[] = {
       "Sunday",   "Monday", "Tuesday", "Wednesday",
       "Thursday", "Friday", "Saturday"};
   return wday >= 0 && wday <= 6 ? full_name_list[wday] : "?";
 }
-inline const char* tm_wday_short_name(int wday) {
+inline auto tm_wday_short_name(int wday) -> const char* {
   static constexpr const char* short_name_list[] = {"Sun", "Mon", "Tue", "Wed",
                                                     "Thu", "Fri", "Sat"};
   return wday >= 0 && wday <= 6 ? short_name_list[wday] : "???";
 }
 
-inline const char* tm_mon_full_name(int mon) {
+inline auto tm_mon_full_name(int mon) -> const char* {
   static constexpr const char* full_name_list[] = {
       "January", "February", "March",     "April",   "May",      "June",
       "July",    "August",   "September", "October", "November", "December"};
   return mon >= 0 && mon <= 11 ? full_name_list[mon] : "?";
 }
-inline const char* tm_mon_short_name(int mon) {
+inline auto tm_mon_short_name(int mon) -> const char* {
   static constexpr const char* short_name_list[] = {
       "Jan", "Feb", "Mar", "Apr", "May", "Jun",
       "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
@@ -1004,21 +1052,21 @@ inline void tzset_once() {
 
 // Converts value to Int and checks that it's in the range [0, upper).
 template <typename T, typename Int, FMT_ENABLE_IF(std::is_integral<T>::value)>
-inline Int to_nonnegative_int(T value, Int upper) {
-  FMT_ASSERT(std::is_unsigned<Int>::value ||
-                 (value >= 0 && to_unsigned(value) <= to_unsigned(upper)),
-             "invalid value");
-  (void)upper;
+inline auto to_nonnegative_int(T value, Int upper) -> Int {
+  if (!std::is_unsigned<Int>::value &&
+      (value < 0 || to_unsigned(value) > to_unsigned(upper))) {
+    FMT_THROW(fmt::format_error("chrono value is out of range"));
+  }
   return static_cast<Int>(value);
 }
 template <typename T, typename Int, FMT_ENABLE_IF(!std::is_integral<T>::value)>
-inline Int to_nonnegative_int(T value, Int upper) {
+inline auto to_nonnegative_int(T value, Int upper) -> Int {
   if (value < 0 || value > static_cast<T>(upper))
     FMT_THROW(format_error("invalid value"));
   return static_cast<Int>(value);
 }
 
-constexpr long long pow10(std::uint32_t n) {
+constexpr auto pow10(std::uint32_t n) -> long long {
   return n == 0 ? 1 : 10 * pow10(n - 1);
 }
 
@@ -1052,13 +1100,12 @@ void write_fractional_seconds(OutputIt& out, Duration d, int precision = -1) {
                                 std::chrono::seconds::rep>::type,
       std::ratio<1, detail::pow10(num_fractional_digits)>>;
 
-  const auto fractional =
-      d - std::chrono::duration_cast<std::chrono::seconds>(d);
+  const auto fractional = d - fmt_duration_cast<std::chrono::seconds>(d);
   const auto subseconds =
       std::chrono::treat_as_floating_point<
           typename subsecond_precision::rep>::value
           ? fractional.count()
-          : std::chrono::duration_cast<subsecond_precision>(fractional).count();
+          : fmt_duration_cast<subsecond_precision>(fractional).count();
   auto n = static_cast<uint32_or_64_or_128_t<long long>>(subseconds);
   const int num_digits = detail::count_digits(n);
 
@@ -1109,11 +1156,11 @@ void write_floating_seconds(memory_buffer& buf, Duration duration,
       num_fractional_digits = 6;
   }
 
-  format_to(std::back_inserter(buf), FMT_STRING("{:.{}f}"),
-            std::fmod(val * static_cast<rep>(Duration::period::num) /
-                          static_cast<rep>(Duration::period::den),
-                      static_cast<rep>(60)),
-            num_fractional_digits);
+  fmt::format_to(std::back_inserter(buf), FMT_STRING("{:.{}f}"),
+                 std::fmod(val * static_cast<rep>(Duration::period::num) /
+                               static_cast<rep>(Duration::period::den),
+                           static_cast<rep>(60)),
+                 num_fractional_digits);
 }
 
 template <typename OutputIt, typename Char,
@@ -1174,8 +1221,7 @@ class tm_writer {
     return static_cast<int>(l);
   }
 
-  // Algorithm:
-  // https://en.wikipedia.org/wiki/ISO_week_date#Calculating_the_week_number_from_a_month_and_day_of_the_month_or_ordinal_date
+  // Algorithm: https://en.wikipedia.org/wiki/ISO_week_date.
   auto iso_year_weeks(long long curr_year) const noexcept -> int {
     const auto prev_year = curr_year - 1;
     const auto curr_p =
@@ -1315,7 +1361,7 @@ class tm_writer {
         subsecs_(subsecs),
         tm_(tm) {}
 
-  OutputIt out() const { return out_; }
+  auto out() const -> OutputIt { return out_; }
 
   FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) {
     out_ = copy_str<Char>(begin, end, out_);
@@ -1579,6 +1625,7 @@ struct chrono_format_checker : null_chrono_spec_handler<chrono_format_checker> {
 
   template <typename Char>
   FMT_CONSTEXPR void on_text(const Char*, const Char*) {}
+  FMT_CONSTEXPR void on_day_of_year() {}
   FMT_CONSTEXPR void on_24_hour(numeric_system, pad_type) {}
   FMT_CONSTEXPR void on_12_hour(numeric_system, pad_type) {}
   FMT_CONSTEXPR void on_minute(numeric_system, pad_type) {}
@@ -1597,16 +1644,16 @@ struct chrono_format_checker : null_chrono_spec_handler<chrono_format_checker> {
 
 template <typename T,
           FMT_ENABLE_IF(std::is_integral<T>::value&& has_isfinite<T>::value)>
-inline bool isfinite(T) {
+inline auto isfinite(T) -> bool {
   return true;
 }
 
 template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
-inline T mod(T x, int y) {
+inline auto mod(T x, int y) -> T {
   return x % static_cast<T>(y);
 }
 template <typename T, FMT_ENABLE_IF(std::is_floating_point<T>::value)>
-inline T mod(T x, int y) {
+inline auto mod(T x, int y) -> T {
   return std::fmod(x, static_cast<T>(y));
 }
 
@@ -1621,49 +1668,38 @@ template <typename T> struct make_unsigned_or_unchanged<T, true> {
   using type = typename std::make_unsigned<T>::type;
 };
 
-#if FMT_SAFE_DURATION_CAST
-// throwing version of safe_duration_cast
-template <typename To, typename FromRep, typename FromPeriod>
-To fmt_safe_duration_cast(std::chrono::duration<FromRep, FromPeriod> from) {
-  int ec;
-  To to = safe_duration_cast::safe_duration_cast<To>(from, ec);
-  if (ec) FMT_THROW(format_error("cannot format duration"));
-  return to;
-}
-#endif
-
 template <typename Rep, typename Period,
           FMT_ENABLE_IF(std::is_integral<Rep>::value)>
-inline std::chrono::duration<Rep, std::milli> get_milliseconds(
-    std::chrono::duration<Rep, Period> d) {
+inline auto get_milliseconds(std::chrono::duration<Rep, Period> d)
+    -> std::chrono::duration<Rep, std::milli> {
   // this may overflow and/or the result may not fit in the
   // target type.
 #if FMT_SAFE_DURATION_CAST
   using CommonSecondsType =
       typename std::common_type<decltype(d), std::chrono::seconds>::type;
-  const auto d_as_common = fmt_safe_duration_cast<CommonSecondsType>(d);
+  const auto d_as_common = fmt_duration_cast<CommonSecondsType>(d);
   const auto d_as_whole_seconds =
-      fmt_safe_duration_cast<std::chrono::seconds>(d_as_common);
+      fmt_duration_cast<std::chrono::seconds>(d_as_common);
   // this conversion should be nonproblematic
   const auto diff = d_as_common - d_as_whole_seconds;
   const auto ms =
-      fmt_safe_duration_cast<std::chrono::duration<Rep, std::milli>>(diff);
+      fmt_duration_cast<std::chrono::duration<Rep, std::milli>>(diff);
   return ms;
 #else
-  auto s = std::chrono::duration_cast<std::chrono::seconds>(d);
-  return std::chrono::duration_cast<std::chrono::milliseconds>(d - s);
+  auto s = fmt_duration_cast<std::chrono::seconds>(d);
+  return fmt_duration_cast<std::chrono::milliseconds>(d - s);
 #endif
 }
 
 template <typename Char, typename Rep, typename OutputIt,
           FMT_ENABLE_IF(std::is_integral<Rep>::value)>
-OutputIt format_duration_value(OutputIt out, Rep val, int) {
+auto format_duration_value(OutputIt out, Rep val, int) -> OutputIt {
   return write<Char>(out, val);
 }
 
 template <typename Char, typename Rep, typename OutputIt,
           FMT_ENABLE_IF(std::is_floating_point<Rep>::value)>
-OutputIt format_duration_value(OutputIt out, Rep val, int precision) {
+auto format_duration_value(OutputIt out, Rep val, int precision) -> OutputIt {
   auto specs = format_specs<Char>();
   specs.precision = precision;
   specs.type = precision >= 0 ? presentation_type::fixed_lower
@@ -1672,12 +1708,12 @@ OutputIt format_duration_value(OutputIt out, Rep val, int precision) {
 }
 
 template <typename Char, typename OutputIt>
-OutputIt copy_unit(string_view unit, OutputIt out, Char) {
+auto copy_unit(string_view unit, OutputIt out, Char) -> OutputIt {
   return std::copy(unit.begin(), unit.end(), out);
 }
 
 template <typename OutputIt>
-OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) {
+auto copy_unit(string_view unit, OutputIt out, wchar_t) -> OutputIt {
   // This works when wchar_t is UTF-32 because units only contain characters
   // that have the same representation in UTF-16 and UTF-32.
   utf8_to_utf16 u(unit);
@@ -1685,7 +1721,7 @@ OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) {
 }
 
 template <typename Char, typename Period, typename OutputIt>
-OutputIt format_duration_unit(OutputIt out) {
+auto format_duration_unit(OutputIt out) -> OutputIt {
   if (const char* unit = get_units<Period>())
     return copy_unit(string_view(unit), out, Char());
   *out++ = '[';
@@ -1752,18 +1788,12 @@ struct chrono_formatter {
 
     // this may overflow and/or the result may not fit in the
     // target type.
-#if FMT_SAFE_DURATION_CAST
     // might need checked conversion (rep!=Rep)
-    auto tmpval = std::chrono::duration<rep, Period>(val);
-    s = fmt_safe_duration_cast<seconds>(tmpval);
-#else
-    s = std::chrono::duration_cast<seconds>(
-        std::chrono::duration<rep, Period>(val));
-#endif
+    s = fmt_duration_cast<seconds>(std::chrono::duration<rep, Period>(val));
   }
 
   // returns true if nan or inf, writes to out.
-  bool handle_nan_inf() {
+  auto handle_nan_inf() -> bool {
     if (isfinite(val)) {
       return false;
     }
@@ -1780,17 +1810,22 @@ struct chrono_formatter {
     return true;
   }
 
-  Rep hour() const { return static_cast<Rep>(mod((s.count() / 3600), 24)); }
+  auto days() const -> Rep { return static_cast<Rep>(s.count() / 86400); }
+  auto hour() const -> Rep {
+    return static_cast<Rep>(mod((s.count() / 3600), 24));
+  }
 
-  Rep hour12() const {
+  auto hour12() const -> Rep {
     Rep hour = static_cast<Rep>(mod((s.count() / 3600), 12));
     return hour <= 0 ? 12 : hour;
   }
 
-  Rep minute() const { return static_cast<Rep>(mod((s.count() / 60), 60)); }
-  Rep second() const { return static_cast<Rep>(mod(s.count(), 60)); }
+  auto minute() const -> Rep {
+    return static_cast<Rep>(mod((s.count() / 60), 60));
+  }
+  auto second() const -> Rep { return static_cast<Rep>(mod(s.count(), 60)); }
 
-  std::tm time() const {
+  auto time() const -> std::tm {
     auto time = std::tm();
     time.tm_hour = to_nonnegative_int(hour(), 24);
     time.tm_min = to_nonnegative_int(minute(), 60);
@@ -1858,10 +1893,14 @@ struct chrono_formatter {
   void on_dec0_week_of_year(numeric_system) {}
   void on_dec1_week_of_year(numeric_system) {}
   void on_iso_week_of_year(numeric_system) {}
-  void on_day_of_year() {}
   void on_day_of_month(numeric_system) {}
   void on_day_of_month_space(numeric_system) {}
 
+  void on_day_of_year() {
+    if (handle_nan_inf()) return;
+    write(days(), 0);
+  }
+
   void on_24_hour(numeric_system ns, pad_type pad) {
     if (handle_nan_inf()) return;
 
@@ -1968,7 +2007,7 @@ class weekday {
   weekday() = default;
   explicit constexpr weekday(unsigned wd) noexcept
       : value(static_cast<unsigned char>(wd != 7 ? wd : 0)) {}
-  constexpr unsigned c_encoding() const noexcept { return value; }
+  constexpr auto c_encoding() const noexcept -> unsigned { return value; }
 };
 
 class year_month_day {};
@@ -2083,25 +2122,22 @@ struct formatter<std::chrono::time_point<std::chrono::system_clock, Duration>,
             period::num != 1 || period::den != 1 ||
             std::is_floating_point<typename Duration::rep>::value)) {
       const auto epoch = val.time_since_epoch();
-      auto subsecs = std::chrono::duration_cast<Duration>(
-          epoch - std::chrono::duration_cast<std::chrono::seconds>(epoch));
+      auto subsecs = detail::fmt_duration_cast<Duration>(
+          epoch - detail::fmt_duration_cast<std::chrono::seconds>(epoch));
 
       if (subsecs.count() < 0) {
         auto second =
-            std::chrono::duration_cast<Duration>(std::chrono::seconds(1));
+            detail::fmt_duration_cast<Duration>(std::chrono::seconds(1));
         if (epoch.count() < ((Duration::min)() + second).count())
           FMT_THROW(format_error("duration is too small"));
         subsecs += second;
         val -= second;
       }
 
-      return formatter<std::tm, Char>::do_format(
-          gmtime(std::chrono::time_point_cast<std::chrono::seconds>(val)), ctx,
-          &subsecs);
+      return formatter<std::tm, Char>::do_format(gmtime(val), ctx, &subsecs);
     }
 
-    return formatter<std::tm, Char>::format(
-        gmtime(std::chrono::time_point_cast<std::chrono::seconds>(val)), ctx);
+    return formatter<std::tm, Char>::format(gmtime(val), ctx);
   }
 };
 
@@ -2120,17 +2156,13 @@ struct formatter<std::chrono::local_time<Duration>, Char>
     if (period::num != 1 || period::den != 1 ||
         std::is_floating_point<typename Duration::rep>::value) {
       const auto epoch = val.time_since_epoch();
-      const auto subsecs = std::chrono::duration_cast<Duration>(
-          epoch - std::chrono::duration_cast<std::chrono::seconds>(epoch));
+      const auto subsecs = detail::fmt_duration_cast<Duration>(
+          epoch - detail::fmt_duration_cast<std::chrono::seconds>(epoch));
 
-      return formatter<std::tm, Char>::do_format(
-          localtime(std::chrono::time_point_cast<std::chrono::seconds>(val)),
-          ctx, &subsecs);
+      return formatter<std::tm, Char>::do_format(localtime(val), ctx, &subsecs);
     }
 
-    return formatter<std::tm, Char>::format(
-        localtime(std::chrono::time_point_cast<std::chrono::seconds>(val)),
-        ctx);
+    return formatter<std::tm, Char>::format(localtime(val), ctx);
   }
 };
 #endif
diff --git a/src/fmt/color.h b/src/fmt/color.h
index 8697e1ca0b..464519e582 100644
--- a/src/fmt/color.h
+++ b/src/fmt/color.h
@@ -233,7 +233,7 @@ class text_style {
   FMT_CONSTEXPR text_style(emphasis em = emphasis()) noexcept
       : set_foreground_color(), set_background_color(), ems(em) {}
 
-  FMT_CONSTEXPR text_style& operator|=(const text_style& rhs) {
+  FMT_CONSTEXPR auto operator|=(const text_style& rhs) -> text_style& {
     if (!set_foreground_color) {
       set_foreground_color = rhs.set_foreground_color;
       foreground_color = rhs.foreground_color;
@@ -257,29 +257,29 @@ class text_style {
     return *this;
   }
 
-  friend FMT_CONSTEXPR text_style operator|(text_style lhs,
-                                            const text_style& rhs) {
+  friend FMT_CONSTEXPR auto operator|(text_style lhs, const text_style& rhs)
+      -> text_style {
     return lhs |= rhs;
   }
 
-  FMT_CONSTEXPR bool has_foreground() const noexcept {
+  FMT_CONSTEXPR auto has_foreground() const noexcept -> bool {
     return set_foreground_color;
   }
-  FMT_CONSTEXPR bool has_background() const noexcept {
+  FMT_CONSTEXPR auto has_background() const noexcept -> bool {
     return set_background_color;
   }
-  FMT_CONSTEXPR bool has_emphasis() const noexcept {
+  FMT_CONSTEXPR auto has_emphasis() const noexcept -> bool {
     return static_cast<uint8_t>(ems) != 0;
   }
-  FMT_CONSTEXPR detail::color_type get_foreground() const noexcept {
+  FMT_CONSTEXPR auto get_foreground() const noexcept -> detail::color_type {
     FMT_ASSERT(has_foreground(), "no foreground specified for this style");
     return foreground_color;
   }
-  FMT_CONSTEXPR detail::color_type get_background() const noexcept {
+  FMT_CONSTEXPR auto get_background() const noexcept -> detail::color_type {
     FMT_ASSERT(has_background(), "no background specified for this style");
     return background_color;
   }
-  FMT_CONSTEXPR emphasis get_emphasis() const noexcept {
+  FMT_CONSTEXPR auto get_emphasis() const noexcept -> emphasis {
     FMT_ASSERT(has_emphasis(), "no emphasis specified for this style");
     return ems;
   }
@@ -297,9 +297,11 @@ class text_style {
     }
   }
 
-  friend FMT_CONSTEXPR text_style fg(detail::color_type foreground) noexcept;
+  friend FMT_CONSTEXPR auto fg(detail::color_type foreground) noexcept
+      -> text_style;
 
-  friend FMT_CONSTEXPR text_style bg(detail::color_type background) noexcept;
+  friend FMT_CONSTEXPR auto bg(detail::color_type background) noexcept
+      -> text_style;
 
   detail::color_type foreground_color;
   detail::color_type background_color;
@@ -309,16 +311,19 @@ class text_style {
 };
 
 /** Creates a text style from the foreground (text) color. */
-FMT_CONSTEXPR inline text_style fg(detail::color_type foreground) noexcept {
+FMT_CONSTEXPR inline auto fg(detail::color_type foreground) noexcept
+    -> text_style {
   return text_style(true, foreground);
 }
 
 /** Creates a text style from the background color. */
-FMT_CONSTEXPR inline text_style bg(detail::color_type background) noexcept {
+FMT_CONSTEXPR inline auto bg(detail::color_type background) noexcept
+    -> text_style {
   return text_style(false, background);
 }
 
-FMT_CONSTEXPR inline text_style operator|(emphasis lhs, emphasis rhs) noexcept {
+FMT_CONSTEXPR inline auto operator|(emphasis lhs, emphasis rhs) noexcept
+    -> text_style {
   return text_style(lhs) | rhs;
 }
 
@@ -384,8 +389,8 @@ template <typename Char> struct ansi_color_escape {
   }
   FMT_CONSTEXPR operator const Char*() const noexcept { return buffer; }
 
-  FMT_CONSTEXPR const Char* begin() const noexcept { return buffer; }
-  FMT_CONSTEXPR_CHAR_TRAITS const Char* end() const noexcept {
+  FMT_CONSTEXPR auto begin() const noexcept -> const Char* { return buffer; }
+  FMT_CONSTEXPR20 auto end() const noexcept -> const Char* {
     return buffer + std::char_traits<Char>::length(buffer);
   }
 
@@ -400,25 +405,27 @@ template <typename Char> struct ansi_color_escape {
     out[2] = static_cast<Char>('0' + c % 10);
     out[3] = static_cast<Char>(delimiter);
   }
-  static FMT_CONSTEXPR bool has_emphasis(emphasis em, emphasis mask) noexcept {
+  static FMT_CONSTEXPR auto has_emphasis(emphasis em, emphasis mask) noexcept
+      -> bool {
     return static_cast<uint8_t>(em) & static_cast<uint8_t>(mask);
   }
 };
 
 template <typename Char>
-FMT_CONSTEXPR ansi_color_escape<Char> make_foreground_color(
-    detail::color_type foreground) noexcept {
+FMT_CONSTEXPR auto make_foreground_color(detail::color_type foreground) noexcept
+    -> ansi_color_escape<Char> {
   return ansi_color_escape<Char>(foreground, "\x1b[38;2;");
 }
 
 template <typename Char>
-FMT_CONSTEXPR ansi_color_escape<Char> make_background_color(
-    detail::color_type background) noexcept {
+FMT_CONSTEXPR auto make_background_color(detail::color_type background) noexcept
+    -> ansi_color_escape<Char> {
   return ansi_color_escape<Char>(background, "\x1b[48;2;");
 }
 
 template <typename Char>
-FMT_CONSTEXPR ansi_color_escape<Char> make_emphasis(emphasis em) noexcept {
+FMT_CONSTEXPR auto make_emphasis(emphasis em) noexcept
+    -> ansi_color_escape<Char> {
   return ansi_color_escape<Char>(em);
 }
 
@@ -427,9 +434,10 @@ template <typename Char> inline void reset_color(buffer<Char>& buffer) {
   buffer.append(reset_color.begin(), reset_color.end());
 }
 
-template <typename T> struct styled_arg {
+template <typename T> struct styled_arg : detail::view {
   const T& value;
   text_style style;
+  styled_arg(const T& v, text_style s) : value(v), style(s) {}
 };
 
 template <typename Char>
@@ -510,9 +518,10 @@ void print(const text_style& ts, const S& format_str, const Args&... args) {
 }
 
 template <typename S, typename Char = char_t<S>>
-inline std::basic_string<Char> vformat(
+inline auto vformat(
     const text_style& ts, const S& format_str,
-    basic_format_args<buffer_context<type_identity_t<Char>>> args) {
+    basic_format_args<buffer_context<type_identity_t<Char>>> args)
+    -> std::basic_string<Char> {
   basic_memory_buffer<Char> buf;
   detail::vformat_to(buf, ts, detail::to_string_view(format_str), args);
   return fmt::to_string(buf);
@@ -531,8 +540,8 @@ inline std::basic_string<Char> vformat(
   \endrst
 */
 template <typename S, typename... Args, typename Char = char_t<S>>
-inline std::basic_string<Char> format(const text_style& ts, const S& format_str,
-                                      const Args&... args) {
+inline auto format(const text_style& ts, const S& format_str,
+                   const Args&... args) -> std::basic_string<Char> {
   return fmt::vformat(ts, detail::to_string_view(format_str),
                       fmt::make_format_args<buffer_context<Char>>(args...));
 }
@@ -542,9 +551,10 @@ inline std::basic_string<Char> format(const text_style& ts, const S& format_str,
  */
 template <typename OutputIt, typename Char,
           FMT_ENABLE_IF(detail::is_output_iterator<OutputIt, Char>::value)>
-OutputIt vformat_to(
-    OutputIt out, const text_style& ts, basic_string_view<Char> format_str,
-    basic_format_args<buffer_context<type_identity_t<Char>>> args) {
+auto vformat_to(OutputIt out, const text_style& ts,
+                basic_string_view<Char> format_str,
+                basic_format_args<buffer_context<type_identity_t<Char>>> args)
+    -> OutputIt {
   auto&& buf = detail::get_buffer<Char>(out);
   detail::vformat_to(buf, ts, format_str, args);
   return detail::get_iterator(buf, out);
@@ -562,9 +572,10 @@ OutputIt vformat_to(
                    fmt::emphasis::bold | fg(fmt::color::red), "{}", 42);
   \endrst
 */
-template <typename OutputIt, typename S, typename... Args,
-          bool enable = detail::is_output_iterator<OutputIt, char_t<S>>::value&&
-              detail::is_string<S>::value>
+template <
+    typename OutputIt, typename S, typename... Args,
+    bool enable = detail::is_output_iterator<OutputIt, char_t<S>>::value &&
+                  detail::is_string<S>::value>
 inline auto format_to(OutputIt out, const text_style& ts, const S& format_str,
                       Args&&... args) ->
     typename std::enable_if<enable, OutputIt>::type {
diff --git a/src/fmt/compile.h b/src/fmt/compile.h
index af76507f07..71fa69c67e 100644
--- a/src/fmt/compile.h
+++ b/src/fmt/compile.h
@@ -14,8 +14,8 @@ FMT_BEGIN_NAMESPACE
 namespace detail {
 
 template <typename Char, typename InputIt>
-FMT_CONSTEXPR inline counting_iterator copy_str(InputIt begin, InputIt end,
-                                                counting_iterator it) {
+FMT_CONSTEXPR inline auto copy_str(InputIt begin, InputIt end,
+                                   counting_iterator it) -> counting_iterator {
   return it + (end - begin);
 }
 
@@ -57,7 +57,7 @@ struct udl_compiled_string : compiled_string {
 #endif
 
 template <typename T, typename... Tail>
-const T& first(const T& value, const Tail&...) {
+auto first(const T& value, const Tail&...) -> const T& {
   return value;
 }
 
@@ -489,18 +489,19 @@ FMT_CONSTEXPR OutputIt format_to(OutputIt out, const S&, Args&&... args) {
 
 template <typename OutputIt, typename S, typename... Args,
           FMT_ENABLE_IF(detail::is_compiled_string<S>::value)>
-format_to_n_result<OutputIt> format_to_n(OutputIt out, size_t n,
-                                         const S& format_str, Args&&... args) {
+auto format_to_n(OutputIt out, size_t n, const S& format_str, Args&&... args)
+    -> format_to_n_result<OutputIt> {
   using traits = detail::fixed_buffer_traits;
   auto buf = detail::iterator_buffer<OutputIt, char, traits>(out, n);
-  format_to(std::back_inserter(buf), format_str, std::forward<Args>(args)...);
+  fmt::format_to(std::back_inserter(buf), format_str,
+                 std::forward<Args>(args)...);
   return {buf.out(), buf.count()};
 }
 
 template <typename S, typename... Args,
           FMT_ENABLE_IF(detail::is_compiled_string<S>::value)>
-FMT_CONSTEXPR20 size_t formatted_size(const S& format_str,
-                                      const Args&... args) {
+FMT_CONSTEXPR20 auto formatted_size(const S& format_str, const Args&... args)
+    -> size_t {
   return fmt::format_to(detail::counting_iterator(), format_str, args...)
       .count();
 }
diff --git a/src/fmt/core.h b/src/fmt/core.h
index 9f7de781bb..6a53b8c52c 100644
--- a/src/fmt/core.h
+++ b/src/fmt/core.h
@@ -8,17 +8,15 @@
 #ifndef FMT_CORE_H_
 #define FMT_CORE_H_
 
-#include <cstddef>  // std::byte
-#include <cstdio>   // std::FILE
-#include <cstring>  // std::strlen
-#include <iterator>
-#include <limits>
-#include <memory>  // std::addressof
-#include <string>
-#include <type_traits>
+#include <cstddef>      // std::byte
+#include <cstdio>       // std::FILE
+#include <cstring>      // std::strlen
+#include <limits.h>     // CHAR_BIT
+#include <string>       // std::string
+#include <type_traits>  // std::enable_if
 
 // The fmt library version in the form major * 10000 + minor * 100 + patch.
-#define FMT_VERSION 100100
+#define FMT_VERSION 100200
 
 #if defined(__clang__) && !defined(__ibmxl__)
 #  define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__)
@@ -58,6 +56,12 @@
 #  define FMT_MSC_WARNING(...)
 #endif
 
+#ifdef _GLIBCXX_RELEASE
+#  define FMT_GLIBCXX_RELEASE _GLIBCXX_RELEASE
+#else
+#  define FMT_GLIBCXX_RELEASE 0
+#endif
+
 #ifdef _MSVC_LANG
 #  define FMT_CPLUSPLUS _MSVC_LANG
 #else
@@ -88,6 +92,20 @@
 #define FMT_HAS_CPP17_ATTRIBUTE(attribute) \
   (FMT_CPLUSPLUS >= 201703L && FMT_HAS_CPP_ATTRIBUTE(attribute))
 
+#ifndef FMT_DEPRECATED
+#  if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VERSION >= 1900
+#    define FMT_DEPRECATED [[deprecated]]
+#  else
+#    if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__)
+#      define FMT_DEPRECATED __attribute__((deprecated))
+#    elif FMT_MSC_VERSION
+#      define FMT_DEPRECATED __declspec(deprecated)
+#    else
+#      define FMT_DEPRECATED /* deprecated */
+#    endif
+#  endif
+#endif
+
 // Check if relaxed C++14 constexpr is supported.
 // GCC doesn't allow throw in constexpr until version 6 (bug 67371).
 #ifndef FMT_USE_CONSTEXPR
@@ -105,30 +123,17 @@
 #  define FMT_CONSTEXPR
 #endif
 
-#if ((FMT_CPLUSPLUS >= 202002L) &&                            \
-     (!defined(_GLIBCXX_RELEASE) || _GLIBCXX_RELEASE > 9)) || \
-    (FMT_CPLUSPLUS >= 201709L && FMT_GCC_VERSION >= 1002)
+#if (FMT_CPLUSPLUS >= 202002L ||                                \
+     (FMT_CPLUSPLUS >= 201709L && FMT_GCC_VERSION >= 1002)) &&  \
+    ((!FMT_GLIBCXX_RELEASE || FMT_GLIBCXX_RELEASE >= 10) &&     \
+     (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION >= 10000) && \
+     (!FMT_MSC_VERSION || FMT_MSC_VERSION >= 1928)) &&          \
+    defined(__cpp_lib_is_constant_evaluated)
 #  define FMT_CONSTEXPR20 constexpr
 #else
 #  define FMT_CONSTEXPR20
 #endif
 
-// Check if constexpr std::char_traits<>::{compare,length} are supported.
-#if defined(__GLIBCXX__)
-#  if FMT_CPLUSPLUS >= 201703L && defined(_GLIBCXX_RELEASE) && \
-      _GLIBCXX_RELEASE >= 7  // GCC 7+ libstdc++ has _GLIBCXX_RELEASE.
-#    define FMT_CONSTEXPR_CHAR_TRAITS constexpr
-#  endif
-#elif defined(_LIBCPP_VERSION) && FMT_CPLUSPLUS >= 201703L && \
-    _LIBCPP_VERSION >= 4000
-#  define FMT_CONSTEXPR_CHAR_TRAITS constexpr
-#elif FMT_MSC_VERSION >= 1914 && FMT_CPLUSPLUS >= 201703L
-#  define FMT_CONSTEXPR_CHAR_TRAITS constexpr
-#endif
-#ifndef FMT_CONSTEXPR_CHAR_TRAITS
-#  define FMT_CONSTEXPR_CHAR_TRAITS
-#endif
-
 // Check if exceptions are disabled.
 #ifndef FMT_EXCEPTIONS
 #  if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \
@@ -191,33 +196,25 @@
 #  define FMT_END_EXPORT
 #endif
 
+#if FMT_GCC_VERSION || FMT_CLANG_VERSION
+#  define FMT_VISIBILITY(value) __attribute__((visibility(value)))
+#else
+#  define FMT_VISIBILITY(value)
+#endif
+
 #if !defined(FMT_HEADER_ONLY) && defined(_WIN32)
-#  ifdef FMT_LIB_EXPORT
+#  if defined(FMT_LIB_EXPORT)
 #    define FMT_API __declspec(dllexport)
 #  elif defined(FMT_SHARED)
 #    define FMT_API __declspec(dllimport)
 #  endif
-#else
-#  if defined(FMT_LIB_EXPORT) || defined(FMT_SHARED)
-#    if defined(__GNUC__) || defined(__clang__)
-#      define FMT_API __attribute__((visibility("default")))
-#    endif
-#  endif
+#elif defined(FMT_LIB_EXPORT) || defined(FMT_SHARED)
+#  define FMT_API FMT_VISIBILITY("default")
 #endif
 #ifndef FMT_API
 #  define FMT_API
 #endif
 
-// libc++ supports string_view in pre-c++17.
-#if FMT_HAS_INCLUDE(<string_view>) && \
-    (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION))
-#  include <string_view>
-#  define FMT_USE_STRING_VIEW
-#elif FMT_HAS_INCLUDE("experimental/string_view") && FMT_CPLUSPLUS >= 201402L
-#  include <experimental/string_view>
-#  define FMT_USE_EXPERIMENTAL_STRING_VIEW
-#endif
-
 #ifndef FMT_UNICODE
 #  define FMT_UNICODE !FMT_MSC_VERSION
 #endif
@@ -228,8 +225,9 @@
         __apple_build_version__ >= 14000029L) &&                 \
        FMT_CPLUSPLUS >= 202002L) ||                              \
       (defined(__cpp_consteval) &&                               \
-       (!FMT_MSC_VERSION || _MSC_FULL_VER >= 193030704))
-// consteval is broken in MSVC before VS2022 and Apple clang before 14.
+       (!FMT_MSC_VERSION || FMT_MSC_VERSION >= 1929))
+// consteval is broken in MSVC before VS2019 version 16.10 and Apple clang
+// before 14.
 #    define FMT_CONSTEVAL consteval
 #    define FMT_HAS_CONSTEVAL
 #  else
@@ -248,6 +246,15 @@
 #  endif
 #endif
 
+// GCC < 5 requires this-> in decltype.
+#ifndef FMT_DECLTYPE_THIS
+#  if FMT_GCC_VERSION && FMT_GCC_VERSION < 500
+#    define FMT_DECLTYPE_THIS this->
+#  else
+#    define FMT_DECLTYPE_THIS
+#  endif
+#endif
+
 // Enable minimal optimizations for more compact code in debug mode.
 FMT_GCC_PRAGMA("GCC push_options")
 #if !defined(__OPTIMIZE__) && !defined(__NVCOMPILER) && !defined(__LCC__) && \
@@ -269,20 +276,57 @@ template <typename T>
 using remove_const_t = typename std::remove_const<T>::type;
 template <typename T>
 using remove_cvref_t = typename std::remove_cv<remove_reference_t<T>>::type;
-template <typename T> struct type_identity { using type = T; };
+template <typename T> struct type_identity {
+  using type = T;
+};
 template <typename T> using type_identity_t = typename type_identity<T>::type;
 template <typename T>
 using underlying_t = typename std::underlying_type<T>::type;
 
-// Checks whether T is a container with contiguous storage.
-template <typename T> struct is_contiguous : std::false_type {};
-template <typename Char>
-struct is_contiguous<std::basic_string<Char>> : std::true_type {};
+#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500
+// A workaround for gcc 4.8 to make void_t work in a SFINAE context.
+template <typename...> struct void_t_impl {
+  using type = void;
+};
+template <typename... T> using void_t = typename void_t_impl<T...>::type;
+#else
+template <typename...> using void_t = void;
+#endif
 
 struct monostate {
   constexpr monostate() {}
 };
 
+// An implementation of back_insert_iterator to avoid dependency on <iterator>.
+template <typename Container> class back_insert_iterator {
+ private:
+  Container* container_;
+
+  friend auto get_container(back_insert_iterator it) -> Container& {
+    return *it.container_;
+  }
+
+ public:
+  using difference_type = ptrdiff_t;
+  FMT_UNCHECKED_ITERATOR(back_insert_iterator);
+
+  explicit back_insert_iterator(Container& c) : container_(&c) {}
+
+  auto operator=(const typename Container::value_type& value)
+      -> back_insert_iterator& {
+    container_->push_back(value);
+    return *this;
+  }
+  auto operator*() -> back_insert_iterator& { return *this; }
+  auto operator++() -> back_insert_iterator& { return *this; }
+  auto operator++(int) -> back_insert_iterator { return *this; }
+};
+
+template <typename Container>
+auto back_inserter(Container& c) -> back_insert_iterator<Container> {
+  return {c};
+}
+
 // An enable_if helper to be used in template parameters which results in much
 // shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed
 // to workaround a bug in MSVC 2019 (see #1140 and #1186).
@@ -310,10 +354,9 @@ template <typename... T> FMT_CONSTEXPR void ignore_unused(const T&...) {}
 constexpr FMT_INLINE auto is_constant_evaluated(
     bool default_value = false) noexcept -> bool {
 // Workaround for incompatibility between libstdc++ consteval-based
-// std::is_constant_evaluated() implementation and clang-14.
-// https://github.com/fmtlib/fmt/issues/3247
-#if FMT_CPLUSPLUS >= 202002L && defined(_GLIBCXX_RELEASE) && \
-    _GLIBCXX_RELEASE >= 12 &&                                \
+// std::is_constant_evaluated() implementation and clang-14:
+// https://github.com/fmtlib/fmt/issues/3247.
+#if FMT_CPLUSPLUS >= 202002L && FMT_GLIBCXX_RELEASE >= 12 && \
     (FMT_CLANG_VERSION >= 1400 && FMT_CLANG_VERSION < 1500)
   ignore_unused(default_value);
   return __builtin_is_constant_evaluated();
@@ -346,15 +389,6 @@ FMT_NORETURN FMT_API void assert_fail(const char* file, int line,
 #  endif
 #endif
 
-#if defined(FMT_USE_STRING_VIEW)
-template <typename Char> using std_string_view = std::basic_string_view<Char>;
-#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW)
-template <typename Char>
-using std_string_view = std::experimental::basic_string_view<Char>;
-#else
-template <typename T> struct std_string_view {};
-#endif
-
 #ifdef FMT_USE_INT128
 // Do nothing.
 #elif defined(__SIZEOF_INT128__) && !defined(__NVCC__) && \
@@ -386,6 +420,15 @@ FMT_CONSTEXPR auto to_unsigned(Int value) ->
   return static_cast<typename std::make_unsigned<Int>::type>(value);
 }
 
+template <typename T, typename Enable = void>
+struct is_string_like : std::false_type {};
+
+// A heuristic to detect std::string and std::string_view.
+template <typename T>
+struct is_string_like<T, void_t<decltype(std::declval<T>().find_first_of(
+                             typename T::value_type(), 0))>> : std::true_type {
+};
+
 FMT_CONSTEXPR inline auto is_utf8() -> bool {
   FMT_MSC_WARNING(suppress : 4566) constexpr unsigned char section[] = "\u00A7";
 
@@ -394,8 +437,33 @@ FMT_CONSTEXPR inline auto is_utf8() -> bool {
   return FMT_UNICODE || (sizeof(section) == 3 && uchar(section[0]) == 0xC2 &&
                          uchar(section[1]) == 0xA7);
 }
+
+template <typename Char> FMT_CONSTEXPR auto length(const Char* s) -> size_t {
+  size_t len = 0;
+  while (*s++) ++len;
+  return len;
+}
+
+template <typename Char>
+FMT_CONSTEXPR auto compare(const Char* s1, const Char* s2, std::size_t n)
+    -> int {
+  for (; n != 0; ++s1, ++s2, --n) {
+    if (*s1 < *s2) return -1;
+    if (*s1 > *s2) return 1;
+  }
+  return 0;
+}
 }  // namespace detail
 
+template <typename Char>
+using basic_string =
+    std::basic_string<Char, std::char_traits<Char>, std::allocator<Char>>;
+
+// Checks whether T is a container with contiguous storage.
+template <typename T> struct is_contiguous : std::false_type {};
+template <typename Char>
+struct is_contiguous<basic_string<Char>> : std::true_type {};
+
 /**
   An implementation of ``std::basic_string_view`` for pre-C++17. It provides a
   subset of the API. ``fmt::basic_string_view`` is used for format strings even
@@ -420,29 +488,25 @@ template <typename Char> class basic_string_view {
       : data_(s), size_(count) {}
 
   /**
-    \rst
-    Constructs a string reference object from a C string computing
-    the size with ``std::char_traits<Char>::length``.
-    \endrst
+    Constructs a string reference object from a C string.
    */
-  FMT_CONSTEXPR_CHAR_TRAITS
+  FMT_CONSTEXPR20
   FMT_INLINE
   basic_string_view(const Char* s)
       : data_(s),
         size_(detail::const_check(std::is_same<Char, char>::value &&
-                                  !detail::is_constant_evaluated(true))
+                                  !detail::is_constant_evaluated(false))
                   ? std::strlen(reinterpret_cast<const char*>(s))
-                  : std::char_traits<Char>::length(s)) {}
+                  : detail::length(s)) {}
 
-  /** Constructs a string reference from a ``std::basic_string`` object. */
-  template <typename Traits, typename Alloc>
-  FMT_CONSTEXPR basic_string_view(
-      const std::basic_string<Char, Traits, Alloc>& s) noexcept
-      : data_(s.data()), size_(s.size()) {}
-
-  template <typename S, FMT_ENABLE_IF(std::is_same<
-                                      S, detail::std_string_view<Char>>::value)>
-  FMT_CONSTEXPR basic_string_view(S s) noexcept
+  /**
+    Constructs a string reference from a ``std::basic_string`` or a
+    ``std::basic_string_view`` object.
+  */
+  template <typename S,
+            FMT_ENABLE_IF(detail::is_string_like<S>::value&& std::is_same<
+                          typename S::value_type, Char>::value)>
+  FMT_CONSTEXPR basic_string_view(const S& s) noexcept
       : data_(s.data()), size_(s.size()) {}
 
   /** Returns a pointer to the string data. */
@@ -463,30 +527,28 @@ template <typename Char> class basic_string_view {
     size_ -= n;
   }
 
-  FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(
-      basic_string_view<Char> sv) const noexcept {
-    return size_ >= sv.size_ &&
-           std::char_traits<Char>::compare(data_, sv.data_, sv.size_) == 0;
+  FMT_CONSTEXPR auto starts_with(basic_string_view<Char> sv) const noexcept
+      -> bool {
+    return size_ >= sv.size_ && detail::compare(data_, sv.data_, sv.size_) == 0;
   }
-  FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(Char c) const noexcept {
-    return size_ >= 1 && std::char_traits<Char>::eq(*data_, c);
+  FMT_CONSTEXPR auto starts_with(Char c) const noexcept -> bool {
+    return size_ >= 1 && *data_ == c;
   }
-  FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(const Char* s) const {
+  FMT_CONSTEXPR auto starts_with(const Char* s) const -> bool {
     return starts_with(basic_string_view<Char>(s));
   }
 
   // Lexicographically compare this string reference to other.
-  FMT_CONSTEXPR_CHAR_TRAITS auto compare(basic_string_view other) const -> int {
+  FMT_CONSTEXPR auto compare(basic_string_view other) const -> int {
     size_t str_size = size_ < other.size_ ? size_ : other.size_;
-    int result = std::char_traits<Char>::compare(data_, other.data_, str_size);
+    int result = detail::compare(data_, other.data_, str_size);
     if (result == 0)
       result = size_ == other.size_ ? 0 : (size_ < other.size_ ? -1 : 1);
     return result;
   }
 
-  FMT_CONSTEXPR_CHAR_TRAITS friend auto operator==(basic_string_view lhs,
-                                                   basic_string_view rhs)
-      -> bool {
+  FMT_CONSTEXPR friend auto operator==(basic_string_view lhs,
+                                       basic_string_view rhs) -> bool {
     return lhs.compare(rhs) == 0;
   }
   friend auto operator!=(basic_string_view lhs, basic_string_view rhs) -> bool {
@@ -526,21 +588,16 @@ template <typename Char, FMT_ENABLE_IF(is_char<Char>::value)>
 FMT_INLINE auto to_string_view(const Char* s) -> basic_string_view<Char> {
   return s;
 }
-template <typename Char, typename Traits, typename Alloc>
-inline auto to_string_view(const std::basic_string<Char, Traits, Alloc>& s)
-    -> basic_string_view<Char> {
-  return s;
+template <typename S, FMT_ENABLE_IF(is_string_like<S>::value)>
+inline auto to_string_view(const S& s)
+    -> basic_string_view<typename S::value_type> {
+  return s;  // std::basic_string[_view]
 }
 template <typename Char>
 constexpr auto to_string_view(basic_string_view<Char> s)
     -> basic_string_view<Char> {
   return s;
 }
-template <typename Char,
-          FMT_ENABLE_IF(!std::is_empty<std_string_view<Char>>::value)>
-inline auto to_string_view(std_string_view<Char> s) -> basic_string_view<Char> {
-  return s;
-}
 template <typename S, FMT_ENABLE_IF(is_compile_string<S>::value)>
 constexpr auto to_string_view(const S& s)
     -> basic_string_view<typename S::char_type> {
@@ -609,10 +666,10 @@ FMT_TYPE_CONSTANT(const Char*, cstring_type);
 FMT_TYPE_CONSTANT(basic_string_view<Char>, string_type);
 FMT_TYPE_CONSTANT(const void*, pointer_type);
 
-constexpr bool is_integral_type(type t) {
+constexpr auto is_integral_type(type t) -> bool {
   return t > type::none_type && t <= type::last_integer_type;
 }
-constexpr bool is_arithmetic_type(type t) {
+constexpr auto is_arithmetic_type(type t) -> bool {
   return t > type::none_type && t <= type::last_numeric_type;
 }
 
@@ -635,21 +692,10 @@ enum {
   cstring_set = set(type::cstring_type),
   pointer_set = set(type::pointer_type)
 };
-
-FMT_NORETURN FMT_API void throw_format_error(const char* message);
-
-struct error_handler {
-  constexpr error_handler() = default;
-
-  // This function is intentionally not constexpr to give a compile-time error.
-  FMT_NORETURN void on_error(const char* message) {
-    throw_format_error(message);
-  }
-};
 }  // namespace detail
 
 /** Throws ``format_error`` with a given message. */
-using detail::throw_format_error;
+FMT_NORETURN FMT_API void throw_format_error(const char* message);
 
 /** String's character type. */
 template <typename S> using char_t = typename detail::char_t_impl<S>::type;
@@ -701,7 +747,7 @@ template <typename Char> class basic_format_parse_context {
    */
   FMT_CONSTEXPR auto next_arg_id() -> int {
     if (next_arg_id_ < 0) {
-      detail::throw_format_error(
+      throw_format_error(
           "cannot switch from manual to automatic argument indexing");
       return 0;
     }
@@ -716,7 +762,7 @@ template <typename Char> class basic_format_parse_context {
    */
   FMT_CONSTEXPR void check_arg_id(int id) {
     if (next_arg_id_ > 0) {
-      detail::throw_format_error(
+      throw_format_error(
           "cannot switch from automatic to manual argument indexing");
       return;
     }
@@ -769,35 +815,6 @@ class compile_parse_context : public basic_format_parse_context<Char> {
   }
 };
 
-// Extracts a reference to the container from back_insert_iterator.
-template <typename Container>
-inline auto get_container(std::back_insert_iterator<Container> it)
-    -> Container& {
-  using base = std::back_insert_iterator<Container>;
-  struct accessor : base {
-    accessor(base b) : base(b) {}
-    using base::container;
-  };
-  return *accessor(it).container;
-}
-
-template <typename Char, typename InputIt, typename OutputIt>
-FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out)
-    -> OutputIt {
-  while (begin != end) *out++ = static_cast<Char>(*begin++);
-  return out;
-}
-
-template <typename Char, typename T, typename U,
-          FMT_ENABLE_IF(
-              std::is_same<remove_const_t<T>, U>::value&& is_char<U>::value)>
-FMT_CONSTEXPR auto copy_str(T* begin, T* end, U* out) -> U* {
-  if (is_constant_evaluated()) return copy_str<Char, T*, U*>(begin, end, out);
-  auto size = to_unsigned(end - begin);
-  if (size > 0) memcpy(out, begin, size * sizeof(U));
-  return out + size;
-}
-
 /**
   \rst
   A contiguous memory buffer with an optional growing ability. It is an internal
@@ -810,13 +827,18 @@ template <typename T> class buffer {
   size_t size_;
   size_t capacity_;
 
+  using grow_fun = void (*)(buffer& buf, size_t capacity);
+  grow_fun grow_;
+
  protected:
   // Don't initialize ptr_ since it is not accessed to save a few cycles.
   FMT_MSC_WARNING(suppress : 26495)
-  buffer(size_t sz) noexcept : size_(sz), capacity_(sz) {}
+  FMT_CONSTEXPR buffer(grow_fun grow, size_t sz) noexcept
+      : size_(sz), capacity_(sz), grow_(grow) {}
 
-  FMT_CONSTEXPR20 buffer(T* p = nullptr, size_t sz = 0, size_t cap = 0) noexcept
-      : ptr_(p), size_(sz), capacity_(cap) {}
+  FMT_CONSTEXPR20 buffer(grow_fun grow, T* p = nullptr, size_t sz = 0,
+                         size_t cap = 0) noexcept
+      : ptr_(p), size_(sz), capacity_(cap), grow_(grow) {}
 
   FMT_CONSTEXPR20 ~buffer() = default;
   buffer(buffer&&) = default;
@@ -827,9 +849,6 @@ template <typename T> class buffer {
     capacity_ = buf_capacity;
   }
 
-  /** Increases the buffer capacity to hold at least *capacity* elements. */
-  virtual FMT_CONSTEXPR20 void grow(size_t capacity) = 0;
-
  public:
   using value_type = T;
   using const_reference = const T&;
@@ -868,7 +887,7 @@ template <typename T> class buffer {
   // for at least one additional element either by increasing the capacity or by
   // flushing the buffer if it is full.
   FMT_CONSTEXPR20 void try_reserve(size_t new_capacity) {
-    if (new_capacity > capacity_) grow(new_capacity);
+    if (new_capacity > capacity_) grow_(*this, new_capacity);
   }
 
   FMT_CONSTEXPR20 void push_back(const T& value) {
@@ -917,22 +936,25 @@ class iterator_buffer final : public Traits, public buffer<T> {
   enum { buffer_size = 256 };
   T data_[buffer_size];
 
- protected:
-  FMT_CONSTEXPR20 void grow(size_t) override {
-    if (this->size() == buffer_size) flush();
+  static FMT_CONSTEXPR20 void grow(buffer<T>& buf, size_t) {
+    if (buf.size() == buffer_size) static_cast<iterator_buffer&>(buf).flush();
   }
 
   void flush() {
     auto size = this->size();
     this->clear();
-    out_ = copy_str<T>(data_, data_ + this->limit(size), out_);
+    const T* begin = data_;
+    const T* end = begin + this->limit(size);
+    while (begin != end) *out_++ = *begin++;
   }
 
  public:
   explicit iterator_buffer(OutputIt out, size_t n = buffer_size)
-      : Traits(n), buffer<T>(data_, 0, buffer_size), out_(out) {}
+      : Traits(n), buffer<T>(grow, data_, 0, buffer_size), out_(out) {}
   iterator_buffer(iterator_buffer&& other)
-      : Traits(other), buffer<T>(data_, 0, buffer_size), out_(other.out_) {}
+      : Traits(other),
+        buffer<T>(grow, data_, 0, buffer_size),
+        out_(other.out_) {}
   ~iterator_buffer() { flush(); }
 
   auto out() -> OutputIt {
@@ -951,9 +973,9 @@ class iterator_buffer<T*, T, fixed_buffer_traits> final
   enum { buffer_size = 256 };
   T data_[buffer_size];
 
- protected:
-  FMT_CONSTEXPR20 void grow(size_t) override {
-    if (this->size() == this->capacity()) flush();
+  static FMT_CONSTEXPR20 void grow(buffer<T>& buf, size_t) {
+    if (buf.size() == buf.capacity())
+      static_cast<iterator_buffer&>(buf).flush();
   }
 
   void flush() {
@@ -967,10 +989,10 @@ class iterator_buffer<T*, T, fixed_buffer_traits> final
 
  public:
   explicit iterator_buffer(T* out, size_t n = buffer_size)
-      : fixed_buffer_traits(n), buffer<T>(out, 0, n), out_(out) {}
+      : fixed_buffer_traits(n), buffer<T>(grow, out, 0, n), out_(out) {}
   iterator_buffer(iterator_buffer&& other)
       : fixed_buffer_traits(other),
-        buffer<T>(std::move(other)),
+        buffer<T>(static_cast<iterator_buffer&&>(other)),
         out_(other.out_) {
     if (this->data() != out_) {
       this->set(data_, buffer_size);
@@ -989,38 +1011,37 @@ class iterator_buffer<T*, T, fixed_buffer_traits> final
 };
 
 template <typename T> class iterator_buffer<T*, T> final : public buffer<T> {
- protected:
-  FMT_CONSTEXPR20 void grow(size_t) override {}
-
  public:
-  explicit iterator_buffer(T* out, size_t = 0) : buffer<T>(out, 0, ~size_t()) {}
+  explicit iterator_buffer(T* out, size_t = 0)
+      : buffer<T>([](buffer<T>&, size_t) {}, out, 0, ~size_t()) {}
 
   auto out() -> T* { return &*this->end(); }
 };
 
 // A buffer that writes to a container with the contiguous storage.
 template <typename Container>
-class iterator_buffer<std::back_insert_iterator<Container>,
+class iterator_buffer<back_insert_iterator<Container>,
                       enable_if_t<is_contiguous<Container>::value,
                                   typename Container::value_type>>
     final : public buffer<typename Container::value_type> {
  private:
+  using value_type = typename Container::value_type;
   Container& container_;
 
- protected:
-  FMT_CONSTEXPR20 void grow(size_t capacity) override {
-    container_.resize(capacity);
-    this->set(&container_[0], capacity);
+  static FMT_CONSTEXPR20 void grow(buffer<value_type>& buf, size_t capacity) {
+    auto& self = static_cast<iterator_buffer&>(buf);
+    self.container_.resize(capacity);
+    self.set(&self.container_[0], capacity);
   }
 
  public:
   explicit iterator_buffer(Container& c)
-      : buffer<typename Container::value_type>(c.size()), container_(c) {}
-  explicit iterator_buffer(std::back_insert_iterator<Container> out, size_t = 0)
+      : buffer<value_type>(grow, c.size()), container_(c) {}
+  explicit iterator_buffer(back_insert_iterator<Container> out, size_t = 0)
       : iterator_buffer(get_container(out)) {}
 
-  auto out() -> std::back_insert_iterator<Container> {
-    return std::back_inserter(container_);
+  auto out() -> back_insert_iterator<Container> {
+    return fmt::back_inserter(container_);
   }
 };
 
@@ -1031,15 +1052,14 @@ template <typename T = char> class counting_buffer final : public buffer<T> {
   T data_[buffer_size];
   size_t count_ = 0;
 
- protected:
-  FMT_CONSTEXPR20 void grow(size_t) override {
-    if (this->size() != buffer_size) return;
-    count_ += this->size();
-    this->clear();
+  static FMT_CONSTEXPR20 void grow(buffer<T>& buf, size_t) {
+    if (buf.size() != buffer_size) return;
+    static_cast<counting_buffer&>(buf).count_ += buf.size();
+    buf.clear();
   }
 
  public:
-  counting_buffer() : buffer<T>(data_, 0, buffer_size) {}
+  counting_buffer() : buffer<T>(grow, data_, 0, buffer_size) {}
 
   auto count() -> size_t { return count_ + this->size(); }
 };
@@ -1053,7 +1073,7 @@ FMT_CONSTEXPR void basic_format_parse_context<Char>::do_check_arg_id(int id) {
       (!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) {
     using context = detail::compile_parse_context<Char>;
     if (id >= static_cast<context*>(this)->num_args())
-      detail::throw_format_error("argument not found");
+      throw_format_error("argument not found");
   }
 }
 
@@ -1085,18 +1105,29 @@ template <typename T, typename Context>
 using has_formatter =
     std::is_constructible<typename Context::template formatter_type<T>>;
 
-// An output iterator that appends to a buffer.
-// It is used to reduce symbol sizes for the common case.
-class appender : public std::back_insert_iterator<detail::buffer<char>> {
-  using base = std::back_insert_iterator<detail::buffer<char>>;
+// An output iterator that appends to a buffer. It is used instead of
+// back_insert_iterator to reduce symbol sizes for the common case.
+class appender {
+ private:
+  detail::buffer<char>* buffer_;
+
+  friend auto get_container(appender app) -> detail::buffer<char>& {
+    return *app.buffer_;
+  }
 
  public:
-  using std::back_insert_iterator<detail::buffer<char>>::back_insert_iterator;
-  appender(base it) noexcept : base(it) {}
+  using difference_type = ptrdiff_t;
   FMT_UNCHECKED_ITERATOR(appender);
 
-  auto operator++() noexcept -> appender& { return *this; }
-  auto operator++(int) noexcept -> appender { return *this; }
+  appender(detail::buffer<char>& buf) : buffer_(&buf) {}
+
+  auto operator=(char c) -> appender& {
+    buffer_->push_back(c);
+    return *this;
+  }
+  auto operator*() -> appender& { return *this; }
+  auto operator++() -> appender& { return *this; }
+  auto operator++(int) -> appender { return *this; }
 };
 
 namespace detail {
@@ -1119,7 +1150,7 @@ constexpr auto has_const_formatter() -> bool {
 
 template <typename T>
 using buffer_appender = conditional_t<std::is_same<T, char>::value, appender,
-                                      std::back_insert_iterator<buffer<T>>>;
+                                      back_insert_iterator<buffer<T>>>;
 
 // Maps an output iterator to a buffer.
 template <typename T, typename OutputIt>
@@ -1128,7 +1159,7 @@ auto get_buffer(OutputIt out) -> iterator_buffer<OutputIt, T> {
 }
 template <typename T, typename Buf,
           FMT_ENABLE_IF(std::is_base_of<buffer<char>, Buf>::value)>
-auto get_buffer(std::back_insert_iterator<Buf> out) -> buffer<char>& {
+auto get_buffer(back_insert_iterator<Buf> out) -> buffer<char>& {
   return get_container(out);
 }
 
@@ -1293,7 +1324,13 @@ template <typename Context> class value {
 
   template <typename T> FMT_CONSTEXPR20 FMT_INLINE value(T& val) {
     using value_type = remove_const_t<T>;
-    custom.value = const_cast<value_type*>(std::addressof(val));
+    // T may overload operator& e.g. std::vector<bool>::reference in libc++.
+#ifdef __cpp_if_constexpr
+    if constexpr (std::is_same<decltype(&val), T*>::value)
+      custom.value = const_cast<value_type*>(&val);
+#endif
+    if (!is_constant_evaluated())
+      custom.value = const_cast<char*>(&reinterpret_cast<const char&>(val));
     // Get the formatter type through the context to allow different contexts
     // have different extension points, e.g. `formatter<T>` for `format` and
     // `printf_formatter<T>` for `printf`.
@@ -1314,6 +1351,7 @@ template <typename Context> class value {
     parse_ctx.advance_to(f.parse(parse_ctx));
     using qualified_type =
         conditional_t<has_const_formatter<T, Context>(), const T, T>;
+    // Calling format through a mutable reference is deprecated.
     ctx.advance_to(f.format(*static_cast<qualified_type*>(arg), ctx));
   }
 };
@@ -1327,7 +1365,7 @@ using ulong_type = conditional_t<long_short, unsigned, unsigned long long>;
 template <typename T> struct format_as_result {
   template <typename U,
             FMT_ENABLE_IF(std::is_enum<U>::value || std::is_class<U>::value)>
-  static auto map(U*) -> decltype(format_as(std::declval<U>()));
+  static auto map(U*) -> remove_cvref_t<decltype(format_as(std::declval<U>()))>;
   static auto map(...) -> void;
 
   using type = decltype(map(static_cast<T*>(nullptr)));
@@ -1444,7 +1482,8 @@ template <typename Context> struct arg_mapper {
   // Only map owning types because mapping views can be unsafe.
   template <typename T, typename U = format_as_t<T>,
             FMT_ENABLE_IF(std::is_arithmetic<U>::value)>
-  FMT_CONSTEXPR FMT_INLINE auto map(const T& val) -> decltype(this->map(U())) {
+  FMT_CONSTEXPR FMT_INLINE auto map(const T& val)
+      -> decltype(FMT_DECLTYPE_THIS map(U())) {
     return map(format_as(val));
   }
 
@@ -1468,13 +1507,14 @@ template <typename Context> struct arg_mapper {
                           !is_string<U>::value && !is_char<U>::value &&
                           !is_named_arg<U>::value &&
                           !std::is_arithmetic<format_as_t<U>>::value)>
-  FMT_CONSTEXPR FMT_INLINE auto map(T& val) -> decltype(this->do_map(val)) {
+  FMT_CONSTEXPR FMT_INLINE auto map(T& val)
+      -> decltype(FMT_DECLTYPE_THIS do_map(val)) {
     return do_map(val);
   }
 
   template <typename T, FMT_ENABLE_IF(is_named_arg<T>::value)>
   FMT_CONSTEXPR FMT_INLINE auto map(const T& named_arg)
-      -> decltype(this->map(named_arg.value)) {
+      -> decltype(FMT_DECLTYPE_THIS map(named_arg.value)) {
     return map(named_arg.value);
   }
 
@@ -1493,45 +1533,19 @@ enum { max_packed_args = 62 / packed_arg_bits };
 enum : unsigned long long { is_unpacked_bit = 1ULL << 63 };
 enum : unsigned long long { has_named_args_bit = 1ULL << 62 };
 
-template <typename Char, typename InputIt>
-auto copy_str(InputIt begin, InputIt end, appender out) -> appender {
-  get_container(out).append(begin, end);
-  return out;
-}
-template <typename Char, typename InputIt>
-auto copy_str(InputIt begin, InputIt end,
-              std::back_insert_iterator<std::string> out)
-    -> std::back_insert_iterator<std::string> {
-  get_container(out).append(begin, end);
-  return out;
-}
-
-template <typename Char, typename R, typename OutputIt>
-FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt {
-  return detail::copy_str<Char>(rng.begin(), rng.end(), out);
-}
-
-#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500
-// A workaround for gcc 4.8 to make void_t work in a SFINAE context.
-template <typename...> struct void_t_impl { using type = void; };
-template <typename... T> using void_t = typename void_t_impl<T...>::type;
-#else
-template <typename...> using void_t = void;
-#endif
-
 template <typename It, typename T, typename Enable = void>
 struct is_output_iterator : std::false_type {};
 
+template <> struct is_output_iterator<appender, char> : std::true_type {};
+
 template <typename It, typename T>
 struct is_output_iterator<
-    It, T,
-    void_t<typename std::iterator_traits<It>::iterator_category,
-           decltype(*std::declval<It>() = std::declval<T>())>>
+    It, T, void_t<decltype(*std::declval<It&>()++ = std::declval<T>())>>
     : std::true_type {};
 
 template <typename It> struct is_back_insert_iterator : std::false_type {};
 template <typename Container>
-struct is_back_insert_iterator<std::back_insert_iterator<Container>>
+struct is_back_insert_iterator<back_insert_iterator<Container>>
     : std::true_type {};
 
 // A type-erased reference to an std::locale to avoid a heavy <locale> include.
@@ -1607,8 +1621,8 @@ FMT_CONSTEXPR inline auto make_arg(T& val) -> basic_format_arg<Context> {
 }  // namespace detail
 FMT_BEGIN_EXPORT
 
-// A formatting argument. It is a trivially copyable/constructible type to
-// allow storage in basic_memory_buffer.
+// A formatting argument. Context is a template parameter for the compiled API
+// where output can be unbuffered.
 template <typename Context> class basic_format_arg {
  private:
   detail::value<Context> value_;
@@ -1618,11 +1632,6 @@ template <typename Context> class basic_format_arg {
   friend FMT_CONSTEXPR auto detail::make_arg(T& value)
       -> basic_format_arg<ContextType>;
 
-  template <typename Visitor, typename Ctx>
-  friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis,
-                                             const basic_format_arg<Ctx>& arg)
-      -> decltype(vis(0));
-
   friend class basic_format_args<Context>;
   friend class dynamic_format_arg_store<Context>;
 
@@ -1660,55 +1669,68 @@ template <typename Context> class basic_format_arg {
   auto is_arithmetic() const -> bool {
     return detail::is_arithmetic_type(type_);
   }
+
+  /**
+    \rst
+    Visits an argument dispatching to the appropriate visit method based on
+    the argument type. For example, if the argument type is ``double`` then
+    ``vis(value)`` will be called with the value of type ``double``.
+    \endrst
+  */
+  template <typename Visitor>
+  FMT_CONSTEXPR auto visit(Visitor&& vis) -> decltype(vis(0)) {
+    switch (type_) {
+    case detail::type::none_type:
+      break;
+    case detail::type::int_type:
+      return vis(value_.int_value);
+    case detail::type::uint_type:
+      return vis(value_.uint_value);
+    case detail::type::long_long_type:
+      return vis(value_.long_long_value);
+    case detail::type::ulong_long_type:
+      return vis(value_.ulong_long_value);
+    case detail::type::int128_type:
+      return vis(detail::convert_for_visit(value_.int128_value));
+    case detail::type::uint128_type:
+      return vis(detail::convert_for_visit(value_.uint128_value));
+    case detail::type::bool_type:
+      return vis(value_.bool_value);
+    case detail::type::char_type:
+      return vis(value_.char_value);
+    case detail::type::float_type:
+      return vis(value_.float_value);
+    case detail::type::double_type:
+      return vis(value_.double_value);
+    case detail::type::long_double_type:
+      return vis(value_.long_double_value);
+    case detail::type::cstring_type:
+      return vis(value_.string.data);
+    case detail::type::string_type:
+      using sv = basic_string_view<typename Context::char_type>;
+      return vis(sv(value_.string.data, value_.string.size));
+    case detail::type::pointer_type:
+      return vis(value_.pointer);
+    case detail::type::custom_type:
+      return vis(typename basic_format_arg<Context>::handle(value_.custom));
+    }
+    return vis(monostate());
+  }
+
+  FMT_INLINE auto format_custom(const char_type* parse_begin,
+                                typename Context::parse_context_type& parse_ctx,
+                                Context& ctx) -> bool {
+    if (type_ != detail::type::custom_type) return false;
+    parse_ctx.advance_to(parse_begin);
+    value_.custom.format(value_.custom.value, parse_ctx, ctx);
+    return true;
+  }
 };
 
-/**
-  \rst
-  Visits an argument dispatching to the appropriate visit method based on
-  the argument type. For example, if the argument type is ``double`` then
-  ``vis(value)`` will be called with the value of type ``double``.
-  \endrst
- */
-// DEPRECATED!
 template <typename Visitor, typename Context>
-FMT_CONSTEXPR FMT_INLINE auto visit_format_arg(
+FMT_DEPRECATED FMT_CONSTEXPR FMT_INLINE auto visit_format_arg(
     Visitor&& vis, const basic_format_arg<Context>& arg) -> decltype(vis(0)) {
-  switch (arg.type_) {
-  case detail::type::none_type:
-    break;
-  case detail::type::int_type:
-    return vis(arg.value_.int_value);
-  case detail::type::uint_type:
-    return vis(arg.value_.uint_value);
-  case detail::type::long_long_type:
-    return vis(arg.value_.long_long_value);
-  case detail::type::ulong_long_type:
-    return vis(arg.value_.ulong_long_value);
-  case detail::type::int128_type:
-    return vis(detail::convert_for_visit(arg.value_.int128_value));
-  case detail::type::uint128_type:
-    return vis(detail::convert_for_visit(arg.value_.uint128_value));
-  case detail::type::bool_type:
-    return vis(arg.value_.bool_value);
-  case detail::type::char_type:
-    return vis(arg.value_.char_value);
-  case detail::type::float_type:
-    return vis(arg.value_.float_value);
-  case detail::type::double_type:
-    return vis(arg.value_.double_value);
-  case detail::type::long_double_type:
-    return vis(arg.value_.long_double_value);
-  case detail::type::cstring_type:
-    return vis(arg.value_.string.data);
-  case detail::type::string_type:
-    using sv = basic_string_view<typename Context::char_type>;
-    return vis(sv(arg.value_.string.data, arg.value_.string.size));
-  case detail::type::pointer_type:
-    return vis(arg.value_.pointer);
-  case detail::type::custom_type:
-    return vis(typename basic_format_arg<Context>::handle(arg.value_.custom));
-  }
-  return vis(monostate());
+  return arg.visit(std::forward<Visitor>(vis));
 }
 
 // Formatting context.
@@ -1748,8 +1770,8 @@ template <typename OutputIt, typename Char> class basic_format_context {
   }
   auto args() const -> const format_args& { return args_; }
 
-  FMT_CONSTEXPR auto error_handler() -> detail::error_handler { return {}; }
-  void on_error(const char* message) { error_handler().on_error(message); }
+  // This function is intentionally not constexpr to give a compile-time error.
+  void on_error(const char* message) { throw_format_error(message); }
 
   // Returns an iterator to the beginning of the output range.
   FMT_CONSTEXPR auto out() -> iterator { return out_; }
@@ -1831,7 +1853,7 @@ class format_arg_store
 // Arguments are taken by lvalue references to avoid some lifetime issues.
 template <typename Context = format_context, typename... T>
 constexpr auto make_format_args(T&... args)
-    -> format_arg_store<Context, remove_cvref_t<T>...> {
+    -> format_arg_store<Context, remove_const_t<T>...> {
   return {args...};
 }
 
@@ -2107,11 +2129,8 @@ struct dynamic_format_specs : format_specs<Char> {
 };
 
 // Converts a character to ASCII. Returns '\0' on conversion failure.
-template <typename Char, FMT_ENABLE_IF(std::is_integral<Char>::value)>
-constexpr auto to_ascii(Char c) -> char {
-  return c <= 0xff ? static_cast<char>(c) : '\0';
-}
-template <typename Char, FMT_ENABLE_IF(std::is_enum<Char>::value)>
+template <typename Char, FMT_ENABLE_IF(std::is_integral<Char>::value ||
+                                       std::is_enum<Char>::value)>
 constexpr auto to_ascii(Char c) -> char {
   return c <= 0xff ? static_cast<char>(c) : '\0';
 }
@@ -2156,11 +2175,11 @@ FMT_CONSTEXPR auto parse_nonnegative_int(const Char*& begin, const Char* end,
   } while (p != end && '0' <= *p && *p <= '9');
   auto num_digits = p - begin;
   begin = p;
-  if (num_digits <= std::numeric_limits<int>::digits10)
-    return static_cast<int>(value);
+  int digits10 = static_cast<int>(sizeof(int) * CHAR_BIT * 3 / 10);
+  if (num_digits <= digits10) return static_cast<int>(value);
   // Check for overflow.
-  const unsigned max = to_unsigned((std::numeric_limits<int>::max)());
-  return num_digits == std::numeric_limits<int>::digits10 + 1 &&
+  unsigned max = INT_MAX;
+  return num_digits == digits10 + 1 &&
                  prev * 10ull + unsigned(p[-1] - '0') <= max
              ? static_cast<int>(value)
              : error_value;
@@ -2188,9 +2207,8 @@ FMT_CONSTEXPR auto do_parse_arg_id(const Char* begin, const Char* end,
   Char c = *begin;
   if (c >= '0' && c <= '9') {
     int index = 0;
-    constexpr int max = (std::numeric_limits<int>::max)();
     if (c != '0')
-      index = parse_nonnegative_int(begin, end, max);
+      index = parse_nonnegative_int(begin, end, INT_MAX);
     else
       ++begin;
     if (begin == end || (*begin != '}' && *begin != ':'))
@@ -2309,9 +2327,12 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs(
     dynamic_format_specs<Char>& specs;
     type arg_type;
 
-    FMT_CONSTEXPR auto operator()(pres type, int set) -> const Char* {
-      if (!in(arg_type, set)) throw_format_error("invalid format specifier");
-      specs.type = type;
+    FMT_CONSTEXPR auto operator()(pres pres_type, int set) -> const Char* {
+      if (!in(arg_type, set)) {
+        if (arg_type == type::none_type) return begin;
+        throw_format_error("invalid format specifier");
+      }
+      specs.type = pres_type;
       return begin + 1;
     }
   } parse_presentation_type{begin, specs, arg_type};
@@ -2328,6 +2349,7 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs(
     case '+':
     case '-':
     case ' ':
+      if (arg_type == type::none_type) return begin;
       enter_state(state::sign, in(arg_type, sint_set | float_set));
       switch (c) {
       case '+':
@@ -2343,14 +2365,17 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs(
       ++begin;
       break;
     case '#':
+      if (arg_type == type::none_type) return begin;
       enter_state(state::hash, is_arithmetic_type(arg_type));
       specs.alt = true;
       ++begin;
       break;
     case '0':
       enter_state(state::zero);
-      if (!is_arithmetic_type(arg_type))
+      if (!is_arithmetic_type(arg_type)) {
+        if (arg_type == type::none_type) return begin;
         throw_format_error("format specifier requires numeric argument");
+      }
       if (specs.align == align::none) {
         // Ignore 0 if align is specified for compatibility with std::format.
         specs.align = align::numeric;
@@ -2372,12 +2397,14 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs(
       begin = parse_dynamic_spec(begin, end, specs.width, specs.width_ref, ctx);
       break;
     case '.':
+      if (arg_type == type::none_type) return begin;
       enter_state(state::precision,
                   in(arg_type, float_set | string_set | cstring_set));
       begin = parse_precision(begin, end, specs.precision, specs.precision_ref,
                               ctx);
       break;
     case 'L':
+      if (arg_type == type::none_type) return begin;
       enter_state(state::locale, is_arithmetic_type(arg_type));
       specs.localized = true;
       ++begin;
@@ -2411,6 +2438,8 @@ FMT_CONSTEXPR FMT_INLINE auto parse_format_specs(
     case 'G':
       return parse_presentation_type(pres::general_upper, float_set);
     case 'c':
+      if (arg_type == type::bool_type)
+        throw_format_error("invalid format specifier");
       return parse_presentation_type(pres::chr, integral_set);
     case 's':
       return parse_presentation_type(pres::string,
@@ -2550,9 +2579,9 @@ FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx)
       decltype(arg_mapper<context>().map(std::declval<const T&>())),
       typename strip_named_arg<T>::type>;
 // LAMMPS customization. Fails to compile with (some) Intel compilers
-#if defined(__cpp_if_constexpr) && 0
-  if constexpr (std::is_default_constructible_v<
-                    formatter<mapped_type, char_type>>) {
+#if defined(__cpp_if_constexpr) && 1
+  if constexpr (std::is_default_constructible<
+                    formatter<mapped_type, char_type>>::value) {
     return formatter<mapped_type, char_type>().parse(ctx);
   } else {
     type_is_unformattable_for<T, char_type> _;
@@ -2675,9 +2704,11 @@ void check_format_string(S format_str) {
 
 template <typename Char = char> struct vformat_args {
   using type = basic_format_args<
-      basic_format_context<std::back_insert_iterator<buffer<Char>>, Char>>;
+      basic_format_context<back_insert_iterator<buffer<Char>>, Char>>;
+};
+template <> struct vformat_args<char> {
+  using type = format_args;
 };
-template <> struct vformat_args<char> { using type = format_args; };
 
 // Use vformat_args and avoid type_identity to keep symbols short.
 template <typename Char>
@@ -2779,7 +2810,7 @@ using format_string = basic_format_string<char, type_identity_t<Args>...>;
 inline auto runtime(string_view s) -> runtime_format_string<> { return {{s}}; }
 #endif
 
-FMT_API auto vformat(string_view fmt, format_args args) -> std::string;
+FMT_API auto vformat(string_view fmt, format_args args) -> basic_string<char>;
 
 /**
   \rst
@@ -2794,7 +2825,7 @@ FMT_API auto vformat(string_view fmt, format_args args) -> std::string;
 */
 template <typename... T>
 FMT_NODISCARD FMT_INLINE auto format(format_string<T...> fmt, T&&... args)
-    -> std::string {
+    -> basic_string<char> {
   return vformat(fmt, fmt::make_format_args(args...));
 }
 
@@ -2816,7 +2847,7 @@ auto vformat_to(OutputIt out, string_view fmt, format_args args) -> OutputIt {
  **Example**::
 
    auto out = std::vector<char>();
-   fmt::format_to(std::back_inserter(out), "{}", 42);
+   fmt::format_to(fmt::back_inserter(out), "{}", 42);
  \endrst
  */
 template <typename OutputIt, typename... T,
diff --git a/src/fmt/format-inl.h b/src/fmt/format-inl.h
index dac2d437a4..8da1c17f36 100644
--- a/src/fmt/format-inl.h
+++ b/src/fmt/format-inl.h
@@ -18,7 +18,7 @@
 #  include <locale>
 #endif
 
-#ifdef _WIN32
+#if defined(_WIN32) && !defined(FMT_WINDOWS_NO_WCHAR)
 #  include <io.h>  // _isatty
 #endif
 
@@ -36,10 +36,6 @@ FMT_FUNC void assert_fail(const char* file, int line, const char* message) {
   std::terminate();
 }
 
-FMT_FUNC void throw_format_error(const char* message) {
-  FMT_THROW(format_error(message));
-}
-
 FMT_FUNC void format_error_code(detail::buffer<char>& out, int error_code,
                                 string_view message) noexcept {
   // Report error code making sure that the output fits into
@@ -58,8 +54,8 @@ FMT_FUNC void format_error_code(detail::buffer<char>& out, int error_code,
   error_code_size += detail::to_unsigned(detail::count_digits(abs_value));
   auto it = buffer_appender<char>(out);
   if (message.size() <= inline_buffer_size - error_code_size)
-    format_to(it, FMT_STRING("{}{}"), message, SEP);
-  format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code);
+    fmt::format_to(it, FMT_STRING("{}{}"), message, SEP);
+  fmt::format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code);
   FMT_ASSERT(out.size() <= inline_buffer_size, "");
 }
 
@@ -73,9 +69,8 @@ FMT_FUNC void report_error(format_func func, int error_code,
 }
 
 // A wrapper around fwrite that throws on error.
-inline void fwrite_fully(const void* ptr, size_t size, size_t count,
-                         FILE* stream) {
-  size_t written = std::fwrite(ptr, size, count, stream);
+inline void fwrite_fully(const void* ptr, size_t count, FILE* stream) {
+  size_t written = std::fwrite(ptr, 1, count, stream);
   if (written < count)
     FMT_THROW(system_error(errno, FMT_STRING("cannot write to file")));
 }
@@ -86,7 +81,7 @@ locale_ref::locale_ref(const Locale& loc) : locale_(&loc) {
   static_assert(std::is_same<Locale, std::locale>::value, "");
 }
 
-template <typename Locale> Locale locale_ref::get() const {
+template <typename Locale> auto locale_ref::get() const -> Locale {
   static_assert(std::is_same<Locale, std::locale>::value, "");
   return locale_ ? *static_cast<const std::locale*>(locale_) : std::locale();
 }
@@ -98,7 +93,8 @@ FMT_FUNC auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result<Char> {
   auto thousands_sep = grouping.empty() ? Char() : facet.thousands_sep();
   return {std::move(grouping), thousands_sep};
 }
-template <typename Char> FMT_FUNC Char decimal_point_impl(locale_ref loc) {
+template <typename Char>
+FMT_FUNC auto decimal_point_impl(locale_ref loc) -> Char {
   return std::use_facet<std::numpunct<Char>>(loc.get<std::locale>())
       .decimal_point();
 }
@@ -127,6 +123,10 @@ FMT_FUNC auto write_loc(appender out, loc_value value,
 }
 }  // namespace detail
 
+FMT_FUNC void throw_format_error(const char* message) {
+  FMT_THROW(format_error(message));
+}
+
 template <typename Locale> typename Locale::id format_facet<Locale>::id;
 
 #ifndef FMT_STATIC_THOUSANDS_SEPARATOR
@@ -144,24 +144,25 @@ FMT_API FMT_FUNC auto format_facet<std::locale>::do_put(
 }
 #endif
 
-FMT_FUNC std::system_error vsystem_error(int error_code, string_view fmt,
-                                         format_args args) {
+FMT_FUNC auto vsystem_error(int error_code, string_view fmt, format_args args)
+    -> std::system_error {
   auto ec = std::error_code(error_code, std::generic_category());
   return std::system_error(ec, vformat(fmt, args));
 }
 
 namespace detail {
 
-template <typename F> inline bool operator==(basic_fp<F> x, basic_fp<F> y) {
+template <typename F>
+inline auto operator==(basic_fp<F> x, basic_fp<F> y) -> bool {
   return x.f == y.f && x.e == y.e;
 }
 
 // Compilers should be able to optimize this into the ror instruction.
-FMT_CONSTEXPR inline uint32_t rotr(uint32_t n, uint32_t r) noexcept {
+FMT_CONSTEXPR inline auto rotr(uint32_t n, uint32_t r) noexcept -> uint32_t {
   r &= 31;
   return (n >> r) | (n << (32 - r));
 }
-FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept {
+FMT_CONSTEXPR inline auto rotr(uint64_t n, uint32_t r) noexcept -> uint64_t {
   r &= 63;
   return (n >> r) | (n << (64 - r));
 }
@@ -170,14 +171,14 @@ FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept {
 namespace dragonbox {
 // Computes upper 64 bits of multiplication of a 32-bit unsigned integer and a
 // 64-bit unsigned integer.
-inline uint64_t umul96_upper64(uint32_t x, uint64_t y) noexcept {
+inline auto umul96_upper64(uint32_t x, uint64_t y) noexcept -> uint64_t {
   return umul128_upper64(static_cast<uint64_t>(x) << 32, y);
 }
 
 // Computes lower 128 bits of multiplication of a 64-bit unsigned integer and a
 // 128-bit unsigned integer.
-inline uint128_fallback umul192_lower128(uint64_t x,
-                                         uint128_fallback y) noexcept {
+inline auto umul192_lower128(uint64_t x, uint128_fallback y) noexcept
+    -> uint128_fallback {
   uint64_t high = x * y.high();
   uint128_fallback high_low = umul128(x, y.low());
   return {high + high_low.high(), high_low.low()};
@@ -185,12 +186,12 @@ inline uint128_fallback umul192_lower128(uint64_t x,
 
 // Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a
 // 64-bit unsigned integer.
-inline uint64_t umul96_lower64(uint32_t x, uint64_t y) noexcept {
+inline auto umul96_lower64(uint32_t x, uint64_t y) noexcept -> uint64_t {
   return x * y;
 }
 
 // Various fast log computations.
-inline int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept {
+inline auto floor_log10_pow2_minus_log10_4_over_3(int e) noexcept -> int {
   FMT_ASSERT(e <= 2936 && e >= -2985, "too large exponent");
   return (e * 631305 - 261663) >> 21;
 }
@@ -204,7 +205,7 @@ FMT_INLINE_VARIABLE constexpr struct {
 // divisible by pow(10, N).
 // Precondition: n <= pow(10, N + 1).
 template <int N>
-bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept {
+auto check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept -> bool {
   // The numbers below are chosen such that:
   //   1. floor(n/d) = floor(nm / 2^k) where d=10 or d=100,
   //   2. nm mod 2^k < m if and only if n is divisible by d,
@@ -229,7 +230,7 @@ bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept {
 
 // Computes floor(n / pow(10, N)) for small n and N.
 // Precondition: n <= pow(10, N + 1).
-template <int N> uint32_t small_division_by_pow10(uint32_t n) noexcept {
+template <int N> auto small_division_by_pow10(uint32_t n) noexcept -> uint32_t {
   constexpr auto info = div_small_pow10_infos[N - 1];
   FMT_ASSERT(n <= info.divisor * 10, "n is too large");
   constexpr uint32_t magic_number =
@@ -238,12 +239,12 @@ template <int N> uint32_t small_division_by_pow10(uint32_t n) noexcept {
 }
 
 // Computes floor(n / 10^(kappa + 1)) (float)
-inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) noexcept {
+inline auto divide_by_10_to_kappa_plus_1(uint32_t n) noexcept -> uint32_t {
   // 1374389535 = ceil(2^37/100)
   return static_cast<uint32_t>((static_cast<uint64_t>(n) * 1374389535) >> 37);
 }
 // Computes floor(n / 10^(kappa + 1)) (double)
-inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) noexcept {
+inline auto divide_by_10_to_kappa_plus_1(uint64_t n) noexcept -> uint64_t {
   // 2361183241434822607 = ceil(2^(64+7)/1000)
   return umul128_upper64(n, 2361183241434822607ull) >> 7;
 }
@@ -255,7 +256,7 @@ template <> struct cache_accessor<float> {
   using carrier_uint = float_info<float>::carrier_uint;
   using cache_entry_type = uint64_t;
 
-  static uint64_t get_cached_power(int k) noexcept {
+  static auto get_cached_power(int k) noexcept -> uint64_t {
     FMT_ASSERT(k >= float_info<float>::min_k && k <= float_info<float>::max_k,
                "k is out of range");
     static constexpr const uint64_t pow10_significands[] = {
@@ -297,20 +298,23 @@ template <> struct cache_accessor<float> {
     bool is_integer;
   };
 
-  static compute_mul_result compute_mul(
-      carrier_uint u, const cache_entry_type& cache) noexcept {
+  static auto compute_mul(carrier_uint u,
+                          const cache_entry_type& cache) noexcept
+      -> compute_mul_result {
     auto r = umul96_upper64(u, cache);
     return {static_cast<carrier_uint>(r >> 32),
             static_cast<carrier_uint>(r) == 0};
   }
 
-  static uint32_t compute_delta(const cache_entry_type& cache,
-                                int beta) noexcept {
+  static auto compute_delta(const cache_entry_type& cache, int beta) noexcept
+      -> uint32_t {
     return static_cast<uint32_t>(cache >> (64 - 1 - beta));
   }
 
-  static compute_mul_parity_result compute_mul_parity(
-      carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept {
+  static auto compute_mul_parity(carrier_uint two_f,
+                                 const cache_entry_type& cache,
+                                 int beta) noexcept
+      -> compute_mul_parity_result {
     FMT_ASSERT(beta >= 1, "");
     FMT_ASSERT(beta < 64, "");
 
@@ -319,22 +323,22 @@ template <> struct cache_accessor<float> {
             static_cast<uint32_t>(r >> (32 - beta)) == 0};
   }
 
-  static carrier_uint compute_left_endpoint_for_shorter_interval_case(
-      const cache_entry_type& cache, int beta) noexcept {
+  static auto compute_left_endpoint_for_shorter_interval_case(
+      const cache_entry_type& cache, int beta) noexcept -> carrier_uint {
     return static_cast<carrier_uint>(
         (cache - (cache >> (num_significand_bits<float>() + 2))) >>
         (64 - num_significand_bits<float>() - 1 - beta));
   }
 
-  static carrier_uint compute_right_endpoint_for_shorter_interval_case(
-      const cache_entry_type& cache, int beta) noexcept {
+  static auto compute_right_endpoint_for_shorter_interval_case(
+      const cache_entry_type& cache, int beta) noexcept -> carrier_uint {
     return static_cast<carrier_uint>(
         (cache + (cache >> (num_significand_bits<float>() + 1))) >>
         (64 - num_significand_bits<float>() - 1 - beta));
   }
 
-  static carrier_uint compute_round_up_for_shorter_interval_case(
-      const cache_entry_type& cache, int beta) noexcept {
+  static auto compute_round_up_for_shorter_interval_case(
+      const cache_entry_type& cache, int beta) noexcept -> carrier_uint {
     return (static_cast<carrier_uint>(
                 cache >> (64 - num_significand_bits<float>() - 2 - beta)) +
             1) /
@@ -346,7 +350,7 @@ template <> struct cache_accessor<double> {
   using carrier_uint = float_info<double>::carrier_uint;
   using cache_entry_type = uint128_fallback;
 
-  static uint128_fallback get_cached_power(int k) noexcept {
+  static auto get_cached_power(int k) noexcept -> uint128_fallback {
     FMT_ASSERT(k >= float_info<double>::min_k && k <= float_info<double>::max_k,
                "k is out of range");
 
@@ -985,8 +989,7 @@ template <> struct cache_accessor<double> {
       {0xe0accfa875af45a7, 0x93eb1b80a33b8606},
       {0x8c6c01c9498d8b88, 0xbc72f130660533c4},
       {0xaf87023b9bf0ee6a, 0xeb8fad7c7f8680b5},
-      { 0xdb68c2ca82ed2a05,
-        0xa67398db9f6820e2 }
+      {0xdb68c2ca82ed2a05, 0xa67398db9f6820e2},
 #else
       {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b},
       {0xce5d73ff402d98e3, 0xfb0a3d212dc81290},
@@ -1071,19 +1074,22 @@ template <> struct cache_accessor<double> {
     bool is_integer;
   };
 
-  static compute_mul_result compute_mul(
-      carrier_uint u, const cache_entry_type& cache) noexcept {
+  static auto compute_mul(carrier_uint u,
+                          const cache_entry_type& cache) noexcept
+      -> compute_mul_result {
     auto r = umul192_upper128(u, cache);
     return {r.high(), r.low() == 0};
   }
 
-  static uint32_t compute_delta(cache_entry_type const& cache,
-                                int beta) noexcept {
+  static auto compute_delta(cache_entry_type const& cache, int beta) noexcept
+      -> uint32_t {
     return static_cast<uint32_t>(cache.high() >> (64 - 1 - beta));
   }
 
-  static compute_mul_parity_result compute_mul_parity(
-      carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept {
+  static auto compute_mul_parity(carrier_uint two_f,
+                                 const cache_entry_type& cache,
+                                 int beta) noexcept
+      -> compute_mul_parity_result {
     FMT_ASSERT(beta >= 1, "");
     FMT_ASSERT(beta < 64, "");
 
@@ -1092,35 +1098,35 @@ template <> struct cache_accessor<double> {
             ((r.high() << beta) | (r.low() >> (64 - beta))) == 0};
   }
 
-  static carrier_uint compute_left_endpoint_for_shorter_interval_case(
-      const cache_entry_type& cache, int beta) noexcept {
+  static auto compute_left_endpoint_for_shorter_interval_case(
+      const cache_entry_type& cache, int beta) noexcept -> carrier_uint {
     return (cache.high() -
             (cache.high() >> (num_significand_bits<double>() + 2))) >>
            (64 - num_significand_bits<double>() - 1 - beta);
   }
 
-  static carrier_uint compute_right_endpoint_for_shorter_interval_case(
-      const cache_entry_type& cache, int beta) noexcept {
+  static auto compute_right_endpoint_for_shorter_interval_case(
+      const cache_entry_type& cache, int beta) noexcept -> carrier_uint {
     return (cache.high() +
             (cache.high() >> (num_significand_bits<double>() + 1))) >>
            (64 - num_significand_bits<double>() - 1 - beta);
   }
 
-  static carrier_uint compute_round_up_for_shorter_interval_case(
-      const cache_entry_type& cache, int beta) noexcept {
+  static auto compute_round_up_for_shorter_interval_case(
+      const cache_entry_type& cache, int beta) noexcept -> carrier_uint {
     return ((cache.high() >> (64 - num_significand_bits<double>() - 2 - beta)) +
             1) /
            2;
   }
 };
 
-FMT_FUNC uint128_fallback get_cached_power(int k) noexcept {
+FMT_FUNC auto get_cached_power(int k) noexcept -> uint128_fallback {
   return cache_accessor<double>::get_cached_power(k);
 }
 
 // Various integer checks
 template <typename T>
-bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept {
+auto is_left_endpoint_integer_shorter_interval(int exponent) noexcept -> bool {
   const int case_shorter_interval_left_endpoint_lower_threshold = 2;
   const int case_shorter_interval_left_endpoint_upper_threshold = 3;
   return exponent >= case_shorter_interval_left_endpoint_lower_threshold &&
@@ -1132,7 +1138,7 @@ FMT_INLINE int remove_trailing_zeros(uint32_t& n, int s = 0) noexcept {
   FMT_ASSERT(n != 0, "");
   // Modular inverse of 5 (mod 2^32): (mod_inv_5 * 5) mod 2^32 = 1.
   constexpr uint32_t mod_inv_5 = 0xcccccccd;
-  constexpr uint32_t mod_inv_25 = 0xc28f5c29; // = mod_inv_5 * mod_inv_5
+  constexpr uint32_t mod_inv_25 = 0xc28f5c29;  // = mod_inv_5 * mod_inv_5
 
   while (true) {
     auto q = rotr(n * mod_inv_25, 2);
@@ -1168,7 +1174,7 @@ FMT_INLINE int remove_trailing_zeros(uint64_t& n) noexcept {
 
   // If n is not divisible by 10^8, work with n itself.
   constexpr uint64_t mod_inv_5 = 0xcccccccccccccccd;
-  constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29; // = mod_inv_5 * mod_inv_5
+  constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29;  // mod_inv_5 * mod_inv_5
 
   int s = 0;
   while (true) {
@@ -1234,7 +1240,7 @@ FMT_INLINE decimal_fp<T> shorter_interval_case(int exponent) noexcept {
   return ret_value;
 }
 
-template <typename T> decimal_fp<T> to_decimal(T x) noexcept {
+template <typename T> auto to_decimal(T x) noexcept -> decimal_fp<T> {
   // Step 1: integer promotion & Schubfach multiplier calculation.
 
   using carrier_uint = typename float_info<T>::carrier_uint;
@@ -1373,15 +1379,15 @@ template <> struct formatter<detail::bigint> {
     for (auto i = n.bigits_.size(); i > 0; --i) {
       auto value = n.bigits_[i - 1u];
       if (first) {
-        out = format_to(out, FMT_STRING("{:x}"), value);
+        out = fmt::format_to(out, FMT_STRING("{:x}"), value);
         first = false;
         continue;
       }
-      out = format_to(out, FMT_STRING("{:08x}"), value);
+      out = fmt::format_to(out, FMT_STRING("{:08x}"), value);
     }
     if (n.exp_ > 0)
-      out = format_to(out, FMT_STRING("p{}"),
-                      n.exp_ * detail::bigint::bigit_bits);
+      out = fmt::format_to(out, FMT_STRING("p{}"),
+                           n.exp_ * detail::bigint::bigit_bits);
     return out;
   }
 };
@@ -1417,7 +1423,7 @@ FMT_FUNC void report_system_error(int error_code,
   report_error(format_system_error, error_code, message);
 }
 
-FMT_FUNC std::string vformat(string_view fmt, format_args args) {
+FMT_FUNC auto vformat(string_view fmt, format_args args) -> std::string {
   // Don't optimize the "{}" case to keep the binary size small and because it
   // can be better optimized in fmt::format anyway.
   auto buffer = memory_buffer();
@@ -1426,33 +1432,38 @@ FMT_FUNC std::string vformat(string_view fmt, format_args args) {
 }
 
 namespace detail {
-#ifndef _WIN32
-FMT_FUNC bool write_console(std::FILE*, string_view) { return false; }
+#if !defined(_WIN32) || defined(FMT_WINDOWS_NO_WCHAR)
+FMT_FUNC auto write_console(int, string_view) -> bool { return false; }
 #else
 using dword = conditional_t<sizeof(long) == 4, unsigned long, unsigned>;
 extern "C" __declspec(dllimport) int __stdcall WriteConsoleW(  //
     void*, const void*, dword, dword*, void*);
 
-FMT_FUNC bool write_console(std::FILE* f, string_view text) {
-  auto fd = _fileno(f);
-  if (!_isatty(fd)) return false;
+FMT_FUNC bool write_console(int fd, string_view text) {
   auto u16 = utf8_to_utf16(text);
-  auto written = dword();
   return WriteConsoleW(reinterpret_cast<void*>(_get_osfhandle(fd)), u16.c_str(),
-                       static_cast<uint32_t>(u16.size()), &written, nullptr) != 0;
+                       static_cast<dword>(u16.size()), nullptr, nullptr) != 0;
 }
+#endif
 
+#ifdef _WIN32
 // Print assuming legacy (non-Unicode) encoding.
 FMT_FUNC void vprint_mojibake(std::FILE* f, string_view fmt, format_args args) {
   auto buffer = memory_buffer();
-  detail::vformat_to(buffer, fmt,
-                     basic_format_args<buffer_context<char>>(args));
-  fwrite_fully(buffer.data(), 1, buffer.size(), f);
+  detail::vformat_to(buffer, fmt, args);
+  fwrite_fully(buffer.data(), buffer.size(), f);
 }
 #endif
 
 FMT_FUNC void print(std::FILE* f, string_view text) {
-  if (!write_console(f, text)) fwrite_fully(text.data(), 1, text.size(), f);
+#ifdef _WIN32
+  int fd = _fileno(f);
+  if (_isatty(fd)) {
+    std::fflush(f);
+    if (write_console(fd, text)) return;
+  }
+#endif
+  fwrite_fully(text.data(), text.size(), f);
 }
 }  // namespace detail
 
diff --git a/src/fmt/format.h b/src/fmt/format.h
index 87a34b972c..8cdf95b7bd 100644
--- a/src/fmt/format.h
+++ b/src/fmt/format.h
@@ -37,17 +37,28 @@
 #include <cstdint>           // uint32_t
 #include <cstring>           // std::memcpy
 #include <initializer_list>  // std::initializer_list
-#include <limits>            // std::numeric_limits
-#include <memory>            // std::uninitialized_copy
-#include <stdexcept>         // std::runtime_error
-#include <system_error>      // std::system_error
+#include <iterator>
+#include <limits>        // std::numeric_limits
+#include <memory>        // std::uninitialized_copy
+#include <stdexcept>     // std::runtime_error
+#include <system_error>  // std::system_error
 
 #ifdef __cpp_lib_bit_cast
-#  include <bit>  // std::bitcast
+#  include <bit>  // std::bit_cast
 #endif
 
 #include "core.h"
 
+// libc++ supports string_view in pre-c++17.
+#if FMT_HAS_INCLUDE(<string_view>) && \
+    (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION))
+#  include <string_view>
+#  define FMT_USE_STRING_VIEW
+#elif FMT_HAS_INCLUDE("experimental/string_view") && FMT_CPLUSPLUS >= 201402L
+#  include <experimental/string_view>
+#  define FMT_USE_EXPERIMENTAL_STRING_VIEW
+#endif
+
 #if defined __cpp_inline_variables && __cpp_inline_variables >= 201606L
 #  define FMT_INLINE_VARIABLE inline
 #else
@@ -65,25 +76,11 @@
 #  define FMT_FALLTHROUGH
 #endif
 
-#ifndef FMT_DEPRECATED
-#  if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VERSION >= 1900
-#    define FMT_DEPRECATED [[deprecated]]
-#  else
-#    if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__)
-#      define FMT_DEPRECATED __attribute__((deprecated))
-#    elif FMT_MSC_VERSION
-#      define FMT_DEPRECATED __declspec(deprecated)
-#    else
-#      define FMT_DEPRECATED /* deprecated */
-#    endif
-#  endif
-#endif
-
 #ifndef FMT_NO_UNIQUE_ADDRESS
 #  if FMT_CPLUSPLUS >= 202002L
 #    if FMT_HAS_CPP_ATTRIBUTE(no_unique_address)
 #      define FMT_NO_UNIQUE_ADDRESS [[no_unique_address]]
-// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485)
+// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485).
 #    elif (FMT_MSC_VERSION >= 1929) && !FMT_CLANG_VERSION
 #      define FMT_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]]
 #    endif
@@ -93,10 +90,11 @@
 #  define FMT_NO_UNIQUE_ADDRESS
 #endif
 
-#if FMT_GCC_VERSION || defined(__clang__)
-#  define FMT_VISIBILITY(value) __attribute__((visibility(value)))
+// Visibility when compiled as a shared library/object.
+#if defined(FMT_LIB_EXPORT) || defined(FMT_SHARED)
+#  define FMT_SO_VISIBILITY(value) FMT_VISIBILITY(value)
 #else
-#  define FMT_VISIBILITY(value)
+#  define FMT_SO_VISIBILITY(value)
 #endif
 
 #ifdef __has_builtin
@@ -152,7 +150,10 @@ FMT_END_NAMESPACE
 
 #ifndef FMT_USE_USER_DEFINED_LITERALS
 // EDG based compilers (Intel, NVIDIA, Elbrus, etc), GCC and MSVC support UDLs.
-#  if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \
+//
+// GCC before 4.9 requires a space in `operator"" _a` which is invalid in later
+// compiler versions.
+#  if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 409 || \
        FMT_MSC_VERSION >= 1900) &&                                     \
       (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= /* UDL feature */ 480)
 #    define FMT_USE_USER_DEFINED_LITERALS 1
@@ -272,20 +273,19 @@ inline auto ctzll(uint64_t x) -> int {
 FMT_END_NAMESPACE
 #endif
 
+namespace std {
+template <> struct iterator_traits<fmt::appender> {
+  using value_type = void;
+  using iterator_category = std::output_iterator_tag;
+};
+template <typename Container>
+struct iterator_traits<fmt::back_insert_iterator<Container>> {
+  using value_type = void;
+  using iterator_category = std::output_iterator_tag;
+};
+}  // namespace std
+
 FMT_BEGIN_NAMESPACE
-
-template <typename...> struct disjunction : std::false_type {};
-template <typename P> struct disjunction<P> : P {};
-template <typename P1, typename... Pn>
-struct disjunction<P1, Pn...>
-    : conditional_t<bool(P1::value), P1, disjunction<Pn...>> {};
-
-template <typename...> struct conjunction : std::true_type {};
-template <typename P> struct conjunction<P> : P {};
-template <typename P1, typename... Pn>
-struct conjunction<P1, Pn...>
-    : conditional_t<bool(P1::value), conjunction<Pn...>, P1> {};
-
 namespace detail {
 
 FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) {
@@ -295,6 +295,15 @@ FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) {
 #endif
 }
 
+#if defined(FMT_USE_STRING_VIEW)
+template <typename Char> using std_string_view = std::basic_string_view<Char>;
+#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW)
+template <typename Char>
+using std_string_view = std::experimental::basic_string_view<Char>;
+#else
+template <typename T> struct std_string_view {};
+#endif
+
 template <typename CharT, CharT... C> struct string_literal {
   static constexpr CharT value[sizeof...(C)] = {C...};
   constexpr operator basic_string_view<CharT>() const {
@@ -307,37 +316,6 @@ template <typename CharT, CharT... C>
 constexpr CharT string_literal<CharT, C...>::value[sizeof...(C)];
 #endif
 
-template <typename Streambuf> class formatbuf : public Streambuf {
- private:
-  using char_type = typename Streambuf::char_type;
-  using streamsize = decltype(std::declval<Streambuf>().sputn(nullptr, 0));
-  using int_type = typename Streambuf::int_type;
-  using traits_type = typename Streambuf::traits_type;
-
-  buffer<char_type>& buffer_;
-
- public:
-  explicit formatbuf(buffer<char_type>& buf) : buffer_(buf) {}
-
- protected:
-  // The put area is always empty. This makes the implementation simpler and has
-  // the advantage that the streambuf and the buffer are always in sync and
-  // sputc never writes into uninitialized memory. A disadvantage is that each
-  // call to sputc always results in a (virtual) call to overflow. There is no
-  // disadvantage here for sputn since this always results in a call to xsputn.
-
-  auto overflow(int_type ch) -> int_type override {
-    if (!traits_type::eq_int_type(ch, traits_type::eof()))
-      buffer_.push_back(static_cast<char_type>(ch));
-    return ch;
-  }
-
-  auto xsputn(const char_type* s, streamsize count) -> streamsize override {
-    buffer_.append(s, s + count);
-    return count;
-  }
-};
-
 // Implementation of std::bit_cast for pre-C++20.
 template <typename To, typename From, FMT_ENABLE_IF(sizeof(To) == sizeof(From))>
 FMT_CONSTEXPR20 auto bit_cast(const From& from) -> To {
@@ -373,8 +351,8 @@ class uint128_fallback {
   constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {}
   constexpr uint128_fallback(uint64_t value = 0) : lo_(value), hi_(0) {}
 
-  constexpr uint64_t high() const noexcept { return hi_; }
-  constexpr uint64_t low() const noexcept { return lo_; }
+  constexpr auto high() const noexcept -> uint64_t { return hi_; }
+  constexpr auto low() const noexcept -> uint64_t { return lo_; }
 
   template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
   constexpr explicit operator T() const {
@@ -450,7 +428,7 @@ class uint128_fallback {
     hi_ &= n.hi_;
   }
 
-  FMT_CONSTEXPR20 uint128_fallback& operator+=(uint64_t n) noexcept {
+  FMT_CONSTEXPR20 auto operator+=(uint64_t n) noexcept -> uint128_fallback& {
     if (is_constant_evaluated()) {
       lo_ += n;
       hi_ += (lo_ < n ? 1 : 0);
@@ -546,6 +524,52 @@ FMT_INLINE void assume(bool condition) {
 #endif
 }
 
+// Extracts a reference to the container from back_insert_iterator.
+template <typename Container>
+inline auto get_container(std::back_insert_iterator<Container> it)
+    -> Container& {
+  using base = std::back_insert_iterator<Container>;
+  struct accessor : base {
+    accessor(base b) : base(b) {}
+    using base::container;
+  };
+  return *accessor(it).container;
+}
+
+template <typename Char, typename InputIt, typename OutputIt>
+FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out)
+    -> OutputIt {
+  while (begin != end) *out++ = static_cast<Char>(*begin++);
+  return out;
+}
+
+template <typename Char, typename T, typename U,
+          FMT_ENABLE_IF(
+              std::is_same<remove_const_t<T>, U>::value&& is_char<U>::value)>
+FMT_CONSTEXPR auto copy_str(T* begin, T* end, U* out) -> U* {
+  if (is_constant_evaluated()) return copy_str<Char, T*, U*>(begin, end, out);
+  auto size = to_unsigned(end - begin);
+  if (size > 0) memcpy(out, begin, size * sizeof(U));
+  return out + size;
+}
+
+template <typename Char, typename InputIt>
+auto copy_str(InputIt begin, InputIt end, appender out) -> appender {
+  get_container(out).append(begin, end);
+  return out;
+}
+template <typename Char, typename InputIt>
+auto copy_str(InputIt begin, InputIt end, back_insert_iterator<std::string> out)
+    -> back_insert_iterator<std::string> {
+  get_container(out).append(begin, end);
+  return out;
+}
+
+template <typename Char, typename R, typename OutputIt>
+FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt {
+  return detail::copy_str<Char>(rng.begin(), rng.end(), out);
+}
+
 // An approximation of iterator_t for pre-C++20 systems.
 template <typename T>
 using iterator_t = decltype(std::begin(std::declval<T&>()));
@@ -740,7 +764,7 @@ inline auto compute_width(basic_string_view<Char> s) -> size_t {
 }
 
 // Computes approximate display width of a UTF-8 string.
-FMT_CONSTEXPR inline size_t compute_width(string_view s) {
+FMT_CONSTEXPR inline auto compute_width(string_view s) -> size_t {
   size_t num_code_points = 0;
   // It is not a lambda for compatibility with C++14.
   struct count_code_points {
@@ -787,12 +811,17 @@ inline auto code_point_index(basic_string_view<Char> s, size_t n) -> size_t {
 
 // Calculates the index of the nth code point in a UTF-8 string.
 inline auto code_point_index(string_view s, size_t n) -> size_t {
-  const char* data = s.data();
-  size_t num_code_points = 0;
-  for (size_t i = 0, size = s.size(); i != size; ++i) {
-    if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) return i;
-  }
-  return s.size();
+  size_t result = s.size();
+  const char* begin = s.begin();
+  for_each_codepoint(s, [begin, &n, &result](uint32_t, string_view sv) {
+    if (n != 0) {
+      --n;
+      return true;
+    }
+    result = to_unsigned(sv.begin() - begin);
+    return false;
+  });
+  return result;
 }
 
 inline auto code_point_index(basic_string_view<char8_type> s, size_t n)
@@ -902,7 +931,7 @@ enum { inline_buffer_size = 500 };
   **Example**::
 
      auto out = fmt::memory_buffer();
-     format_to(std::back_inserter(out), "The answer is {}.", 42);
+     fmt::format_to(std::back_inserter(out), "The answer is {}.", 42);
 
   This will append the following output to the ``out`` object:
 
@@ -929,27 +958,29 @@ class basic_memory_buffer final : public detail::buffer<T> {
   }
 
  protected:
-  FMT_CONSTEXPR20 void grow(size_t size) override {
+  static FMT_CONSTEXPR20 void grow(detail::buffer<T>& buf, size_t size) {
     detail::abort_fuzzing_if(size > 5000);
-    const size_t max_size = std::allocator_traits<Allocator>::max_size(alloc_);
-    size_t old_capacity = this->capacity();
+    auto& self = static_cast<basic_memory_buffer&>(buf);
+    const size_t max_size =
+        std::allocator_traits<Allocator>::max_size(self.alloc_);
+    size_t old_capacity = buf.capacity();
     size_t new_capacity = old_capacity + old_capacity / 2;
     if (size > new_capacity)
       new_capacity = size;
     else if (new_capacity > max_size)
       new_capacity = size > max_size ? size : max_size;
-    T* old_data = this->data();
+    T* old_data = buf.data();
     T* new_data =
-        std::allocator_traits<Allocator>::allocate(alloc_, new_capacity);
+        std::allocator_traits<Allocator>::allocate(self.alloc_, new_capacity);
     // Suppress a bogus -Wstringop-overflow in gcc 13.1 (#3481).
-    detail::assume(this->size() <= new_capacity);
+    detail::assume(buf.size() <= new_capacity);
     // The following code doesn't throw, so the raw pointer above doesn't leak.
-    std::uninitialized_copy_n(old_data, this->size(), new_data);
-    this->set(new_data, new_capacity);
+    std::uninitialized_copy_n(old_data, buf.size(), new_data);
+    self.set(new_data, new_capacity);
     // deallocate must not throw according to the standard, but even if it does,
     // the buffer already uses the new storage and will deallocate it in
     // destructor.
-    if (old_data != store_) alloc_.deallocate(old_data, old_capacity);
+    if (old_data != self.store_) self.alloc_.deallocate(old_data, old_capacity);
   }
 
  public:
@@ -958,7 +989,7 @@ class basic_memory_buffer final : public detail::buffer<T> {
 
   FMT_CONSTEXPR20 explicit basic_memory_buffer(
       const Allocator& alloc = Allocator())
-      : alloc_(alloc) {
+      : detail::buffer<T>(grow), alloc_(alloc) {
     this->set(store_, SIZE);
     if (detail::is_constant_evaluated()) detail::fill_n(store_, SIZE, T());
   }
@@ -990,7 +1021,8 @@ class basic_memory_buffer final : public detail::buffer<T> {
     of the other object to it.
     \endrst
    */
-  FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept {
+  FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept
+      : detail::buffer<T>(grow) {
     move(other);
   }
 
@@ -1018,7 +1050,6 @@ class basic_memory_buffer final : public detail::buffer<T> {
   /** Increases the buffer capacity to *new_capacity*. */
   void reserve(size_t new_capacity) { this->try_reserve(new_capacity); }
 
-  // Directly append data into the buffer
   using detail::buffer<T>::append;
   template <typename ContiguousRange>
   void append(const ContiguousRange& range) {
@@ -1034,7 +1065,7 @@ struct is_contiguous<basic_memory_buffer<T, SIZE, Allocator>> : std::true_type {
 
 FMT_END_EXPORT
 namespace detail {
-FMT_API bool write_console(std::FILE* f, string_view text);
+FMT_API auto write_console(int fd, string_view text) -> bool;
 FMT_API void print(std::FILE*, string_view);
 }  // namespace detail
 
@@ -1046,7 +1077,7 @@ FMT_BEGIN_EXPORT
 #endif
 
 /** An error reported from a formatting function. */
-class FMT_VISIBILITY("default") format_error : public std::runtime_error {
+class FMT_SO_VISIBILITY("default") format_error : public std::runtime_error {
  public:
   using std::runtime_error::runtime_error;
 };
@@ -1089,7 +1120,7 @@ class loc_value {
   loc_value(T) {}
 
   template <typename Visitor> auto visit(Visitor&& vis) -> decltype(vis(0)) {
-    return visit_format_arg(vis, value_);
+    return value_.visit(vis);
   }
 };
 
@@ -1153,13 +1184,13 @@ using uint32_or_64_or_128_t =
 template <typename T>
 using uint64_or_128_t = conditional_t<num_bits<T>() <= 64, uint64_t, uint128_t>;
 
-#define FMT_POWERS_OF_10(factor)                                             \
-  factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \
-      (factor)*1000000, (factor)*10000000, (factor)*100000000,               \
-      (factor)*1000000000
+#define FMT_POWERS_OF_10(factor)                                  \
+  factor * 10, (factor) * 100, (factor) * 1000, (factor) * 10000, \
+      (factor) * 100000, (factor) * 1000000, (factor) * 10000000, \
+      (factor) * 100000000, (factor) * 1000000000
 
 // Converts value in the range [0, 100) to a string.
-constexpr const char* digits2(size_t value) {
+constexpr auto digits2(size_t value) -> const char* {
   // GCC generates slightly better code when value is pointer-size.
   return &"0001020304050607080910111213141516171819"
          "2021222324252627282930313233343536373839"
@@ -1169,7 +1200,7 @@ constexpr const char* digits2(size_t value) {
 }
 
 // Sign is a template parameter to workaround a bug in gcc 4.8.
-template <typename Char, typename Sign> constexpr Char sign(Sign s) {
+template <typename Char, typename Sign> constexpr auto sign(Sign s) -> Char {
 #if !FMT_GCC_VERSION || FMT_GCC_VERSION >= 604
   static_assert(std::is_same<Sign, sign_t>::value, "");
 #endif
@@ -1394,7 +1425,7 @@ FMT_CONSTEXPR inline auto format_uint(It out, UInt value, int num_digits,
     return out;
   }
   // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1).
-  char buffer[num_bits<UInt>() / BASE_BITS + 1];
+  char buffer[num_bits<UInt>() / BASE_BITS + 1] = {};
   format_uint<BASE_BITS>(buffer, value, num_digits, upper);
   return detail::copy_str_noinline<Char>(buffer, buffer + num_digits, out);
 }
@@ -1430,22 +1461,23 @@ template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
                                                       : "invalid utf32"));
   }
   operator string_view() const { return string_view(&buffer_[0], size()); }
-  size_t size() const { return buffer_.size() - 1; }
-  const char* c_str() const { return &buffer_[0]; }
-  std::string str() const { return std::string(&buffer_[0], size()); }
+  auto size() const -> size_t { return buffer_.size() - 1; }
+  auto c_str() const -> const char* { return &buffer_[0]; }
+  auto str() const -> std::string { return std::string(&buffer_[0], size()); }
 
   // Performs conversion returning a bool instead of throwing exception on
   // conversion error. This method may still throw in case of memory allocation
   // error.
-  bool convert(basic_string_view<WChar> s,
-               to_utf8_error_policy policy = to_utf8_error_policy::abort) {
+  auto convert(basic_string_view<WChar> s,
+               to_utf8_error_policy policy = to_utf8_error_policy::abort)
+      -> bool {
     if (!convert(buffer_, s, policy)) return false;
     buffer_.push_back(0);
     return true;
   }
-  static bool convert(
-      Buffer& buf, basic_string_view<WChar> s,
-      to_utf8_error_policy policy = to_utf8_error_policy::abort) {
+  static auto convert(Buffer& buf, basic_string_view<WChar> s,
+                      to_utf8_error_policy policy = to_utf8_error_policy::abort)
+      -> bool {
     for (auto p = s.begin(); p != s.end(); ++p) {
       uint32_t c = static_cast<uint32_t>(*p);
       if (sizeof(WChar) == 2 && c >= 0xd800 && c <= 0xdfff) {
@@ -1481,7 +1513,7 @@ template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
 };
 
 // Computes 128-bit result of multiplication of two 64-bit unsigned integers.
-inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept {
+inline auto umul128(uint64_t x, uint64_t y) noexcept -> uint128_fallback {
 #if FMT_USE_INT128
   auto p = static_cast<uint128_opt>(x) * static_cast<uint128_opt>(y);
   return {static_cast<uint64_t>(p >> 64), static_cast<uint64_t>(p)};
@@ -1512,19 +1544,19 @@ inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept {
 namespace dragonbox {
 // Computes floor(log10(pow(2, e))) for e in [-2620, 2620] using the method from
 // https://fmt.dev/papers/Dragonbox.pdf#page=28, section 6.1.
-inline int floor_log10_pow2(int e) noexcept {
+inline auto floor_log10_pow2(int e) noexcept -> int {
   FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent");
   static_assert((-1 >> 1) == -1, "right shift is not arithmetic");
   return (e * 315653) >> 20;
 }
 
-inline int floor_log2_pow10(int e) noexcept {
+inline auto floor_log2_pow10(int e) noexcept -> int {
   FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent");
   return (e * 1741647) >> 19;
 }
 
 // Computes upper 64 bits of multiplication of two 64-bit unsigned integers.
-inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept {
+inline auto umul128_upper64(uint64_t x, uint64_t y) noexcept -> uint64_t {
 #if FMT_USE_INT128
   auto p = static_cast<uint128_opt>(x) * static_cast<uint128_opt>(y);
   return static_cast<uint64_t>(p >> 64);
@@ -1537,14 +1569,14 @@ inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept {
 
 // Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a
 // 128-bit unsigned integer.
-inline uint128_fallback umul192_upper128(uint64_t x,
-                                         uint128_fallback y) noexcept {
+inline auto umul192_upper128(uint64_t x, uint128_fallback y) noexcept
+    -> uint128_fallback {
   uint128_fallback r = umul128(x, y.high());
   r += umul128_upper64(x, y.low());
   return r;
 }
 
-FMT_API uint128_fallback get_cached_power(int k) noexcept;
+FMT_API auto get_cached_power(int k) noexcept -> uint128_fallback;
 
 // Type-specific information that Dragonbox uses.
 template <typename T, typename Enable = void> struct float_info;
@@ -1598,14 +1630,14 @@ template <typename T> FMT_API auto to_decimal(T x) noexcept -> decimal_fp<T>;
 }  // namespace dragonbox
 
 // Returns true iff Float has the implicit bit which is not stored.
-template <typename Float> constexpr bool has_implicit_bit() {
+template <typename Float> constexpr auto has_implicit_bit() -> bool {
   // An 80-bit FP number has a 64-bit significand an no implicit bit.
   return std::numeric_limits<Float>::digits != 64;
 }
 
 // Returns the number of significand bits stored in Float. The implicit bit is
 // not counted since it is not stored.
-template <typename Float> constexpr int num_significand_bits() {
+template <typename Float> constexpr auto num_significand_bits() -> int {
   // std::numeric_limits may not support __float128.
   return is_float128<Float>() ? 112
                               : (std::numeric_limits<Float>::digits -
@@ -1698,7 +1730,7 @@ using fp = basic_fp<unsigned long long>;
 
 // Normalizes the value converted from double and multiplied by (1 << SHIFT).
 template <int SHIFT = 0, typename F>
-FMT_CONSTEXPR basic_fp<F> normalize(basic_fp<F> value) {
+FMT_CONSTEXPR auto normalize(basic_fp<F> value) -> basic_fp<F> {
   // Handle subnormals.
   const auto implicit_bit = F(1) << num_significand_bits<double>();
   const auto shifted_implicit_bit = implicit_bit << SHIFT;
@@ -1715,7 +1747,7 @@ FMT_CONSTEXPR basic_fp<F> normalize(basic_fp<F> value) {
 }
 
 // Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking.
-FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) {
+FMT_CONSTEXPR inline auto multiply(uint64_t lhs, uint64_t rhs) -> uint64_t {
 #if FMT_USE_INT128
   auto product = static_cast<__uint128_t>(lhs) * rhs;
   auto f = static_cast<uint64_t>(product >> 64);
@@ -1732,33 +1764,10 @@ FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) {
 #endif
 }
 
-FMT_CONSTEXPR inline fp operator*(fp x, fp y) {
+FMT_CONSTEXPR inline auto operator*(fp x, fp y) -> fp {
   return {multiply(x.f, y.f), x.e + y.e + 64};
 }
 
-template <typename T = void> struct basic_data {
-  // For checking rounding thresholds.
-  // The kth entry is chosen to be the smallest integer such that the
-  // upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k.
-  static constexpr uint32_t fractional_part_rounding_thresholds[8] = {
-      2576980378U,  // ceil(2^31 + 2^32/10^1)
-      2190433321U,  // ceil(2^31 + 2^32/10^2)
-      2151778616U,  // ceil(2^31 + 2^32/10^3)
-      2147913145U,  // ceil(2^31 + 2^32/10^4)
-      2147526598U,  // ceil(2^31 + 2^32/10^5)
-      2147487943U,  // ceil(2^31 + 2^32/10^6)
-      2147484078U,  // ceil(2^31 + 2^32/10^7)
-      2147483691U   // ceil(2^31 + 2^32/10^8)
-  };
-};
-// This is a struct rather than an alias to avoid shadowing warnings in gcc.
-struct data : basic_data<> {};
-
-#if FMT_CPLUSPLUS < 201703L
-template <typename T>
-constexpr uint32_t basic_data<T>::fractional_part_rounding_thresholds[];
-#endif
-
 template <typename T, bool doublish = num_bits<T>() == num_bits<double>()>
 using convert_float_result =
     conditional_t<std::is_same<T, float>::value || doublish, double, T>;
@@ -1939,15 +1948,11 @@ auto write_escaped_cp(OutputIt out, const find_escape_result<Char>& escape)
     *out++ = static_cast<Char>('\\');
     break;
   default:
-    if (escape.cp < 0x100) {
-      return write_codepoint<2, Char>(out, 'x', escape.cp);
-    }
-    if (escape.cp < 0x10000) {
+    if (escape.cp < 0x100) return write_codepoint<2, Char>(out, 'x', escape.cp);
+    if (escape.cp < 0x10000)
       return write_codepoint<4, Char>(out, 'u', escape.cp);
-    }
-    if (escape.cp < 0x110000) {
+    if (escape.cp < 0x110000)
       return write_codepoint<8, Char>(out, 'U', escape.cp);
-    }
     for (Char escape_char : basic_string_view<Char>(
              escape.begin, to_unsigned(escape.end - escape.begin))) {
       out = write_codepoint<2, Char>(out, 'x',
@@ -1977,11 +1982,13 @@ auto write_escaped_string(OutputIt out, basic_string_view<Char> str)
 
 template <typename Char, typename OutputIt>
 auto write_escaped_char(OutputIt out, Char v) -> OutputIt {
+  Char v_array[1] = {v};
   *out++ = static_cast<Char>('\'');
   if ((needs_escape(static_cast<uint32_t>(v)) && v != static_cast<Char>('"')) ||
       v == static_cast<Char>('\'')) {
-    out = write_escaped_cp(
-        out, find_escape_result<Char>{&v, &v + 1, static_cast<uint32_t>(v)});
+    out = write_escaped_cp(out,
+                           find_escape_result<Char>{v_array, v_array + 1,
+                                                    static_cast<uint32_t>(v)});
   } else {
     *out++ = v;
   }
@@ -2070,10 +2077,10 @@ template <typename Char> class digit_grouping {
     std::string::const_iterator group;
     int pos;
   };
-  next_state initial_state() const { return {grouping_.begin(), 0}; }
+  auto initial_state() const -> next_state { return {grouping_.begin(), 0}; }
 
   // Returns the next digit group separator position.
-  int next(next_state& state) const {
+  auto next(next_state& state) const -> int {
     if (thousands_sep_.empty()) return max_value<int>();
     if (state.group == grouping_.end()) return state.pos += grouping_.back();
     if (*state.group <= 0 || *state.group == max_value<char>())
@@ -2092,9 +2099,9 @@ template <typename Char> class digit_grouping {
   digit_grouping(std::string grouping, std::basic_string<Char> sep)
       : grouping_(std::move(grouping)), thousands_sep_(std::move(sep)) {}
 
-  bool has_separator() const { return !thousands_sep_.empty(); }
+  auto has_separator() const -> bool { return !thousands_sep_.empty(); }
 
-  int count_separators(int num_digits) const {
+  auto count_separators(int num_digits) const -> int {
     int count = 0;
     auto state = initial_state();
     while (num_digits > next(state)) ++count;
@@ -2103,7 +2110,7 @@ template <typename Char> class digit_grouping {
 
   // Applies grouping to digits and write the output to out.
   template <typename Out, typename C>
-  Out apply(Out out, basic_string_view<C> digits) const {
+  auto apply(Out out, basic_string_view<C> digits) const -> Out {
     auto num_digits = static_cast<int>(digits.size());
     auto separators = basic_memory_buffer<int>();
     separators.push_back(0);
@@ -2126,24 +2133,66 @@ template <typename Char> class digit_grouping {
   }
 };
 
+FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) {
+  prefix |= prefix != 0 ? value << 8 : value;
+  prefix += (1u + (value > 0xff ? 1 : 0)) << 24;
+}
+
 // Writes a decimal integer with digit grouping.
 template <typename OutputIt, typename UInt, typename Char>
 auto write_int(OutputIt out, UInt value, unsigned prefix,
                const format_specs<Char>& specs,
                const digit_grouping<Char>& grouping) -> OutputIt {
   static_assert(std::is_same<uint64_or_128_t<UInt>, UInt>::value, "");
-  int num_digits = count_digits(value);
-  char digits[40];
-  format_decimal(digits, value, num_digits);
-  unsigned size = to_unsigned((prefix != 0 ? 1 : 0) + num_digits +
-                              grouping.count_separators(num_digits));
+  int num_digits = 0;
+  auto buffer = memory_buffer();
+  switch (specs.type) {
+  case presentation_type::none:
+  case presentation_type::dec: {
+    num_digits = count_digits(value);
+    format_decimal<char>(appender(buffer), value, num_digits);
+    break;
+  }
+  case presentation_type::hex_lower:
+  case presentation_type::hex_upper: {
+    bool upper = specs.type == presentation_type::hex_upper;
+    if (specs.alt)
+      prefix_append(prefix, unsigned(upper ? 'X' : 'x') << 8 | '0');
+    num_digits = count_digits<4>(value);
+    format_uint<4, char>(appender(buffer), value, num_digits, upper);
+    break;
+  }
+  case presentation_type::bin_lower:
+  case presentation_type::bin_upper: {
+    bool upper = specs.type == presentation_type::bin_upper;
+    if (specs.alt)
+      prefix_append(prefix, unsigned(upper ? 'B' : 'b') << 8 | '0');
+    num_digits = count_digits<1>(value);
+    format_uint<1, char>(appender(buffer), value, num_digits);
+    break;
+  }
+  case presentation_type::oct: {
+    num_digits = count_digits<3>(value);
+    // Octal prefix '0' is counted as a digit, so only add it if precision
+    // is not greater than the number of digits.
+    if (specs.alt && specs.precision <= num_digits && value != 0)
+      prefix_append(prefix, '0');
+    format_uint<3, char>(appender(buffer), value, num_digits);
+    break;
+  }
+  case presentation_type::chr:
+    return write_char(out, static_cast<Char>(value), specs);
+  default:
+    throw_format_error("invalid format specifier");
+  }
+
+  unsigned size = (prefix != 0 ? prefix >> 24 : 0) + to_unsigned(num_digits) +
+                  to_unsigned(grouping.count_separators(num_digits));
   return write_padded<align::right>(
       out, specs, size, size, [&](reserve_iterator<OutputIt> it) {
-        if (prefix != 0) {
-          char sign = static_cast<char>(prefix);
-          *it++ = static_cast<Char>(sign);
-        }
-        return grouping.apply(it, string_view(digits, to_unsigned(num_digits)));
+        for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8)
+          *it++ = static_cast<Char>(p & 0xff);
+        return grouping.apply(it, string_view(buffer.data(), buffer.size()));
       });
 }
 
@@ -2156,11 +2205,6 @@ inline auto write_loc(OutputIt, loc_value, const format_specs<Char>&,
   return false;
 }
 
-FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) {
-  prefix |= prefix != 0 ? value << 8 : value;
-  prefix += (1u + (value > 0xff ? 1 : 0)) << 24;
-}
-
 template <typename UInt> struct write_int_arg {
   UInt abs_value;
   unsigned prefix;
@@ -2307,25 +2351,25 @@ class counting_iterator {
 
   FMT_CONSTEXPR counting_iterator() : count_(0) {}
 
-  FMT_CONSTEXPR size_t count() const { return count_; }
+  FMT_CONSTEXPR auto count() const -> size_t { return count_; }
 
-  FMT_CONSTEXPR counting_iterator& operator++() {
+  FMT_CONSTEXPR auto operator++() -> counting_iterator& {
     ++count_;
     return *this;
   }
-  FMT_CONSTEXPR counting_iterator operator++(int) {
+  FMT_CONSTEXPR auto operator++(int) -> counting_iterator {
     auto it = *this;
     ++*this;
     return it;
   }
 
-  FMT_CONSTEXPR friend counting_iterator operator+(counting_iterator it,
-                                                   difference_type n) {
+  FMT_CONSTEXPR friend auto operator+(counting_iterator it, difference_type n)
+      -> counting_iterator {
     it.count_ += static_cast<size_t>(n);
     return it;
   }
 
-  FMT_CONSTEXPR value_type operator*() const { return {}; }
+  FMT_CONSTEXPR auto operator*() const -> value_type { return {}; }
 };
 
 template <typename Char, typename OutputIt>
@@ -2360,9 +2404,10 @@ template <typename Char, typename OutputIt>
 FMT_CONSTEXPR auto write(OutputIt out, const Char* s,
                          const format_specs<Char>& specs, locale_ref)
     -> OutputIt {
-  return specs.type != presentation_type::pointer
-             ? write(out, basic_string_view<Char>(s), specs, {})
-             : write_ptr<Char>(out, bit_cast<uintptr_t>(s), &specs);
+  if (specs.type == presentation_type::pointer)
+    return write_ptr<Char>(out, bit_cast<uintptr_t>(s), &specs);
+  if (!s) throw_format_error("string pointer is null");
+  return write(out, basic_string_view<Char>(s), specs, {});
 }
 
 template <typename Char, typename OutputIt, typename T,
@@ -2448,9 +2493,8 @@ struct float_specs {
   bool showpoint : 1;
 };
 
-template <typename ErrorHandler = error_handler, typename Char>
-FMT_CONSTEXPR auto parse_float_type_spec(const format_specs<Char>& specs,
-                                         ErrorHandler&& eh = {})
+template <typename Char>
+FMT_CONSTEXPR auto parse_float_type_spec(const format_specs<Char>& specs)
     -> float_specs {
   auto result = float_specs();
   result.showpoint = specs.alt;
@@ -2486,7 +2530,7 @@ FMT_CONSTEXPR auto parse_float_type_spec(const format_specs<Char>& specs,
     result.format = float_format::hex;
     break;
   default:
-    eh.on_error("invalid format specifier");
+    throw_format_error("invalid format specifier");
     break;
   }
   return result;
@@ -2725,12 +2769,12 @@ template <typename Char> class fallback_digit_grouping {
  public:
   constexpr fallback_digit_grouping(locale_ref, bool) {}
 
-  constexpr bool has_separator() const { return false; }
+  constexpr auto has_separator() const -> bool { return false; }
 
-  constexpr int count_separators(int) const { return 0; }
+  constexpr auto count_separators(int) const -> int { return 0; }
 
   template <typename Out, typename C>
-  constexpr Out apply(Out out, basic_string_view<C>) const {
+  constexpr auto apply(Out out, basic_string_view<C>) const -> Out {
     return out;
   }
 };
@@ -2749,7 +2793,7 @@ FMT_CONSTEXPR20 auto write_float(OutputIt out, const DecimalFP& f,
   }
 }
 
-template <typename T> constexpr bool isnan(T value) {
+template <typename T> constexpr auto isnan(T value) -> bool {
   return !(value >= value);  // std::isnan doesn't support __float128.
 }
 
@@ -2762,14 +2806,14 @@ struct has_isfinite<T, enable_if_t<sizeof(std::isfinite(T())) != 0>>
 
 template <typename T, FMT_ENABLE_IF(std::is_floating_point<T>::value&&
                                         has_isfinite<T>::value)>
-FMT_CONSTEXPR20 bool isfinite(T value) {
+FMT_CONSTEXPR20 auto isfinite(T value) -> bool {
   constexpr T inf = T(std::numeric_limits<double>::infinity());
   if (is_constant_evaluated())
     return !detail::isnan(value) && value < inf && value > -inf;
   return std::isfinite(value);
 }
 template <typename T, FMT_ENABLE_IF(!has_isfinite<T>::value)>
-FMT_CONSTEXPR bool isfinite(T value) {
+FMT_CONSTEXPR auto isfinite(T value) -> bool {
   T inf = T(std::numeric_limits<double>::infinity());
   // std::isfinite doesn't support __float128.
   return !detail::isnan(value) && value < inf && value > -inf;
@@ -2806,10 +2850,10 @@ class bigint {
   basic_memory_buffer<bigit, bigits_capacity> bigits_;
   int exp_;
 
-  FMT_CONSTEXPR20 bigit operator[](int index) const {
+  FMT_CONSTEXPR20 auto operator[](int index) const -> bigit {
     return bigits_[to_unsigned(index)];
   }
-  FMT_CONSTEXPR20 bigit& operator[](int index) {
+  FMT_CONSTEXPR20 auto operator[](int index) -> bigit& {
     return bigits_[to_unsigned(index)];
   }
 
@@ -2905,11 +2949,11 @@ class bigint {
     assign(uint64_or_128_t<Int>(n));
   }
 
-  FMT_CONSTEXPR20 int num_bigits() const {
+  FMT_CONSTEXPR20 auto num_bigits() const -> int {
     return static_cast<int>(bigits_.size()) + exp_;
   }
 
-  FMT_NOINLINE FMT_CONSTEXPR20 bigint& operator<<=(int shift) {
+  FMT_NOINLINE FMT_CONSTEXPR20 auto operator<<=(int shift) -> bigint& {
     FMT_ASSERT(shift >= 0, "");
     exp_ += shift / bigit_bits;
     shift %= bigit_bits;
@@ -2924,13 +2968,15 @@ class bigint {
     return *this;
   }
 
-  template <typename Int> FMT_CONSTEXPR20 bigint& operator*=(Int value) {
+  template <typename Int>
+  FMT_CONSTEXPR20 auto operator*=(Int value) -> bigint& {
     FMT_ASSERT(value > 0, "");
     multiply(uint32_or_64_or_128_t<Int>(value));
     return *this;
   }
 
-  friend FMT_CONSTEXPR20 int compare(const bigint& lhs, const bigint& rhs) {
+  friend FMT_CONSTEXPR20 auto compare(const bigint& lhs, const bigint& rhs)
+      -> int {
     int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits();
     if (num_lhs_bigits != num_rhs_bigits)
       return num_lhs_bigits > num_rhs_bigits ? 1 : -1;
@@ -2947,8 +2993,9 @@ class bigint {
   }
 
   // Returns compare(lhs1 + lhs2, rhs).
-  friend FMT_CONSTEXPR20 int add_compare(const bigint& lhs1, const bigint& lhs2,
-                                         const bigint& rhs) {
+  friend FMT_CONSTEXPR20 auto add_compare(const bigint& lhs1,
+                                          const bigint& lhs2, const bigint& rhs)
+      -> int {
     auto minimum = [](int a, int b) { return a < b ? a : b; };
     auto maximum = [](int a, int b) { return a > b ? a : b; };
     int max_lhs_bigits = maximum(lhs1.num_bigits(), lhs2.num_bigits());
@@ -3029,13 +3076,13 @@ class bigint {
     bigits_.resize(to_unsigned(num_bigits + exp_difference));
     for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j)
       bigits_[j] = bigits_[i];
-    std::uninitialized_fill_n(bigits_.data(), exp_difference, 0);
+    std::uninitialized_fill_n(bigits_.data(), exp_difference, 0u);
     exp_ -= exp_difference;
   }
 
   // Divides this bignum by divisor, assigning the remainder to this and
   // returning the quotient.
-  FMT_CONSTEXPR20 int divmod_assign(const bigint& divisor) {
+  FMT_CONSTEXPR20 auto divmod_assign(const bigint& divisor) -> int {
     FMT_ASSERT(this != &divisor, "");
     if (compare(*this, divisor) < 0) return 0;
     FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, "");
@@ -3178,8 +3225,10 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp<uint128_t> value,
       }
       if (buf[0] == overflow) {
         buf[0] = '1';
-        if ((flags & dragon::fixed) != 0) buf.push_back('0');
-        else ++exp10;
+        if ((flags & dragon::fixed) != 0)
+          buf.push_back('0');
+        else
+          ++exp10;
       }
       return;
     }
@@ -3276,6 +3325,17 @@ FMT_CONSTEXPR20 void format_hexfloat(Float value, int precision,
   format_hexfloat(static_cast<double>(value), precision, specs, buf);
 }
 
+constexpr auto fractional_part_rounding_thresholds(int index) -> uint32_t {
+  // For checking rounding thresholds.
+  // The kth entry is chosen to be the smallest integer such that the
+  // upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k.
+  // It is equal to ceil(2^31 + 2^32/10^(k + 1)).
+  // These are stored in a string literal because we cannot have static arrays
+  // in constexpr functions and non-static ones are poorly optimized.
+  return U"\x9999999a\x828f5c29\x80418938\x80068db9\x8000a7c6\x800010c7"
+         U"\x800001ae\x8000002b"[index];
+}
+
 template <typename Float>
 FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
                                   buffer<char>& buf) -> int {
@@ -3480,12 +3540,12 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
           //    fractional part is strictly larger than 1/2.
           if (precision < 9) {
             uint32_t fractional_part = static_cast<uint32_t>(prod);
-            should_round_up = fractional_part >=
-                                  data::fractional_part_rounding_thresholds
-                                      [8 - number_of_digits_to_print] ||
-                              ((fractional_part >> 31) &
-                               ((digits & 1) | (second_third_subsegments != 0) |
-                                has_more_segments)) != 0;
+            should_round_up =
+                fractional_part >= fractional_part_rounding_thresholds(
+                                       8 - number_of_digits_to_print) ||
+                ((fractional_part >> 31) &
+                 ((digits & 1) | (second_third_subsegments != 0) |
+                  has_more_segments)) != 0;
           }
           // Rounding at the subsegment boundary.
           // In this case, the fractional part is at least 1/2 if and only if
@@ -3520,12 +3580,12 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs,
             // of 19 digits, so in this case the third segment should be
             // consisting of a genuine digit from the input.
             uint32_t fractional_part = static_cast<uint32_t>(prod);
-            should_round_up = fractional_part >=
-                                  data::fractional_part_rounding_thresholds
-                                      [8 - number_of_digits_to_print] ||
-                              ((fractional_part >> 31) &
-                               ((digits & 1) | (third_subsegment != 0) |
-                                has_more_segments)) != 0;
+            should_round_up =
+                fractional_part >= fractional_part_rounding_thresholds(
+                                       8 - number_of_digits_to_print) ||
+                ((fractional_part >> 31) &
+                 ((digits & 1) | (third_subsegment != 0) |
+                  has_more_segments)) != 0;
           }
           // Rounding at the subsegment boundary.
           else {
@@ -3726,8 +3786,7 @@ FMT_CONSTEXPR auto write(OutputIt out, Char value) -> OutputIt {
 }
 
 template <typename Char, typename OutputIt>
-FMT_CONSTEXPR_CHAR_TRAITS auto write(OutputIt out, const Char* value)
-    -> OutputIt {
+FMT_CONSTEXPR20 auto write(OutputIt out, const Char* value) -> OutputIt {
   if (value) return write(out, basic_string_view<Char>(value));
   throw_format_error("string pointer is null");
   return out;
@@ -3757,8 +3816,11 @@ template <typename Char, typename OutputIt, typename T,
 FMT_CONSTEXPR auto write(OutputIt out, const T& value)
     -> enable_if_t<mapped_type_constant<T, Context>::value == type::custom_type,
                    OutputIt> {
+  auto formatter = typename Context::template formatter_type<T>();
+  auto parse_ctx = typename Context::parse_context_type({});
+  formatter.parse(parse_ctx);
   auto ctx = Context(out, {}, {});
-  return typename Context::template formatter_type<T>().format(value, ctx);
+  return formatter.format(value, ctx);
 }
 
 // An argument visitor that formats the argument and writes it via the output
@@ -3801,62 +3863,39 @@ template <typename Char> struct arg_formatter {
   }
 };
 
-template <typename Char> struct custom_formatter {
-  basic_format_parse_context<Char>& parse_ctx;
-  buffer_context<Char>& ctx;
-
-  void operator()(
-      typename basic_format_arg<buffer_context<Char>>::handle h) const {
-    h.format(parse_ctx, ctx);
-  }
-  template <typename T> void operator()(T) const {}
-};
-
-template <typename ErrorHandler> class width_checker {
- public:
-  explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {}
-
+struct width_checker {
   template <typename T, FMT_ENABLE_IF(is_integer<T>::value)>
   FMT_CONSTEXPR auto operator()(T value) -> unsigned long long {
-    if (is_negative(value)) handler_.on_error("negative width");
+    if (is_negative(value)) throw_format_error("negative width");
     return static_cast<unsigned long long>(value);
   }
 
   template <typename T, FMT_ENABLE_IF(!is_integer<T>::value)>
   FMT_CONSTEXPR auto operator()(T) -> unsigned long long {
-    handler_.on_error("width is not integer");
+    throw_format_error("width is not integer");
     return 0;
   }
-
- private:
-  ErrorHandler& handler_;
 };
 
-template <typename ErrorHandler> class precision_checker {
- public:
-  explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {}
-
+struct precision_checker {
   template <typename T, FMT_ENABLE_IF(is_integer<T>::value)>
   FMT_CONSTEXPR auto operator()(T value) -> unsigned long long {
-    if (is_negative(value)) handler_.on_error("negative precision");
+    if (is_negative(value)) throw_format_error("negative precision");
     return static_cast<unsigned long long>(value);
   }
 
   template <typename T, FMT_ENABLE_IF(!is_integer<T>::value)>
   FMT_CONSTEXPR auto operator()(T) -> unsigned long long {
-    handler_.on_error("precision is not integer");
+    throw_format_error("precision is not integer");
     return 0;
   }
-
- private:
-  ErrorHandler& handler_;
 };
 
-template <template <typename> class Handler, typename FormatArg,
-          typename ErrorHandler>
-FMT_CONSTEXPR auto get_dynamic_spec(FormatArg arg, ErrorHandler eh) -> int {
-  unsigned long long value = visit_format_arg(Handler<ErrorHandler>(eh), arg);
-  if (value > to_unsigned(max_value<int>())) eh.on_error("number is too big");
+template <typename Handler, typename FormatArg>
+FMT_CONSTEXPR auto get_dynamic_spec(FormatArg arg) -> int {
+  unsigned long long value = arg.visit(Handler());
+  if (value > to_unsigned(max_value<int>()))
+    throw_format_error("number is too big");
   return static_cast<int>(value);
 }
 
@@ -3867,7 +3906,7 @@ FMT_CONSTEXPR auto get_arg(Context& ctx, ID id) -> decltype(ctx.arg(id)) {
   return arg;
 }
 
-template <template <typename> class Handler, typename Context>
+template <typename Handler, typename Context>
 FMT_CONSTEXPR void handle_dynamic_spec(int& value,
                                        arg_ref<typename Context::char_type> ref,
                                        Context& ctx) {
@@ -3875,12 +3914,10 @@ FMT_CONSTEXPR void handle_dynamic_spec(int& value,
   case arg_id_kind::none:
     break;
   case arg_id_kind::index:
-    value = detail::get_dynamic_spec<Handler>(get_arg(ctx, ref.val.index),
-                                              ctx.error_handler());
+    value = detail::get_dynamic_spec<Handler>(get_arg(ctx, ref.val.index));
     break;
   case arg_id_kind::name:
-    value = detail::get_dynamic_spec<Handler>(get_arg(ctx, ref.val.name),
-                                              ctx.error_handler());
+    value = detail::get_dynamic_spec<Handler>(get_arg(ctx, ref.val.name));
     break;
   }
 }
@@ -4052,12 +4089,10 @@ class format_int {
 
 template <typename T, typename Char>
 struct formatter<T, Char, enable_if_t<detail::has_format_as<T>::value>>
-    : private formatter<detail::format_as_t<T>, Char> {
-  using base = formatter<detail::format_as_t<T>, Char>;
-  using base::parse;
-
+    : formatter<detail::format_as_t<T>, Char> {
   template <typename FormatContext>
   auto format(const T& value, FormatContext& ctx) const -> decltype(ctx.out()) {
+    using base = formatter<detail::format_as_t<T>, Char>;
     return base::format(format_as(value), ctx);
   }
 };
@@ -4198,84 +4233,59 @@ template <typename T> struct formatter<group_digits_view<T>> : formatter<T> {
   }
 };
 
-// DEPRECATED! join_view will be moved to ranges.h.
-template <typename It, typename Sentinel, typename Char = char>
-struct join_view : detail::view {
-  It begin;
-  Sentinel end;
-  basic_string_view<Char> sep;
-
-  join_view(It b, Sentinel e, basic_string_view<Char> s)
-      : begin(b), end(e), sep(s) {}
+template <typename T> struct nested_view {
+  const formatter<T>* fmt;
+  const T* value;
 };
 
-template <typename It, typename Sentinel, typename Char>
-struct formatter<join_view<It, Sentinel, Char>, Char> {
+template <typename T> struct formatter<nested_view<T>> {
+  FMT_CONSTEXPR auto parse(format_parse_context& ctx) -> const char* {
+    return ctx.begin();
+  }
+  auto format(nested_view<T> view, format_context& ctx) const
+      -> decltype(ctx.out()) {
+    return view.fmt->format(*view.value, ctx);
+  }
+};
+
+template <typename T> struct nested_formatter {
  private:
-  using value_type =
-#ifdef __cpp_lib_ranges
-      std::iter_value_t<It>;
-#else
-      typename std::iterator_traits<It>::value_type;
-#endif
-  formatter<remove_cvref_t<value_type>, Char> value_formatter_;
+  int width_;
+  detail::fill_t<char> fill_;
+  align_t align_ : 4;
+  formatter<T> formatter_;
 
  public:
-  template <typename ParseContext>
-  FMT_CONSTEXPR auto parse(ParseContext& ctx) -> const Char* {
-    return value_formatter_.parse(ctx);
+  constexpr nested_formatter() : width_(0), align_(align_t::none) {}
+
+  FMT_CONSTEXPR auto parse(format_parse_context& ctx) -> const char* {
+    auto specs = detail::dynamic_format_specs<char>();
+    auto it = parse_format_specs(ctx.begin(), ctx.end(), specs, ctx,
+                                 detail::type::none_type);
+    width_ = specs.width;
+    fill_ = specs.fill;
+    align_ = specs.align;
+    ctx.advance_to(it);
+    return formatter_.parse(ctx);
   }
 
-  template <typename FormatContext>
-  auto format(const join_view<It, Sentinel, Char>& value,
-              FormatContext& ctx) const -> decltype(ctx.out()) {
-    auto it = value.begin;
-    auto out = ctx.out();
-    if (it != value.end) {
-      out = value_formatter_.format(*it, ctx);
-      ++it;
-      while (it != value.end) {
-        out = detail::copy_str<Char>(value.sep.begin(), value.sep.end(), out);
-        ctx.advance_to(out);
-        out = value_formatter_.format(*it, ctx);
-        ++it;
-      }
-    }
-    return out;
+  template <typename F>
+  auto write_padded(format_context& ctx, F write) const -> decltype(ctx.out()) {
+    if (width_ == 0) return write(ctx.out());
+    auto buf = memory_buffer();
+    write(std::back_inserter(buf));
+    auto specs = format_specs<>();
+    specs.width = width_;
+    specs.fill = fill_;
+    specs.align = align_;
+    return detail::write(ctx.out(), string_view(buf.data(), buf.size()), specs);
+  }
+
+  auto nested(const T& value) const -> nested_view<T> {
+    return nested_view<T>{&formatter_, &value};
   }
 };
 
-/**
-  Returns a view that formats the iterator range `[begin, end)` with elements
-  separated by `sep`.
- */
-template <typename It, typename Sentinel>
-auto join(It begin, Sentinel end, string_view sep) -> join_view<It, Sentinel> {
-  return {begin, end, sep};
-}
-
-/**
-  \rst
-  Returns a view that formats `range` with elements separated by `sep`.
-
-  **Example**::
-
-    std::vector<int> v = {1, 2, 3};
-    fmt::print("{}", fmt::join(v, ", "));
-    // Output: "1, 2, 3"
-
-  ``fmt::join`` applies passed format specifiers to the range elements::
-
-    fmt::print("{:02}", fmt::join(v, ", "));
-    // Output: "01, 02, 03"
-  \endrst
- */
-template <typename Range>
-auto join(Range&& range, string_view sep)
-    -> join_view<detail::iterator_t<Range>, detail::sentinel_t<Range>> {
-  return join(std::begin(range), std::end(range), sep);
-}
-
 /**
   \rst
   Converts *value* to ``std::string`` using the default format for type *T*.
@@ -4329,12 +4339,12 @@ void vformat_to(buffer<Char>& buf, basic_string_view<Char> fmt,
   auto out = buffer_appender<Char>(buf);
   if (fmt.size() == 2 && equal2(fmt.data(), "{}")) {
     auto arg = args.get(0);
-    if (!arg) error_handler().on_error("argument not found");
-    visit_format_arg(default_arg_formatter<Char>{out, args, loc}, arg);
+    if (!arg) throw_format_error("argument not found");
+    arg.visit(default_arg_formatter<Char>{out, args, loc});
     return;
   }
 
-  struct format_handler : error_handler {
+  struct format_handler {
     basic_format_parse_context<Char> parse_context;
     buffer_context<Char> context;
 
@@ -4356,26 +4366,22 @@ void vformat_to(buffer<Char>& buf, basic_string_view<Char> fmt,
     }
     FMT_CONSTEXPR auto on_arg_id(basic_string_view<Char> id) -> int {
       int arg_id = context.arg_id(id);
-      if (arg_id < 0) on_error("argument not found");
+      if (arg_id < 0) throw_format_error("argument not found");
       return arg_id;
     }
 
     FMT_INLINE void on_replacement_field(int id, const Char*) {
       auto arg = get_arg(context, id);
-      context.advance_to(visit_format_arg(
-          default_arg_formatter<Char>{context.out(), context.args(),
-                                      context.locale()},
-          arg));
+      context.advance_to(arg.visit(default_arg_formatter<Char>{
+          context.out(), context.args(), context.locale()}));
     }
 
     auto on_format_specs(int id, const Char* begin, const Char* end)
         -> const Char* {
       auto arg = get_arg(context, id);
-      if (arg.type() == type::custom_type) {
-        parse_context.advance_to(begin);
-        visit_format_arg(custom_formatter<Char>{parse_context, context}, arg);
+      // Not using a visitor for custom types gives better codegen.
+      if (arg.format_custom(begin, parse_context, context))
         return parse_context.begin();
-      }
       auto specs = detail::dynamic_format_specs<Char>();
       begin = parse_format_specs(begin, end, specs, parse_context, arg.type());
       detail::handle_dynamic_spec<detail::width_checker>(
@@ -4383,11 +4389,13 @@ void vformat_to(buffer<Char>& buf, basic_string_view<Char> fmt,
       detail::handle_dynamic_spec<detail::precision_checker>(
           specs.precision, specs.precision_ref, context);
       if (begin == end || *begin != '}')
-        on_error("missing '}' in format string");
-      auto f = arg_formatter<Char>{context.out(), specs, context.locale()};
-      context.advance_to(visit_format_arg(f, arg));
+        throw_format_error("missing '}' in format string");
+      context.advance_to(arg.visit(
+          arg_formatter<Char>{context.out(), specs, context.locale()}));
       return begin;
     }
+
+    void on_error(const char* message) { throw_format_error(message); }
   };
   detail::parse_format_string<false>(fmt, format_handler(out, fmt, args, loc));
 }
@@ -4426,7 +4434,7 @@ template <detail_exported::fixed_string Str> constexpr auto operator""_a() {
   return detail::udl_arg<char_t, sizeof(Str.data) / sizeof(char_t), Str>();
 }
 #  else
-constexpr auto operator"" _a(const char* s, size_t) -> detail::udl_arg<char> {
+constexpr auto operator""_a(const char* s, size_t) -> detail::udl_arg<char> {
   return {s};
 }
 #  endif
@@ -4486,16 +4494,16 @@ formatter<T, Char,
                       detail::type::custom_type>>::format(const T& val,
                                                           FormatContext& ctx)
     const -> decltype(ctx.out()) {
-  if (specs_.width_ref.kind != detail::arg_id_kind::none ||
-      specs_.precision_ref.kind != detail::arg_id_kind::none) {
-    auto specs = specs_;
-    detail::handle_dynamic_spec<detail::width_checker>(specs.width,
-                                                       specs.width_ref, ctx);
-    detail::handle_dynamic_spec<detail::precision_checker>(
-        specs.precision, specs.precision_ref, ctx);
-    return detail::write<Char>(ctx.out(), val, specs, ctx.locale());
+  if (specs_.width_ref.kind == detail::arg_id_kind::none &&
+      specs_.precision_ref.kind == detail::arg_id_kind::none) {
+    return detail::write<Char>(ctx.out(), val, specs_, ctx.locale());
   }
-  return detail::write<Char>(ctx.out(), val, specs_, ctx.locale());
+  auto specs = specs_;
+  detail::handle_dynamic_spec<detail::width_checker>(specs.width,
+                                                     specs.width_ref, ctx);
+  detail::handle_dynamic_spec<detail::precision_checker>(
+      specs.precision, specs.precision_ref, ctx);
+  return detail::write<Char>(ctx.out(), val, specs, ctx.locale());
 }
 
 FMT_END_NAMESPACE
diff --git a/src/fmt/os.h b/src/fmt/os.h
index 2126424d39..6009ccc112 100644
--- a/src/fmt/os.h
+++ b/src/fmt/os.h
@@ -13,12 +13,14 @@
 #include <cstdio>
 #include <system_error>  // std::system_error
 
-#if defined __APPLE__ || defined(__FreeBSD__)
-#  include <xlocale.h>  // for LC_NUMERIC_MASK on OS X
-#endif
-
 #include "format.h"
 
+#if defined __APPLE__ || defined(__FreeBSD__)
+#  if FMT_HAS_INCLUDE(<xlocale.h>)
+#    include <xlocale.h>  // for LC_NUMERIC_MASK on OS X
+#  endif
+#endif
+
 #ifndef FMT_USE_FCNTL
 // UWP doesn't provide _pipe.
 #  if FMT_HAS_INCLUDE("winapifamily.h")
@@ -46,6 +48,7 @@
 
 // Calls to system functions are wrapped in FMT_SYSTEM for testability.
 #ifdef FMT_SYSTEM
+#  define FMT_HAS_SYSTEM
 #  define FMT_POSIX_CALL(call) FMT_SYSTEM(call)
 #else
 #  define FMT_SYSTEM(call) ::call
@@ -114,7 +117,7 @@ template <typename Char> class basic_cstring_view {
   basic_cstring_view(const std::basic_string<Char>& s) : data_(s.c_str()) {}
 
   /** Returns the pointer to a C string. */
-  const Char* c_str() const { return data_; }
+  auto c_str() const -> const Char* { return data_; }
 };
 
 using cstring_view = basic_cstring_view<char>;
@@ -169,7 +172,7 @@ std::system_error windows_error(int error_code, string_view message,
 // Can be used to report errors from destructors.
 FMT_API void report_windows_error(int error_code, const char* message) noexcept;
 #else
-inline const std::error_category& system_category() noexcept {
+inline auto system_category() noexcept -> const std::error_category& {
   return std::system_category();
 }
 #endif  // _WIN32
@@ -206,7 +209,7 @@ class buffered_file {
     other.file_ = nullptr;
   }
 
-  buffered_file& operator=(buffered_file&& other) {
+  auto operator=(buffered_file&& other) -> buffered_file& {
     close();
     file_ = other.file_;
     other.file_ = nullptr;
@@ -220,9 +223,9 @@ class buffered_file {
   FMT_API void close();
 
   // Returns the pointer to a FILE object representing this file.
-  FILE* get() const noexcept { return file_; }
+  auto get() const noexcept -> FILE* { return file_; }
 
-  FMT_API int descriptor() const;
+  FMT_API auto descriptor() const -> int;
 
   void vprint(string_view format_str, format_args args) {
     fmt::vprint(file_, format_str, args);
@@ -235,6 +238,7 @@ class buffered_file {
 };
 
 #if FMT_USE_FCNTL
+
 // A file. Closed file is represented by a file object with descriptor -1.
 // Methods that are not declared with noexcept may throw
 // fmt::system_error in case of failure. Note that some errors such as
@@ -248,6 +252,8 @@ class FMT_API file {
   // Constructs a file object with a given descriptor.
   explicit file(int fd) : fd_(fd) {}
 
+  friend struct pipe;
+
  public:
   // Possible values for the oflag argument to the constructor.
   enum {
@@ -272,7 +278,7 @@ class FMT_API file {
   file(file&& other) noexcept : fd_(other.fd_) { other.fd_ = -1; }
 
   // Move assignment is not noexcept because close may throw.
-  file& operator=(file&& other) {
+  auto operator=(file&& other) -> file& {
     close();
     fd_ = other.fd_;
     other.fd_ = -1;
@@ -283,24 +289,24 @@ class FMT_API file {
   ~file() noexcept;
 
   // Returns the file descriptor.
-  int descriptor() const noexcept { return fd_; }
+  auto descriptor() const noexcept -> int { return fd_; }
 
   // Closes the file.
   void close();
 
   // Returns the file size. The size has signed type for consistency with
   // stat::st_size.
-  long long size() const;
+  auto size() const -> long long;
 
   // Attempts to read count bytes from the file into the specified buffer.
-  size_t read(void* buffer, size_t count);
+  auto read(void* buffer, size_t count) -> size_t;
 
   // Attempts to write count bytes from the specified buffer to the file.
-  size_t write(const void* buffer, size_t count);
+  auto write(const void* buffer, size_t count) -> size_t;
 
   // Duplicates a file descriptor with the dup function and returns
   // the duplicate as a file object.
-  static file dup(int fd);
+  static auto dup(int fd) -> file;
 
   // Makes fd be the copy of this file descriptor, closing fd first if
   // necessary.
@@ -310,13 +316,9 @@ class FMT_API file {
   // necessary.
   void dup2(int fd, std::error_code& ec) noexcept;
 
-  // Creates a pipe setting up read_end and write_end file objects for reading
-  // and writing respectively.
-  static void pipe(file& read_end, file& write_end);
-
   // Creates a buffered_file object associated with this file and detaches
   // this file object from the file.
-  buffered_file fdopen(const char* mode);
+  auto fdopen(const char* mode) -> buffered_file;
 
 #  if defined(_WIN32) && !defined(__MINGW32__)
   // Opens a file and constructs a file object representing this file by
@@ -325,15 +327,24 @@ class FMT_API file {
 #  endif
 };
 
+struct FMT_API pipe {
+  file read_end;
+  file write_end;
+
+  // Creates a pipe setting up read_end and write_end file objects for reading
+  // and writing respectively.
+  pipe();
+};
+
 // Returns the memory page size.
-long getpagesize();
+auto getpagesize() -> long;
 
 namespace detail {
 
 struct buffer_size {
   buffer_size() = default;
   size_t value = 0;
-  buffer_size operator=(size_t val) const {
+  auto operator=(size_t val) const -> buffer_size {
     auto bs = buffer_size();
     bs.value = val;
     return bs;
@@ -366,9 +377,10 @@ struct ostream_params {
 };
 
 class file_buffer final : public buffer<char> {
+ private:
   file file_;
 
-  FMT_API void grow(size_t) override;
+  FMT_API static void grow(buffer<char>& buf, size_t);
 
  public:
   FMT_API file_buffer(cstring_view path, const ostream_params& params);
@@ -410,7 +422,7 @@ class FMT_API ostream {
   void flush() { buffer_.flush(); }
 
   template <typename... T>
-  friend ostream output_file(cstring_view path, T... params);
+  friend auto output_file(cstring_view path, T... params) -> ostream;
 
   void close() { buffer_.close(); }
 
@@ -419,7 +431,7 @@ class FMT_API ostream {
     output to the file.
    */
   template <typename... T> void print(format_string<T...> fmt, T&&... args) {
-    vformat_to(detail::buffer_appender<char>(buffer_), fmt,
+    vformat_to(std::back_inserter(buffer_), fmt,
                fmt::make_format_args(args...));
   }
 };
@@ -440,7 +452,7 @@ class FMT_API ostream {
   \endrst
  */
 template <typename... T>
-inline ostream output_file(cstring_view path, T... params) {
+inline auto output_file(cstring_view path, T... params) -> ostream {
   return {path, detail::ostream_params(params...)};
 }
 #endif  // FMT_USE_FCNTL
diff --git a/src/fmt/ostream.h b/src/fmt/ostream.h
index a112fe7ba9..26fb3b5ac0 100644
--- a/src/fmt/ostream.h
+++ b/src/fmt/ostream.h
@@ -10,19 +10,50 @@
 
 #include <fstream>  // std::filebuf
 
-#if defined(_WIN32) && defined(__GLIBCXX__)
-#  include <ext/stdio_filebuf.h>
-#  include <ext/stdio_sync_filebuf.h>
-#elif defined(_WIN32) && defined(_LIBCPP_VERSION)
-#  include <__std_stream>
+#ifdef _WIN32
+#  ifdef __GLIBCXX__
+#    include <ext/stdio_filebuf.h>
+#    include <ext/stdio_sync_filebuf.h>
+#  endif
+#  include <io.h>
 #endif
 
 #include "format.h"
 
 FMT_BEGIN_NAMESPACE
-
 namespace detail {
 
+template <typename Streambuf> class formatbuf : public Streambuf {
+ private:
+  using char_type = typename Streambuf::char_type;
+  using streamsize = decltype(std::declval<Streambuf>().sputn(nullptr, 0));
+  using int_type = typename Streambuf::int_type;
+  using traits_type = typename Streambuf::traits_type;
+
+  buffer<char_type>& buffer_;
+
+ public:
+  explicit formatbuf(buffer<char_type>& buf) : buffer_(buf) {}
+
+ protected:
+  // The put area is always empty. This makes the implementation simpler and has
+  // the advantage that the streambuf and the buffer are always in sync and
+  // sputc never writes into uninitialized memory. A disadvantage is that each
+  // call to sputc always results in a (virtual) call to overflow. There is no
+  // disadvantage here for sputn since this always results in a call to xsputn.
+
+  auto overflow(int_type ch) -> int_type override {
+    if (!traits_type::eq_int_type(ch, traits_type::eof()))
+      buffer_.push_back(static_cast<char_type>(ch));
+    return ch;
+  }
+
+  auto xsputn(const char_type* s, streamsize count) -> streamsize override {
+    buffer_.append(s, s + count);
+    return count;
+  }
+};
+
 // Generate a unique explicit instantion in every translation unit using a tag
 // type in an anonymous namespace.
 namespace {
@@ -37,36 +68,40 @@ class file_access {
 template class file_access<file_access_tag, std::filebuf,
                            &std::filebuf::_Myfile>;
 auto get_file(std::filebuf&) -> FILE*;
-#elif defined(_WIN32) && defined(_LIBCPP_VERSION)
-template class file_access<file_access_tag, std::__stdoutbuf<char>,
-                           &std::__stdoutbuf<char>::__file_>;
-auto get_file(std::__stdoutbuf<char>&) -> FILE*;
 #endif
 
-inline bool write_ostream_unicode(std::ostream& os, fmt::string_view data) {
+inline auto write_ostream_unicode(std::ostream& os, fmt::string_view data)
+    -> bool {
+  FILE* f = nullptr;
 #if FMT_MSC_VERSION
   if (auto* buf = dynamic_cast<std::filebuf*>(os.rdbuf()))
-    if (FILE* f = get_file(*buf)) return write_console(f, data);
-#elif defined(_WIN32) && defined(__GLIBCXX__)
-  auto* rdbuf = os.rdbuf();
-  FILE* c_file;
-  if (auto* sfbuf = dynamic_cast<__gnu_cxx::stdio_sync_filebuf<char>*>(rdbuf))
-    c_file = sfbuf->file();
-  else if (auto* fbuf = dynamic_cast<__gnu_cxx::stdio_filebuf<char>*>(rdbuf))
-    c_file = fbuf->file();
+    f = get_file(*buf);
+  else
+    return false;
+#elif defined(_WIN32) && defined(__GLIBCXX__)
+  auto* rdbuf = os.rdbuf();
+  if (auto* sfbuf = dynamic_cast<__gnu_cxx::stdio_sync_filebuf<char>*>(rdbuf))
+    f = sfbuf->file();
+  else if (auto* fbuf = dynamic_cast<__gnu_cxx::stdio_filebuf<char>*>(rdbuf))
+    f = fbuf->file();
   else
     return false;
-  if (c_file) return write_console(c_file, data);
-#elif defined(_WIN32) && defined(_LIBCPP_VERSION)
-  if (auto* buf = dynamic_cast<std::__stdoutbuf<char>*>(os.rdbuf()))
-    if (FILE* f = get_file(*buf)) return write_console(f, data);
 #else
-  ignore_unused(os, data);
+  ignore_unused(os, data, f);
+#endif
+#ifdef _WIN32
+  if (f) {
+    int fd = _fileno(f);
+    if (_isatty(fd)) {
+      os.flush();
+      return write_console(fd, data);
+    }
+  }
 #endif
   return false;
 }
-inline bool write_ostream_unicode(std::wostream&,
-                                  fmt::basic_string_view<wchar_t>) {
+inline auto write_ostream_unicode(std::wostream&,
+                                  fmt::basic_string_view<wchar_t>) -> bool {
   return false;
 }
 
@@ -87,18 +122,19 @@ void write_buffer(std::basic_ostream<Char>& os, buffer<Char>& buf) {
 }
 
 template <typename Char, typename T>
-void format_value(buffer<Char>& buf, const T& value,
-                  locale_ref loc = locale_ref()) {
+void format_value(buffer<Char>& buf, const T& value) {
   auto&& format_buf = formatbuf<std::basic_streambuf<Char>>(buf);
   auto&& output = std::basic_ostream<Char>(&format_buf);
 #if !defined(FMT_STATIC_THOUSANDS_SEPARATOR)
-  if (loc) output.imbue(loc.get<std::locale>());
+  output.imbue(std::locale::classic());  // The default is always unlocalized.
 #endif
   output << value;
   output.exceptions(std::ios_base::failbit | std::ios_base::badbit);
 }
 
-template <typename T> struct streamed_view { const T& value; };
+template <typename T> struct streamed_view {
+  const T& value;
+};
 
 }  // namespace detail
 
@@ -111,7 +147,7 @@ struct basic_ostream_formatter : formatter<basic_string_view<Char>, Char> {
   auto format(const T& value, basic_format_context<OutputIt, Char>& ctx) const
       -> OutputIt {
     auto buffer = basic_memory_buffer<Char>();
-    detail::format_value(buffer, value, ctx.locale());
+    detail::format_value(buffer, value);
     return formatter<basic_string_view<Char>, Char>::format(
         {buffer.data(), buffer.size()}, ctx);
   }
@@ -140,7 +176,7 @@ struct formatter<detail::streamed_view<T>, Char>
   \endrst
  */
 template <typename T>
-auto streamed(const T& value) -> detail::streamed_view<T> {
+constexpr auto streamed(const T& value) -> detail::streamed_view<T> {
   return {value};
 }
 
diff --git a/src/fmt/printf.h b/src/fmt/printf.h
index adef6adf83..35445abce2 100644
--- a/src/fmt/printf.h
+++ b/src/fmt/printf.h
@@ -16,13 +16,19 @@
 FMT_BEGIN_NAMESPACE
 FMT_BEGIN_EXPORT
 
-template <typename T> struct printf_formatter { printf_formatter() = delete; };
+template <typename T> struct printf_formatter {
+  printf_formatter() = delete;
+};
 
 template <typename Char> class basic_printf_context {
  private:
   detail::buffer_appender<Char> out_;
   basic_format_args<basic_printf_context> args_;
 
+  static_assert(std::is_same<Char, char>::value ||
+                    std::is_same<Char, wchar_t>::value,
+                "Unsupported code unit type.");
+
  public:
   using char_type = Char;
   using parse_context_type = basic_format_parse_context<Char>;
@@ -47,9 +53,7 @@ template <typename Char> class basic_printf_context {
     return args_.get(id);
   }
 
-  FMT_CONSTEXPR void on_error(const char* message) {
-    detail::error_handler().on_error(message);
-  }
+  void on_error(const char* message) { throw_format_error(message); }
 };
 
 namespace detail {
@@ -102,7 +106,9 @@ struct is_zero_int {
 
 template <typename T> struct make_unsigned_or_bool : std::make_unsigned<T> {};
 
-template <> struct make_unsigned_or_bool<bool> { using type = bool; };
+template <> struct make_unsigned_or_bool<bool> {
+  using type = bool;
+};
 
 template <typename T, typename Context> class arg_converter {
  private:
@@ -157,7 +163,7 @@ template <typename T, typename Context> class arg_converter {
 // unsigned).
 template <typename T, typename Context, typename Char>
 void convert_arg(basic_format_arg<Context>& arg, Char type) {
-  visit_format_arg(arg_converter<T, Context>(arg, type), arg);
+  arg.visit(arg_converter<T, Context>(arg, type));
 }
 
 // Converts an integer argument to char for printf.
@@ -360,8 +366,8 @@ auto parse_header(const Char*& it, const Char* end, format_specs<Char>& specs,
       if (specs.width == -1) throw_format_error("number is too big");
     } else if (*it == '*') {
       ++it;
-      specs.width = static_cast<int>(visit_format_arg(
-          detail::printf_width_handler<Char>(specs), get_arg(-1)));
+      specs.width = static_cast<int>(
+          get_arg(-1).visit(detail::printf_width_handler<Char>(specs)));
     }
   }
   return arg_index;
@@ -456,8 +462,8 @@ void vprintf(buffer<Char>& buf, basic_string_view<Char> format,
         specs.precision = parse_nonnegative_int(it, end, 0);
       } else if (c == '*') {
         ++it;
-        specs.precision = static_cast<int>(
-            visit_format_arg(printf_precision_handler(), get_arg(-1)));
+        specs.precision =
+            static_cast<int>(get_arg(-1).visit(printf_precision_handler()));
       } else {
         specs.precision = 0;
       }
@@ -471,14 +477,14 @@ void vprintf(buffer<Char>& buf, basic_string_view<Char> format,
       specs.fill[0] = ' ';
     }
     if (specs.precision >= 0 && arg.type() == type::cstring_type) {
-      auto str = visit_format_arg(get_cstring<Char>(), arg);
+      auto str = arg.visit(get_cstring<Char>());
       auto str_end = str + specs.precision;
       auto nul = std::find(str, str_end, Char());
       auto sv = basic_string_view<Char>(
           str, to_unsigned(nul != str_end ? nul - str : specs.precision));
       arg = make_arg<basic_printf_context<Char>>(sv);
     }
-    if (specs.alt && visit_format_arg(is_zero_int(), arg)) specs.alt = false;
+    if (specs.alt && arg.visit(is_zero_int())) specs.alt = false;
     if (specs.fill[0] == '0') {
       if (arg.is_arithmetic() && specs.align != align::left)
         specs.align = align::numeric;
@@ -538,7 +544,7 @@ void vprintf(buffer<Char>& buf, basic_string_view<Char> format,
         type = 'd';
         break;
       case 'c':
-        visit_format_arg(char_converter<basic_printf_context<Char>>(arg), arg);
+        arg.visit(char_converter<basic_printf_context<Char>>(arg));
         break;
       }
     }
@@ -549,7 +555,7 @@ void vprintf(buffer<Char>& buf, basic_string_view<Char> format,
     start = it;
 
     // Format argument.
-    visit_format_arg(printf_arg_formatter<Char>(out, specs, context), arg);
+    arg.visit(printf_arg_formatter<Char>(out, specs, context));
   }
   write(out, basic_string_view<Char>(start, to_unsigned(it - start)));
 }
diff --git a/src/fmt/ranges.h b/src/fmt/ranges.h
index 65beba5bfc..a9cd60e594 100644
--- a/src/fmt/ranges.h
+++ b/src/fmt/ranges.h
@@ -1,13 +1,9 @@
-// Formatting library for C++ - experimental range support
+// Formatting library for C++ - range and tuple support
 //
-// Copyright (c) 2012 - present, Victor Zverovich
+// Copyright (c) 2012 - present, Victor Zverovich and {fmt} contributors
 // All rights reserved.
 //
 // For the license information refer to format.h.
-//
-// Copyright (c) 2018 - present, Remotion (Igor Schulz)
-// All Rights Reserved
-// {fmt} support for ranges, containers and types tuple interface.
 
 #ifndef FMT_RANGES_H_
 #define FMT_RANGES_H_
@@ -187,7 +183,7 @@ template <size_t N> using make_index_sequence = std::make_index_sequence<N>;
 template <typename T, T... N> struct integer_sequence {
   using value_type = T;
 
-  static FMT_CONSTEXPR size_t size() { return sizeof...(N); }
+  static FMT_CONSTEXPR auto size() -> size_t { return sizeof...(N); }
 };
 
 template <size_t... N> using index_sequence = integer_sequence<size_t, N...>;
@@ -211,15 +207,15 @@ class is_tuple_formattable_ {
 };
 template <typename T, typename C> class is_tuple_formattable_<T, C, true> {
   template <std::size_t... Is>
-  static std::true_type check2(index_sequence<Is...>,
-                               integer_sequence<bool, (Is == Is)...>);
-  static std::false_type check2(...);
+  static auto check2(index_sequence<Is...>,
+                     integer_sequence<bool, (Is == Is)...>) -> std::true_type;
+  static auto check2(...) -> std::false_type;
   template <std::size_t... Is>
-  static decltype(check2(
+  static auto check(index_sequence<Is...>) -> decltype(check2(
       index_sequence<Is...>{},
-      integer_sequence<
-          bool, (is_formattable<typename std::tuple_element<Is, T>::type,
-                                C>::value)...>{})) check(index_sequence<Is...>);
+      integer_sequence<bool,
+                       (is_formattable<typename std::tuple_element<Is, T>::type,
+                                       C>::value)...>{}));
 
  public:
   static constexpr const bool value =
@@ -421,6 +417,12 @@ struct is_formattable_delayed
 #endif
 }  // namespace detail
 
+template <typename...> struct conjunction : std::true_type {};
+template <typename P> struct conjunction<P> : P {};
+template <typename P1, typename... Pn>
+struct conjunction<P1, Pn...>
+    : conditional_t<bool(P1::value), conjunction<Pn...>, P1> {};
+
 template <typename T, typename Char, typename Enable = void>
 struct range_formatter;
 
@@ -486,7 +488,8 @@ struct range_formatter<
     for (; it != end; ++it) {
       if (i > 0) out = detail::copy_str<Char>(separator_, out);
       ctx.advance_to(out);
-      out = underlying_.format(mapper.map(*it), ctx);
+      auto&& item = *it;
+      out = underlying_.format(mapper.map(item), ctx);
       ++i;
     }
     out = detail::copy_str<Char>(closing_bracket_, out);
@@ -571,6 +574,83 @@ struct formatter<
                                       Char> {
 };
 
+template <typename It, typename Sentinel, typename Char = char>
+struct join_view : detail::view {
+  It begin;
+  Sentinel end;
+  basic_string_view<Char> sep;
+
+  join_view(It b, Sentinel e, basic_string_view<Char> s)
+      : begin(b), end(e), sep(s) {}
+};
+
+template <typename It, typename Sentinel, typename Char>
+struct formatter<join_view<It, Sentinel, Char>, Char> {
+ private:
+  using value_type =
+#ifdef __cpp_lib_ranges
+      std::iter_value_t<It>;
+#else
+      typename std::iterator_traits<It>::value_type;
+#endif
+  formatter<remove_cvref_t<value_type>, Char> value_formatter_;
+
+ public:
+  template <typename ParseContext>
+  FMT_CONSTEXPR auto parse(ParseContext& ctx) -> const Char* {
+    return value_formatter_.parse(ctx);
+  }
+
+  template <typename FormatContext>
+  auto format(const join_view<It, Sentinel, Char>& value,
+              FormatContext& ctx) const -> decltype(ctx.out()) {
+    auto it = value.begin;
+    auto out = ctx.out();
+    if (it != value.end) {
+      out = value_formatter_.format(*it, ctx);
+      ++it;
+      while (it != value.end) {
+        out = detail::copy_str<Char>(value.sep.begin(), value.sep.end(), out);
+        ctx.advance_to(out);
+        out = value_formatter_.format(*it, ctx);
+        ++it;
+      }
+    }
+    return out;
+  }
+};
+
+/**
+  Returns a view that formats the iterator range `[begin, end)` with elements
+  separated by `sep`.
+ */
+template <typename It, typename Sentinel>
+auto join(It begin, Sentinel end, string_view sep) -> join_view<It, Sentinel> {
+  return {begin, end, sep};
+}
+
+/**
+  \rst
+  Returns a view that formats `range` with elements separated by `sep`.
+
+  **Example**::
+
+    std::vector<int> v = {1, 2, 3};
+    fmt::print("{}", fmt::join(v, ", "));
+    // Output: "1, 2, 3"
+
+  ``fmt::join`` applies passed format specifiers to the range elements::
+
+    fmt::print("{:02}", fmt::join(v, ", "));
+    // Output: "01, 02, 03"
+  \endrst
+ */
+template <typename Range>
+auto join(Range&& range, string_view sep)
+    -> join_view<detail::iterator_t<Range>, detail::sentinel_t<Range>> {
+  return join(std::begin(range), std::end(range), sep);
+}
+
 template <typename Char, typename... T> struct tuple_join_view : detail::view {
   const std::tuple<T...>& tuple;
   basic_string_view<Char> sep;
@@ -705,13 +785,6 @@ FMT_CONSTEXPR auto join(const std::tuple<T...>& tuple, string_view sep)
   return {tuple, sep};
 }
 
-template <typename... T>
-FMT_CONSTEXPR auto join(const std::tuple<T...>& tuple,
-                        basic_string_view<wchar_t> sep)
-    -> tuple_join_view<wchar_t, T...> {
-  return {tuple, sep};
-}
-
 /**
   \rst
   Returns an object that formats `initializer_list` with elements separated by
diff --git a/src/fmt/std.h b/src/fmt/std.h
index b4e055c28d..7cff115920 100644
--- a/src/fmt/std.h
+++ b/src/fmt/std.h
@@ -38,6 +38,10 @@
 #  endif
 #endif
 
+#if FMT_CPLUSPLUS > 201703L && FMT_HAS_INCLUDE(<source_location>)
+#  include <source_location>
+#endif
+
 // GCC 4 does not support FMT_HAS_INCLUDE.
 #if FMT_HAS_INCLUDE(<cxxabi.h>) || defined(__GLIBCXX__)
 #  include <cxxabi.h>
@@ -59,43 +63,53 @@
 #  endif
 #endif
 
-#ifdef __cpp_lib_filesystem
+// For older Xcode versions, __cpp_lib_xxx flags are inaccurately defined.
+#ifndef FMT_CPP_LIB_FILESYSTEM
+#  ifdef __cpp_lib_filesystem
+#    define FMT_CPP_LIB_FILESYSTEM __cpp_lib_filesystem
+#  else
+#    define FMT_CPP_LIB_FILESYSTEM 0
+#  endif
+#endif
+
+#ifndef FMT_CPP_LIB_VARIANT
+#  ifdef __cpp_lib_variant
+#    define FMT_CPP_LIB_VARIANT __cpp_lib_variant
+#  else
+#    define FMT_CPP_LIB_VARIANT 0
+#  endif
+#endif
+
+#if FMT_CPP_LIB_FILESYSTEM
 FMT_BEGIN_NAMESPACE
 
 namespace detail {
 
-template <typename Char> auto get_path_string(const std::filesystem::path& p) {
-  return p.string<Char>();
+template <typename Char, typename PathChar>
+auto get_path_string(const std::filesystem::path& p,
+                     const std::basic_string<PathChar>& native) {
+  if constexpr (std::is_same_v<Char, char> && std::is_same_v<PathChar, wchar_t>)
+    return to_utf8<wchar_t>(native, to_utf8_error_policy::replace);
+  else
+    return p.string<Char>();
 }
 
-template <typename Char>
+template <typename Char, typename PathChar>
 void write_escaped_path(basic_memory_buffer<Char>& quoted,
-                        const std::filesystem::path& p) {
-  write_escaped_string<Char>(std::back_inserter(quoted), p.string<Char>());
-}
-
-#  ifdef _WIN32
-template <>
-inline auto get_path_string<char>(const std::filesystem::path& p) {
-  return to_utf8<wchar_t>(p.native(), to_utf8_error_policy::replace);
-}
-
-template <>
-inline void write_escaped_path<char>(memory_buffer& quoted,
-                                     const std::filesystem::path& p) {
-  auto buf = basic_memory_buffer<wchar_t>();
-  write_escaped_string<wchar_t>(std::back_inserter(buf), p.native());
-  bool valid = to_utf8<wchar_t>::convert(quoted, {buf.data(), buf.size()});
-  FMT_ASSERT(valid, "invalid utf16");
-}
-#  endif  // _WIN32
-
-template <>
-inline void write_escaped_path<std::filesystem::path::value_type>(
-    basic_memory_buffer<std::filesystem::path::value_type>& quoted,
-    const std::filesystem::path& p) {
-  write_escaped_string<std::filesystem::path::value_type>(
-      std::back_inserter(quoted), p.native());
+                        const std::filesystem::path& p,
+                        const std::basic_string<PathChar>& native) {
+  if constexpr (std::is_same_v<Char, char> &&
+                std::is_same_v<PathChar, wchar_t>) {
+    auto buf = basic_memory_buffer<wchar_t>();
+    write_escaped_string<wchar_t>(std::back_inserter(buf), native);
+    bool valid = to_utf8<wchar_t>::convert(quoted, {buf.data(), buf.size()});
+    FMT_ASSERT(valid, "invalid utf16");
+  } else if constexpr (std::is_same_v<Char, PathChar>) {
+    write_escaped_string<std::filesystem::path::value_type>(
+        std::back_inserter(quoted), native);
+  } else {
+    write_escaped_string<Char>(std::back_inserter(quoted), p.string<Char>());
+  }
 }
 
 }  // namespace detail
@@ -106,6 +120,7 @@ template <typename Char> struct formatter<std::filesystem::path, Char> {
   format_specs<Char> specs_;
   detail::arg_ref<Char> width_ref_;
   bool debug_ = false;
+  char path_type_ = 0;
 
  public:
   FMT_CONSTEXPR void set_debug_format(bool set = true) { debug_ = set; }
@@ -122,29 +137,62 @@ template <typename Char> struct formatter<std::filesystem::path, Char> {
       debug_ = true;
       ++it;
     }
+    if (it != end && (*it == 'g')) path_type_ = *it++;
     return it;
   }
 
   template <typename FormatContext>
   auto format(const std::filesystem::path& p, FormatContext& ctx) const {
     auto specs = specs_;
+#  ifdef _WIN32
+    auto path_string = !path_type_ ? p.native() : p.generic_wstring();
+#  else
+    auto path_string = !path_type_ ? p.native() : p.generic_string();
+#  endif
+
     detail::handle_dynamic_spec<detail::width_checker>(specs.width, width_ref_,
                                                        ctx);
     if (!debug_) {
-      auto s = detail::get_path_string<Char>(p);
+      auto s = detail::get_path_string<Char>(p, path_string);
       return detail::write(ctx.out(), basic_string_view<Char>(s), specs);
     }
     auto quoted = basic_memory_buffer<Char>();
-    detail::write_escaped_path(quoted, p);
+    detail::write_escaped_path(quoted, p, path_string);
     return detail::write(ctx.out(),
                          basic_string_view<Char>(quoted.data(), quoted.size()),
                          specs);
   }
 };
 FMT_END_NAMESPACE
-#endif
+#endif  // FMT_CPP_LIB_FILESYSTEM
 
 FMT_BEGIN_NAMESPACE
+FMT_EXPORT
+template <std::size_t N, typename Char>
+struct formatter<std::bitset<N>, Char> : nested_formatter<string_view> {
+ private:
+  // Functor because C++11 doesn't support generic lambdas.
+  struct writer {
+    const std::bitset<N>& bs;
+
+    template <typename OutputIt>
+    FMT_CONSTEXPR auto operator()(OutputIt out) -> OutputIt {
+      for (auto pos = N; pos > 0; --pos) {
+        out = detail::write<Char>(out, bs[pos - 1] ? Char('1') : Char('0'));
+      }
+
+      return out;
+    }
+  };
+
+ public:
+  template <typename FormatContext>
+  auto format(const std::bitset<N>& bs, FormatContext& ctx) const
+      -> decltype(ctx.out()) {
+    return write_padded(ctx, writer{bs});
+  }
+};
+
 FMT_EXPORT
 template <typename Char>
 struct formatter<std::thread::id, Char> : basic_ostream_formatter<Char> {};
@@ -180,7 +228,7 @@ struct formatter<std::optional<T>, Char,
   }
 
   template <typename FormatContext>
-  auto format(std::optional<T> const& opt, FormatContext& ctx) const
+  auto format(const std::optional<T>& opt, FormatContext& ctx) const
       -> decltype(ctx.out()) {
     if (!opt) return detail::write<Char>(ctx.out(), none);
 
@@ -194,7 +242,32 @@ struct formatter<std::optional<T>, Char,
 FMT_END_NAMESPACE
 #endif  // __cpp_lib_optional
 
-#ifdef __cpp_lib_variant
+#ifdef __cpp_lib_source_location
+FMT_BEGIN_NAMESPACE
+FMT_EXPORT
+template <> struct formatter<std::source_location> {
+  template <typename ParseContext> FMT_CONSTEXPR auto parse(ParseContext& ctx) {
+    return ctx.begin();
+  }
+
+  template <typename FormatContext>
+  auto format(const std::source_location& loc, FormatContext& ctx) const
+      -> decltype(ctx.out()) {
+    auto out = ctx.out();
+    out = detail::write(out, loc.file_name());
+    out = detail::write(out, ':');
+    out = detail::write<char>(out, loc.line());
+    out = detail::write(out, ':');
+    out = detail::write<char>(out, loc.column());
+    out = detail::write(out, ": ");
+    out = detail::write(out, loc.function_name());
+    return out;
+  }
+};
+FMT_END_NAMESPACE
+#endif
+
+#if FMT_CPP_LIB_VARIANT
 FMT_BEGIN_NAMESPACE
 namespace detail {
 
@@ -285,7 +358,7 @@ struct formatter<
   }
 };
 FMT_END_NAMESPACE
-#endif  // __cpp_lib_variant
+#endif  // FMT_CPP_LIB_VARIANT
 
 FMT_BEGIN_NAMESPACE
 FMT_EXPORT
@@ -309,7 +382,7 @@ template <typename Char> struct formatter<std::error_code, Char> {
 FMT_EXPORT
 template <typename T, typename Char>
 struct formatter<
-    T, Char,
+    T, Char,  // DEPRECATED! Mixing code unit types.
     typename std::enable_if<std::is_base_of<std::exception, T>::value>::type> {
  private:
   bool with_typename_ = false;
@@ -340,7 +413,7 @@ struct formatter<
 #  ifdef FMT_HAS_ABI_CXA_DEMANGLE
     int status = 0;
     std::size_t size = 0;
-    std::unique_ptr<char, decltype(&std::free)> demangled_name_ptr(
+    std::unique_ptr<char, void (*)(void*)> demangled_name_ptr(
         abi::__cxa_demangle(ti.name(), nullptr, &size, &status), &std::free);
 
     string_view demangled_name_view;
@@ -451,15 +524,14 @@ struct formatter<std::atomic<T>, Char,
 #ifdef __cpp_lib_atomic_flag_test
 FMT_EXPORT
 template <typename Char>
-struct formatter<std::atomic_flag, Char>
-    : formatter<bool, Char> {
+struct formatter<std::atomic_flag, Char> : formatter<bool, Char> {
   template <typename FormatContext>
   auto format(const std::atomic_flag& v, FormatContext& ctx) const
       -> decltype(ctx.out()) {
     return formatter<bool, Char>::format(v.test(), ctx);
   }
 };
-#endif // __cpp_lib_atomic_flag_test
+#endif  // __cpp_lib_atomic_flag_test
 
 FMT_END_NAMESPACE
 #endif  // FMT_STD_H_
diff --git a/src/fmt/xchar.h b/src/fmt/xchar.h
index 625ec36922..1e791bb07b 100644
--- a/src/fmt/xchar.h
+++ b/src/fmt/xchar.h
@@ -11,6 +11,7 @@
 #include <cwchar>
 
 #include "format.h"
+#include "ranges.h"
 
 #ifndef FMT_STATIC_THOUSANDS_SEPARATOR
 #  include <locale>
@@ -22,7 +23,7 @@ namespace detail {
 template <typename T>
 using is_exotic_char = bool_constant<!std::is_same<T, char>::value>;
 
-inline auto write_loc(std::back_insert_iterator<detail::buffer<wchar_t>> out,
+inline auto write_loc(back_insert_iterator<detail::buffer<wchar_t>> out,
                       loc_value value, const format_specs<wchar_t>& specs,
                       locale_ref loc) -> bool {
 #ifndef FMT_STATIC_THOUSANDS_SEPARATOR
@@ -63,14 +64,15 @@ template <> struct is_char<char16_t> : std::true_type {};
 template <> struct is_char<char32_t> : std::true_type {};
 
 template <typename... T>
-constexpr format_arg_store<wformat_context, T...> make_wformat_args(
-    const T&... args) {
+constexpr auto make_wformat_args(const T&... args)
+    -> format_arg_store<wformat_context, T...> {
   return {args...};
 }
 
 inline namespace literals {
 #if FMT_USE_USER_DEFINED_LITERALS && !FMT_USE_NONTYPE_TEMPLATE_ARGS
-constexpr detail::udl_arg<wchar_t> operator"" _a(const wchar_t* s, size_t) {
+constexpr auto operator""_a(const wchar_t* s, size_t)
+    -> detail::udl_arg<wchar_t> {
   return {s};
 }
 #endif
@@ -95,6 +97,12 @@ auto join(std::initializer_list<T> list, wstring_view sep)
   return join(std::begin(list), std::end(list), sep);
 }
 
+template <typename... T>
+auto join(const std::tuple<T...>& tuple, basic_string_view<wchar_t> sep)
+    -> tuple_join_view<wchar_t, T...> {
+  return {tuple, sep};
+}
+
 template <typename Char, FMT_ENABLE_IF(!std::is_same<Char, char>::value)>
 auto vformat(basic_string_view<Char> format_str,
              basic_format_args<buffer_context<type_identity_t<Char>>> args)
@@ -172,11 +180,11 @@ inline auto vformat_to(
   return detail::get_iterator(buf, out);
 }
 
-template <
-    typename OutputIt, typename Locale, typename S, typename... T,
-    typename Char = char_t<S>,
-    bool enable = detail::is_output_iterator<OutputIt, Char>::value&&
-        detail::is_locale<Locale>::value&& detail::is_exotic_char<Char>::value>
+template <typename OutputIt, typename Locale, typename S, typename... T,
+          typename Char = char_t<S>,
+          bool enable = detail::is_output_iterator<OutputIt, Char>::value &&
+                        detail::is_locale<Locale>::value &&
+                        detail::is_exotic_char<Char>::value>
 inline auto format_to(OutputIt out, const Locale& loc, const S& format_str,
                       T&&... args) ->
     typename std::enable_if<enable, OutputIt>::type {
diff --git a/src/fmtlib_os.cpp b/src/fmtlib_os.cpp
index ff5fe79a5e..3338d13cae 100644
--- a/src/fmtlib_os.cpp
+++ b/src/fmtlib_os.cpp
@@ -19,8 +19,8 @@
 #  include <sys/stat.h>
 #  include <sys/types.h>
 
-#  ifdef _WRS_KERNEL   // VxWorks7 kernel
-#    include <ioLib.h> // getpagesize
+#  ifdef _WRS_KERNEL    // VxWorks7 kernel
+#    include <ioLib.h>  // getpagesize
 #  endif
 
 #  ifndef _WIN32
@@ -183,10 +183,14 @@ void buffered_file::close() {
 }
 
 int buffered_file::descriptor() const {
-#ifdef fileno  // fileno is a macro on OpenBSD so we cannot use FMT_POSIX_CALL.
-  int fd = fileno(file_);
-#else
+#if !defined(fileno)
   int fd = FMT_POSIX_CALL(fileno(file_));
+#elif defined(FMT_HAS_SYSTEM)
+  // fileno is a macro on OpenBSD so we cannot use FMT_POSIX_CALL.
+#  define FMT_DISABLE_MACRO
+  int fd = FMT_SYSTEM(fileno FMT_DISABLE_MACRO(file_));
+#else
+  int fd = fileno(file_);
 #endif
   if (fd == -1)
     FMT_THROW(system_error(errno, FMT_STRING("cannot get file descriptor")));
@@ -197,6 +201,7 @@ int buffered_file::descriptor() const {
 #  ifdef _WIN32
 using mode_t = int;
 #  endif
+
 constexpr mode_t default_open_mode =
     S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
 
@@ -298,29 +303,6 @@ void file::dup2(int fd, std::error_code& ec) noexcept {
   if (result == -1) ec = std::error_code(errno, std::generic_category());
 }
 
-void file::pipe(file& read_end, file& write_end) {
-  // Close the descriptors first to make sure that assignments don't throw
-  // and there are no leaks.
-  read_end.close();
-  write_end.close();
-  int fds[2] = {};
-#  ifdef _WIN32
-  // Make the default pipe capacity same as on Linux 2.6.11+.
-  enum { DEFAULT_CAPACITY = 65536 };
-  int result = FMT_POSIX_CALL(pipe(fds, DEFAULT_CAPACITY, _O_BINARY));
-#  else
-  // Don't retry as the pipe function doesn't return EINTR.
-  // http://pubs.opengroup.org/onlinepubs/009696799/functions/pipe.html
-  int result = FMT_POSIX_CALL(pipe(fds));
-#  endif
-  if (result != 0)
-    FMT_THROW(system_error(errno, FMT_STRING("cannot create pipe")));
-  // The following assignments don't throw because read_fd and write_fd
-  // are closed.
-  read_end = file(fds[0]);
-  write_end = file(fds[1]);
-}
-
 buffered_file file::fdopen(const char* mode) {
 // Don't retry as fdopen doesn't return EINTR.
 #  if defined(__MINGW32__) && defined(_POSIX_)
@@ -349,6 +331,24 @@ file file::open_windows_file(wcstring_view path, int oflag) {
 }
 #  endif
 
+pipe::pipe() {
+  int fds[2] = {};
+#  ifdef _WIN32
+  // Make the default pipe capacity same as on Linux 2.6.11+.
+  enum { DEFAULT_CAPACITY = 65536 };
+  int result = FMT_POSIX_CALL(pipe(fds, DEFAULT_CAPACITY, _O_BINARY));
+#  else
+  // Don't retry as the pipe function doesn't return EINTR.
+  // http://pubs.opengroup.org/onlinepubs/009696799/functions/pipe.html
+  int result = FMT_POSIX_CALL(pipe(fds));
+#  endif
+  if (result != 0)
+    FMT_THROW(system_error(errno, FMT_STRING("cannot create pipe")));
+  // The following assignments don't throw.
+  read_end = file(fds[0]);
+  write_end = file(fds[1]);
+}
+
 #  if !defined(__MSDOS__)
 long getpagesize() {
 #    ifdef _WIN32
@@ -371,18 +371,17 @@ long getpagesize() {
 
 namespace detail {
 
-void file_buffer::grow(size_t) {
-  if (this->size() == this->capacity()) flush();
+void file_buffer::grow(buffer<char>& buf, size_t) {
+  if (buf.size() == buf.capacity()) static_cast<file_buffer&>(buf).flush();
 }
 
-file_buffer::file_buffer(cstring_view path,
-                         const detail::ostream_params& params)
-    : file_(path, params.oflag) {
+file_buffer::file_buffer(cstring_view path, const ostream_params& params)
+    : buffer<char>(grow), file_(path, params.oflag) {
   set(new char[params.buffer_size], params.buffer_size);
 }
 
 file_buffer::file_buffer(file_buffer&& other)
-    : detail::buffer<char>(other.data(), other.size(), other.capacity()),
+    : buffer<char>(grow, other.data(), other.size(), other.capacity()),
       file_(std::move(other.file_)) {
   other.clear();
   other.set(nullptr, 0);
diff --git a/src/grid2d.cpp b/src/grid2d.cpp
index 74fd105ec0..ea6e8e4123 100644
--- a/src/grid2d.cpp
+++ b/src/grid2d.cpp
@@ -25,7 +25,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 16
+static constexpr int DELTA = 16;
 
 static constexpr int OFFSET = 16384;
 
@@ -114,7 +114,7 @@ Grid2d::Grid2d(LAMMPS *lmp, MPI_Comm gcomm, int gnx, int gny, int ixlo, int ixhi
   // additional intialization
   // other constructor invokes this from setup_grid()
 
-  initialize();
+  Grid2d::initialize();
 }
 
 /* ---------------------------------------------------------------------- */
@@ -522,7 +522,7 @@ void Grid2d::ghost_grid()
   // also ensure no other procs use ghost cells beyond +y limit
 
   if (yextra) {
-    if (layout != Comm::LAYOUT_TILED) {
+    if (comm->layout != Comm::LAYOUT_TILED) {
       if (comm->myloc[1] == comm->procgrid[1]-1) inyhi = outyhi = ny - 1;
     } else {
       if (comm->mysplit[1][1] == 1.0) inyhi = outyhi = ny - 1;
@@ -553,15 +553,13 @@ void Grid2d::ghost_grid()
 
 void Grid2d::extract_comm_info()
 {
-  layout = comm->layout;
-
   // for non TILED layout:
   // proc xyz lohi = my 4 neighbor procs in this MPI_Comm
   //   these proc IDs can be overridden by caller using set_proc_neighs()
   // xyz split = copy of 1d vectors in Comm
   // grid2proc = copy of 3d array in Comm
 
-  if (layout != Comm::LAYOUT_TILED) {
+  if (comm->layout != Comm::LAYOUT_TILED) {
     procxlo = comm->procneigh[0][0];
     procxhi = comm->procneigh[0][1];
     procylo = comm->procneigh[1][0];
@@ -585,7 +583,7 @@ void Grid2d::extract_comm_info()
   // RCBinfo.cut = this proc's inlo in that dim
   // Allgather creates the tree of dims and cuts
 
-  if (layout == Comm::LAYOUT_TILED) {
+  if (comm->layout == Comm::LAYOUT_TILED) {
     rcbinfo = (RCBinfo *)
       memory->smalloc(nprocs*sizeof(RCBinfo),"grid3d:rcbinfo");
     RCBinfo rcbone;
@@ -615,7 +613,7 @@ void Grid2d::extract_comm_info()
 
 void Grid2d::setup_comm(int &nbuf1, int &nbuf2)
 {
-  if (layout != Comm::LAYOUT_TILED) setup_comm_brick(nbuf1,nbuf2);
+  if (comm->layout != Comm::LAYOUT_TILED) setup_comm_brick(nbuf1,nbuf2);
   else setup_comm_tiled(nbuf1,nbuf2);
 }
 
@@ -1039,7 +1037,7 @@ void Grid2d::setup_comm_tiled(int &nbuf1, int &nbuf2)
 
 int Grid2d::ghost_adjacent()
 {
-  if (layout != Comm::LAYOUT_TILED) return ghost_adjacent_brick();
+  if (comm->layout != Comm::LAYOUT_TILED) return ghost_adjacent_brick();
   return ghost_adjacent_tiled();
 }
 
@@ -1085,7 +1083,7 @@ int Grid2d::ghost_adjacent_tiled()
 void Grid2d::forward_comm(int caller, void *ptr, int which, int nper, int nbyte,
                           void *buf1, void *buf2, MPI_Datatype datatype)
 {
-  if (layout != Comm::LAYOUT_TILED) {
+  if (comm->layout != Comm::LAYOUT_TILED) {
     if (caller == KSPACE)
       forward_comm_brick<KSpace>((KSpace *) ptr,which,nper,nbyte,
                                  buf1,buf2,datatype);
@@ -1190,7 +1188,7 @@ forward_comm_tiled(T *ptr, int which, int nper, int nbyte,
 void Grid2d::reverse_comm(int caller, void *ptr, int which, int nper, int nbyte,
                           void *buf1, void *buf2, MPI_Datatype datatype)
 {
-  if (layout != Comm::LAYOUT_TILED) {
+  if (comm->layout != Comm::LAYOUT_TILED) {
     if (caller == KSPACE)
       reverse_comm_brick<KSpace>((KSpace *) ptr,which,nper,nbyte,
                                  buf1,buf2,datatype);
@@ -1314,10 +1312,6 @@ void Grid2d::setup_remap(Grid2d *old, int &nremap_buf1, int &nremap_buf2)
 
   deallocate_remap();
 
-  // set layout to current Comm layout
-
-  layout = comm->layout;
-
   // overlaps of my old decomp owned box with all owned boxes in new decomp
   // noverlap_old = # of overlaps, including self
   // overlap_old = vector of overlap info in Overlap data struct
@@ -1654,7 +1648,7 @@ int Grid2d::compute_overlap(int ghostflag, int *box, int *pbc, Overlap *&overlap
 
   // test obox against appropriate layout
 
-  if (layout != Comm::LAYOUT_TILED) {
+  if (comm->layout != Comm::LAYOUT_TILED) {
 
     // find comm->procgrid indices in each dim for box bounds
 
diff --git a/src/grid2d.h b/src/grid2d.h
index 43316baad8..8316f840be 100644
--- a/src/grid2d.h
+++ b/src/grid2d.h
@@ -55,7 +55,6 @@ class Grid2d : protected Pointers {
 
  protected:
   int me, nprocs;
-  int layout;           // not TILED or TILED, same as Comm class
   MPI_Comm gridcomm;    // communicator for this class
                         // usually world, but MSM calls with subset
 
diff --git a/src/grid3d.cpp b/src/grid3d.cpp
index c6cff3f317..4ce1978660 100644
--- a/src/grid3d.cpp
+++ b/src/grid3d.cpp
@@ -26,7 +26,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 16
+static constexpr int DELTA = 16;
 
 static constexpr int OFFSET = 16384;
 
@@ -123,7 +123,7 @@ Grid3d::Grid3d(LAMMPS *lmp, MPI_Comm gcomm, int gnx, int gny, int gnz,
   // additional intialization
   // other constructor invokes this from setup_grid()
 
-  initialize();
+  Grid3d::initialize();
 }
 
 /* ---------------------------------------------------------------------- */
@@ -577,7 +577,7 @@ void Grid3d::ghost_grid()
   // also ensure no other procs use ghost cells beyond +z limit
 
   if (zextra) {
-    if (layout != Comm::LAYOUT_TILED) {
+    if (comm->layout != Comm::LAYOUT_TILED) {
       if (comm->myloc[2] == comm->procgrid[2]-1) inzhi = outzhi = nz - 1;
     } else {
       if (comm->mysplit[2][1] == 1.0) inzhi = outzhi = nz - 1;
@@ -613,15 +613,13 @@ void Grid3d::ghost_grid()
 
 void Grid3d::extract_comm_info()
 {
-  layout = comm->layout;
-
   // for non TILED layout:
   // proc xyz lohi = my 6 neighbor procs in this MPI_Comm
   //   these proc IDs can be overridden by caller using set_proc_neighs()
   // xyz split = copy of 1d vectors in Comm
   // grid2proc = copy of 3d array in Comm
 
-  if (layout != Comm::LAYOUT_TILED) {
+  if (comm->layout != Comm::LAYOUT_TILED) {
     procxlo = comm->procneigh[0][0];
     procxhi = comm->procneigh[0][1];
     procylo = comm->procneigh[1][0];
@@ -649,7 +647,7 @@ void Grid3d::extract_comm_info()
   // RCBinfo.cut = this proc's inlo in that dim
   // Allgather creates the tree of dims and cuts
 
-  if (layout == Comm::LAYOUT_TILED) {
+  if (comm->layout == Comm::LAYOUT_TILED) {
     rcbinfo = (RCBinfo *)
       memory->smalloc(nprocs*sizeof(RCBinfo),"grid3d:rcbinfo");
     RCBinfo rcbone;
@@ -680,7 +678,7 @@ void Grid3d::extract_comm_info()
 
 void Grid3d::setup_comm(int &nbuf1, int &nbuf2)
 {
-  if (layout != Comm::LAYOUT_TILED) setup_comm_brick(nbuf1,nbuf2);
+  if (comm->layout != Comm::LAYOUT_TILED) setup_comm_brick(nbuf1,nbuf2);
   else setup_comm_tiled(nbuf1,nbuf2);
 }
 
@@ -1207,7 +1205,7 @@ void Grid3d::setup_comm_tiled(int &nbuf1, int &nbuf2)
 
 int Grid3d::ghost_adjacent()
 {
-  if (layout != Comm::LAYOUT_TILED) return ghost_adjacent_brick();
+  if (comm->layout != Comm::LAYOUT_TILED) return ghost_adjacent_brick();
   return ghost_adjacent_tiled();
 }
 
@@ -1255,7 +1253,7 @@ int Grid3d::ghost_adjacent_tiled()
 void Grid3d::forward_comm(int caller, void *ptr, int which, int nper, int nbyte,
                             void *buf1, void *buf2, MPI_Datatype datatype)
 {
-  if (layout != Comm::LAYOUT_TILED) {
+  if (comm->layout != Comm::LAYOUT_TILED) {
     if (caller == KSPACE)
       forward_comm_brick<KSpace>((KSpace *) ptr,which,nper,nbyte,
                                  buf1,buf2,datatype);
@@ -1360,7 +1358,7 @@ forward_comm_tiled(T *ptr, int which, int nper, int nbyte,
 void Grid3d::reverse_comm(int caller, void *ptr, int which, int nper, int nbyte,
                             void *buf1, void *buf2, MPI_Datatype datatype)
 {
-  if (layout != Comm::LAYOUT_TILED) {
+  if (comm->layout != Comm::LAYOUT_TILED) {
     if (caller == KSPACE)
       reverse_comm_brick<KSpace>((KSpace *) ptr,which,nper,nbyte,
                                 buf1,buf2,datatype);
@@ -1484,10 +1482,6 @@ void Grid3d::setup_remap(Grid3d *old, int &nremap_buf1, int &nremap_buf2)
 
   deallocate_remap();
 
-  // set layout to current Comm layout
-
-  layout = comm->layout;
-
   // overlaps of my old decomp owned box with all owned boxes in new decomp
   // noverlap_old = # of overlaps, including self
   // overlap_old = vector of overlap info in Overlap data struct
@@ -1829,7 +1823,7 @@ int Grid3d::compute_overlap(int ghostflag, int *box, int *pbc, Overlap *&overlap
     return noverlap_list;
   }
 
-  if (layout != Comm::LAYOUT_TILED) {
+  if (comm->layout != Comm::LAYOUT_TILED) {
 
     // find comm->procgrid indices in each dim for box bounds
 
diff --git a/src/grid3d.h b/src/grid3d.h
index e4a8e276f5..6a15c2c942 100644
--- a/src/grid3d.h
+++ b/src/grid3d.h
@@ -57,7 +57,6 @@ class Grid3d : protected Pointers {
 
  protected:
   int me, nprocs;
-  int layout;           // not TILED or TILED, same as Comm class
   MPI_Comm gridcomm;    // communicator for this class
                         // usually world, but MSM calls with subset
 
diff --git a/src/group.cpp b/src/group.cpp
index f841f9d95a..a586c33ed9 100644
--- a/src/group.cpp
+++ b/src/group.cpp
@@ -46,7 +46,7 @@ static constexpr double EPSILON = 1.0e-6;
 enum{NONE,TYPE,MOLECULE,ID};
 enum{LT,LE,GT,GE,EQ,NEQ,BETWEEN};
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 /* ----------------------------------------------------------------------
    initialize group memory
diff --git a/src/image.cpp b/src/image.cpp
index 3133723b32..24f94f51f4 100644
--- a/src/image.cpp
+++ b/src/image.cpp
@@ -44,9 +44,9 @@ using MathConst::DEG2RAD;
 using MathConst::MY_PI;
 using MathConst::MY_PI4;
 
-#define NCOLORS 140
-#define NELEMENTS 109
-#define EPSILON 1.0e-6
+static constexpr int NCOLORS = 140;
+static constexpr int NELEMENTS = 109;
+static constexpr double EPSILON = 1.0e-6;
 
 enum{NUMERIC,MINVALUE,MAXVALUE};
 enum{CONTINUOUS,DISCRETE,SEQUENTIAL};
diff --git a/src/imbalance_neigh.cpp b/src/imbalance_neigh.cpp
index 40b7f6b249..8d4a17976b 100644
--- a/src/imbalance_neigh.cpp
+++ b/src/imbalance_neigh.cpp
@@ -21,7 +21,7 @@
 
 using namespace LAMMPS_NS;
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 /* -------------------------------------------------------------------- */
 
diff --git a/src/imbalance_time.cpp b/src/imbalance_time.cpp
index 4e16ae3e1f..cf2153011b 100644
--- a/src/imbalance_time.cpp
+++ b/src/imbalance_time.cpp
@@ -19,7 +19,7 @@
 
 using namespace LAMMPS_NS;
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 /* -------------------------------------------------------------------- */
 
diff --git a/src/improper_hybrid.cpp b/src/improper_hybrid.cpp
index 9aa20d236e..505488cce6 100644
--- a/src/improper_hybrid.cpp
+++ b/src/improper_hybrid.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EXTRA 1000
+static constexpr int EXTRA = 1000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/input.cpp b/src/input.cpp
index 4d5ef8bee1..c9a3cf5f52 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -54,8 +54,8 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTALINE 256
-#define DELTA 4
+static constexpr int DELTALINE = 256;
+static constexpr int DELTA = 4;
 
 // maximum nesting level of input files
 static constexpr int LMP_MAXFILE = 16;
diff --git a/src/integrate.cpp b/src/integrate.cpp
index 98ea7a09f9..08300a8d77 100644
--- a/src/integrate.cpp
+++ b/src/integrate.cpp
@@ -29,24 +29,11 @@ using namespace LAMMPS_NS;
 
 Integrate::Integrate(LAMMPS *lmp, int /*narg*/, char ** /*arg*/) : Pointers(lmp)
 {
-  elist_global = elist_atom = nullptr;
-  vlist_global = vlist_atom = cvlist_atom = nullptr;
   external_force_clear = 0;
 }
 
 /* ---------------------------------------------------------------------- */
 
-Integrate::~Integrate()
-{
-  delete [] elist_global;
-  delete [] elist_atom;
-  delete [] vlist_global;
-  delete [] vlist_atom;
-  delete [] cvlist_atom;
-}
-
-/* ---------------------------------------------------------------------- */
-
 void Integrate::init()
 {
   if (lmp->citeme) lmp->citeme->flush();
@@ -74,43 +61,18 @@ void Integrate::init()
 
 void Integrate::ev_setup()
 {
-  delete [] elist_global;
-  delete [] elist_atom;
-  delete [] vlist_global;
-  delete [] vlist_atom;
-  delete [] cvlist_atom;
-  elist_global = elist_atom = nullptr;
-  vlist_global = vlist_atom = cvlist_atom = nullptr;
+  elist_global.clear();
+  elist_atom.clear();
+  vlist_global.clear();
+  vlist_atom.clear();
+  cvlist_atom.clear();
 
-  nelist_global = nelist_atom = 0;
-  nvlist_global = nvlist_atom = ncvlist_atom = 0;
-  for (int i = 0; i < modify->ncompute; i++) {
-    if (modify->compute[i]->peflag) nelist_global++;
-    if (modify->compute[i]->peatomflag) nelist_atom++;
-    if (modify->compute[i]->pressflag) nvlist_global++;
-    if (modify->compute[i]->pressatomflag & 1) nvlist_atom++;
-    if (modify->compute[i]->pressatomflag & 2) ncvlist_atom++;
-  }
-
-  if (nelist_global) elist_global = new Compute*[nelist_global];
-  if (nelist_atom) elist_atom = new Compute*[nelist_atom];
-  if (nvlist_global) vlist_global = new Compute*[nvlist_global];
-  if (nvlist_atom) vlist_atom = new Compute*[nvlist_atom];
-  if (ncvlist_atom) cvlist_atom = new Compute*[ncvlist_atom];
-
-  nelist_global = nelist_atom = 0;
-  nvlist_global = nvlist_atom = ncvlist_atom = 0;
-  for (int i = 0; i < modify->ncompute; i++) {
-    if (modify->compute[i]->peflag)
-      elist_global[nelist_global++] = modify->compute[i];
-    if (modify->compute[i]->peatomflag)
-      elist_atom[nelist_atom++] = modify->compute[i];
-    if (modify->compute[i]->pressflag)
-      vlist_global[nvlist_global++] = modify->compute[i];
-    if (modify->compute[i]->pressatomflag & 1)
-      vlist_atom[nvlist_atom++] = modify->compute[i];
-    if (modify->compute[i]->pressatomflag & 2)
-      cvlist_atom[ncvlist_atom++] = modify->compute[i];
+  for (const auto &icompute : modify->get_compute_list()) {
+    if (icompute->peflag) elist_global.push_back(icompute);
+    if (icompute->peatomflag) elist_atom.push_back(icompute);
+    if (icompute->pressflag) vlist_global.push_back(icompute);
+    if (icompute->pressatomflag & 1) vlist_atom.push_back(icompute);
+    if (icompute->pressatomflag & 2) cvlist_atom.push_back(icompute);
   }
 }
 
@@ -137,7 +99,7 @@ void Integrate::ev_setup()
 
 void Integrate::ev_set(bigint ntimestep)
 {
-  int i,flag;
+  int flag;
 
   int tdflag = 0;
   if (output->any_time_dumps &&
@@ -145,15 +107,15 @@ void Integrate::ev_set(bigint ntimestep)
 
   flag = 0;
   int eflag_global = 0;
-  for (i = 0; i < nelist_global; i++)
-    if (elist_global[i]->matchstep(ntimestep)) flag = 1;
+  for (auto &icompute : elist_global)
+    if (icompute->matchstep(ntimestep)) flag = 1;
   if (flag) eflag_global = ENERGY_GLOBAL;
 
   flag = 0;
   int eflag_atom = 0;
-  for (i = 0; i < nelist_atom; i++)
-    if (elist_atom[i]->matchstep(ntimestep)) flag = 1;
-  if (flag || (tdflag && nelist_atom)) eflag_atom = ENERGY_ATOM;
+  for (auto &icompute : elist_atom)
+    if (icompute->matchstep(ntimestep)) flag = 1;
+  if (flag || (tdflag && (elist_atom.size() > 0))) eflag_atom = ENERGY_ATOM;
 
   if (eflag_global) update->eflag_global = ntimestep;
   if (eflag_atom) update->eflag_atom = ntimestep;
@@ -161,21 +123,21 @@ void Integrate::ev_set(bigint ntimestep)
 
   flag = 0;
   int vflag_global = 0;
-  for (i = 0; i < nvlist_global; i++)
-    if (vlist_global[i]->matchstep(ntimestep)) flag = 1;
+  for (auto &icompute : vlist_global)
+    if (icompute->matchstep(ntimestep)) flag = 1;
   if (flag) vflag_global = virial_style;
 
   flag = 0;
   int vflag_atom = 0;
-  for (i = 0; i < nvlist_atom; i++)
-    if (vlist_atom[i]->matchstep(ntimestep)) flag = 1;
-  if (flag || (tdflag && nvlist_atom)) vflag_atom = VIRIAL_ATOM;
+  for (auto &icompute : vlist_atom)
+    if (icompute->matchstep(ntimestep)) flag = 1;
+  if (flag || (tdflag && (vlist_atom.size() > 0))) vflag_atom = VIRIAL_ATOM;
 
   flag = 0;
   int cvflag_atom = 0;
-  for (i = 0; i < ncvlist_atom; i++)
-    if (cvlist_atom[i]->matchstep(ntimestep)) flag = 1;
-  if (flag || (tdflag && ncvlist_atom)) cvflag_atom = VIRIAL_CENTROID;
+  for (auto &icompute : cvlist_atom)
+    if (icompute->matchstep(ntimestep)) flag = 1;
+  if (flag || (tdflag && (cvlist_atom.size() > 0))) cvflag_atom = VIRIAL_CENTROID;
 
   if (vflag_global) update->vflag_global = ntimestep;
   if (vflag_atom || cvflag_atom) update->vflag_atom = ntimestep;
diff --git a/src/integrate.h b/src/integrate.h
index 85d4bec65b..e622f6328d 100644
--- a/src/integrate.h
+++ b/src/integrate.h
@@ -15,13 +15,13 @@
 #define LMP_INTEGRATE_H
 
 #include "pointers.h"
+#include "compute.h"
 
 namespace LAMMPS_NS {
 
 class Integrate : protected Pointers {
  public:
   Integrate(class LAMMPS *, int, char **);
-  ~Integrate() override;
   virtual void init();
   virtual void setup(int flag) = 0;
   virtual void setup_minimal(int) = 0;
@@ -36,13 +36,8 @@ class Integrate : protected Pointers {
   int virial_style;            // compute virial explicitly or implicitly
   int external_force_clear;    // clear forces locally or externally
 
-  int nelist_global, nelist_atom;    // # of PE,virial computes to check
-  int nvlist_global, nvlist_atom, ncvlist_atom;
-  class Compute **elist_global;    // lists of PE,virial Computes
-  class Compute **elist_atom;
-  class Compute **vlist_global;
-  class Compute **vlist_atom;
-  class Compute **cvlist_atom;
+  // lists of PE,virial Computes
+  std::vector<Compute *> elist_global, elist_atom, vlist_global, vlist_atom, cvlist_atom;
 
   int pair_compute_flag;      // 0 if pair->compute is skipped
   int kspace_compute_flag;    // 0 if kspace->compute is skipped
diff --git a/src/irregular.cpp b/src/irregular.cpp
index b571c35445..0c1ecc21a9 100644
--- a/src/irregular.cpp
+++ b/src/irregular.cpp
@@ -36,9 +36,9 @@ static int compare_standalone(const void *, const void *);
 static int compare_standalone(const int, const int, void *);
 #endif
 
-#define BUFFACTOR 1.5
-#define BUFMIN 1024
-#define BUFEXTRA 1024
+static constexpr double BUFFACTOR = 1.5;
+static constexpr int BUFMIN = 1024;
+static constexpr int BUFEXTRA = 1024;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/kspace.cpp b/src/kspace.cpp
index 583df478e7..7d51c46ea8 100644
--- a/src/kspace.cpp
+++ b/src/kspace.cpp
@@ -29,7 +29,7 @@
 
 using namespace LAMMPS_NS;
 
-#define SMALL 0.00001
+static constexpr double SMALL = 0.00001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/lammps.cpp b/src/lammps.cpp
index f92a76e6dd..b3d2171152 100644
--- a/src/lammps.cpp
+++ b/src/lammps.cpp
@@ -276,6 +276,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) :
         error->universe_all(FLERR,"Invalid command-line argument");
       helpflag = 1;
       citeflag = 0;
+      inflag = -1;              // skip inflag check
       iarg += 1;
 
     } else if (strcmp(arg[iarg],"-in") == 0 ||
@@ -385,6 +386,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) :
                             "Cannot use both -restart2data and -restart2dump");
       restart2data = 1;
       restartfile = arg[iarg+1];
+      inflag = -1;               // skip inflag check
       // check for restart remap flag
       if (strcmp(arg[iarg+2],"remap") == 0) {
         if (iarg+4 > narg)
@@ -407,6 +409,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) :
                             "Cannot use both -restart2data and -restart2dump");
       restart2dump = 1;
       restartfile = arg[iarg+1];
+      inflag = -1;               // skip inflag check
       // check for restart remap flag
       if (strcmp(arg[iarg+2],"remap") == 0) {
         if (iarg+4 > narg)
@@ -535,7 +538,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) :
     world = universe->uworld;
 
     if (universe->me == 0) {
-      if (inflag == 0) infile = stdin;
+      if (inflag <= 0) infile = stdin;
       else if (strcmp(arg[inflag], "none") == 0) infile = stdin;
       else infile = fopen(arg[inflag],"r");
       if (infile == nullptr)
diff --git a/src/lattice.cpp b/src/lattice.cpp
index edb482cfac..fa50a9ea94 100644
--- a/src/lattice.cpp
+++ b/src/lattice.cpp
@@ -25,7 +25,7 @@
 
 using namespace LAMMPS_NS;
 
-#define BIG 1.0e30
+static constexpr double BIG = 1.0e30;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/library.cpp b/src/library.cpp
index 1acdfc4787..bdf315acac 100644
--- a/src/library.cpp
+++ b/src/library.cpp
@@ -709,7 +709,7 @@ void lammps_commands_string(void *handle, const char *str)
           break;
         }
 
-        lmp->input->one(cmd.c_str());
+        lmp->input->one(cmd);
       }
     }
   }
@@ -1265,8 +1265,6 @@ internally by the :doc:`Fortran interface <Fortran>` and are not likely to be us
      - 1 if the atom style includes per-atom masses, 0 if there are per-type masses. See :doc:`atom_style`.
    * - radius_flag
      - 1 if the atom style includes a per-atom radius. See :doc:`atom_style`.
-   * - sphere_flag
-     - 1 if the atom style describes extended particles that can rotate. See :doc:`atom_style`.
    * - ellipsoid_flag
      - 1 if the atom style describes extended particles that may be ellipsoidal. See :doc:`atom_style`.
    * - omega_flag
@@ -1333,7 +1331,7 @@ int lammps_extract_setting(void *handle, const char *keyword)
   if (strcmp(keyword,"mu_flag") == 0) return lmp->atom->mu_flag;
   if (strcmp(keyword,"rmass_flag") == 0) return lmp->atom->rmass_flag;
   if (strcmp(keyword,"radius_flag") == 0) return lmp->atom->radius_flag;
-  if (strcmp(keyword,"sphere_flag") == 0) return lmp->atom->sphere_flag;
+
   if (strcmp(keyword,"ellipsoid_flag") == 0) return lmp->atom->ellipsoid_flag;
   if (strcmp(keyword,"omega_flag") == 0) return lmp->atom->omega_flag;
   if (strcmp(keyword,"torque_flag") == 0) return lmp->atom->torque_flag;
@@ -2451,19 +2449,69 @@ int lammps_extract_variable_datatype(void *handle, const char *name)
 }
 
 /* ---------------------------------------------------------------------- */
+// for printing obsolete function call warning only once
+static int set_variable_deprecated_flag = 1;
 
 /** Set the value of a string-style variable.
- *
- * This function assigns a new value from the string str to the
- * string-style variable name. Returns -1 if a variable of that
- * name does not exist or is not a string-style variable, otherwise 0.
- *
+\verbatim embed:rst
+
+.. deprecated:: TBD
+
+This function assigns a new value from the string str to the
+string-style variable *name*.  This is a way to directly change the
+string value of a LAMMPS variable that was previous defined with a
+:doc:`variable name string <variable>` command without using any
+LAMMPS commands to delete and redefine the variable.
+
+Returns -1 if a variable of that name does not exist or if it is not
+a string-style variable, otherwise 0.
+
+.. warning::
+
+   This function is deprecated and :cpp:func:`lammps_set_string_variable`
+   should be used instead.
+
+   \endverbatim
+
+* \param  handle  pointer to a previously created LAMMPS instance
+ * \param  name    name of the variable
+ * \param  str     new value of the variable
+ * \return         0 on success or -1 on failure */
+
+int lammps_set_variable(void *handle, const char *name, const char *str)
+{
+  if (set_variable_deprecated_flag) {
+    fprintf(stderr,"Using the 'lammps_set_variable()' function is deprecated. "
+            "Please use 'lammps_set_string_variable()' instead.\n");
+    set_variable_deprecated_flag = 0;
+  }
+  return lammps_set_string_variable(handle, name, str);
+}
+
+/* ---------------------------------------------------------------------- */
+
+/** Set the value of a string-style variable.
+\verbatim embed:rst
+
+.. versionadded:: TBD
+
+This function assigns a new value from the string str to the
+string-style variable *name*.  This is a way to directly change the
+string value of a LAMMPS variable that was previous defined with a
+:doc:`variable name string <variable>` command without using any
+LAMMPS commands to delete and redefine the variable.
+
+Returns -1 if a variable of that name does not exist or if it is not
+a string-style variable, otherwise 0.
+
+\endverbatim
+
  * \param  handle  pointer to a previously created LAMMPS instance
  * \param  name    name of the variable
  * \param  str     new value of the variable
  * \return         0 on success or -1 on failure
  */
-int lammps_set_variable(void *handle, char *name, char *str)
+int lammps_set_string_variable(void *handle, const char *name, const char *str)
 {
   auto lmp = (LAMMPS *) handle;
   int err = -1;
@@ -2477,6 +2525,44 @@ int lammps_set_variable(void *handle, char *name, char *str)
   return err;
 }
 
+/* ---------------------------------------------------------------------- */
+
+/** Set the value of an internal-style variable.
+ *
+\verbatim embed:rst
+
+This function assigns a new value from the floating point number *value*
+to the internal-style variable *name*.  This is a way to directly change
+the numerical value of such a LAMMPS variable that was previous defined
+with a :doc:`variable name internal <variable>` command without using
+any LAMMPS commands to delete and redefine the variable.
+
+Returns -1 if a variable of that name does not exist or is not an
+internal-style variable, otherwise 0.
+
+\endverbatim
+
+ * \param  handle  pointer to a previously created LAMMPS instance
+ * \param  name    name of the variable
+ * \param  value   new value of the variable
+ * \return         0 on success or -1 on failure
+ */
+int lammps_set_internal_variable(void *handle, const char *name, double value)
+{
+  auto lmp = (LAMMPS *) handle;
+
+  BEGIN_CAPTURE
+  {
+    int ivar = lmp->input->variable->find(name);
+    if (ivar < 0) return -1;
+    if (lmp->input->variable->internalstyle(ivar)) {
+        lmp->input->variable->internal_set(ivar, value);
+        return 0;
+    }
+  }
+  END_CAPTURE
+  return -1;
+}
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/library.h b/src/library.h
index 7db86bd71d..10cac6741a 100644
--- a/src/library.h
+++ b/src/library.h
@@ -177,7 +177,9 @@ void *lammps_extract_compute(void *handle, const char *, int, int);
 void *lammps_extract_fix(void *handle, const char *, int, int, int, int);
 void *lammps_extract_variable(void *handle, const char *, const char *);
 int lammps_extract_variable_datatype(void *handle, const char *name);
-int lammps_set_variable(void *handle, char *name, char *str);
+int lammps_set_variable(void *handle, const char *name, const char *str);
+int lammps_set_string_variable(void *handle, const char *name, const char *str);
+int lammps_set_internal_variable(void *handle, const char *name, double value);
 int lammps_variable_info(void *handle, int idx, char *buf, int bufsize);
 
 /* ----------------------------------------------------------------------
diff --git a/src/min.cpp b/src/min.cpp
index acc7d17654..e64ff9cb29 100644
--- a/src/min.cpp
+++ b/src/min.cpp
@@ -74,9 +74,6 @@ Min::Min(LAMMPS *lmp) : Pointers(lmp)
   alpha_final = 0.0;
   abcflag = 0;
 
-  elist_global = elist_atom = nullptr;
-  vlist_global = vlist_atom = cvlist_atom = nullptr;
-
   nextra_global = 0;
   fextra = nullptr;
 
@@ -95,12 +92,6 @@ Min::Min(LAMMPS *lmp) : Pointers(lmp)
 
 Min::~Min()
 {
-  delete[] elist_global;
-  delete[] elist_atom;
-  delete[] vlist_global;
-  delete[] vlist_atom;
-  delete[] cvlist_atom;
-
   delete[] fextra;
 
   memory->sfree(xextra_atom);
@@ -757,43 +748,18 @@ void Min::modify_params(int narg, char **arg)
 
 void Min::ev_setup()
 {
-  delete[] elist_global;
-  delete[] elist_atom;
-  delete[] vlist_global;
-  delete[] vlist_atom;
-  delete[] cvlist_atom;
-  elist_global = elist_atom = nullptr;
-  vlist_global = vlist_atom = cvlist_atom = nullptr;
+  elist_global.clear();
+  elist_atom.clear();
+  vlist_global.clear();
+  vlist_atom.clear();
+  cvlist_atom.clear();
 
-  nelist_global = nelist_atom = 0;
-  nvlist_global = nvlist_atom = ncvlist_atom = 0;
-  for (int i = 0; i < modify->ncompute; i++) {
-    if (modify->compute[i]->peflag) nelist_global++;
-    if (modify->compute[i]->peatomflag) nelist_atom++;
-    if (modify->compute[i]->pressflag) nvlist_global++;
-    if (modify->compute[i]->pressatomflag & 1) nvlist_atom++;
-    if (modify->compute[i]->pressatomflag & 2) ncvlist_atom++;
-  }
-
-  if (nelist_global) elist_global = new Compute*[nelist_global];
-  if (nelist_atom) elist_atom = new Compute*[nelist_atom];
-  if (nvlist_global) vlist_global = new Compute*[nvlist_global];
-  if (nvlist_atom) vlist_atom = new Compute*[nvlist_atom];
-  if (ncvlist_atom) cvlist_atom = new Compute*[ncvlist_atom];
-
-  nelist_global = nelist_atom = 0;
-  nvlist_global = nvlist_atom = ncvlist_atom = 0;
-  for (int i = 0; i < modify->ncompute; i++) {
-    if (modify->compute[i]->peflag)
-      elist_global[nelist_global++] = modify->compute[i];
-    if (modify->compute[i]->peatomflag)
-      elist_atom[nelist_atom++] = modify->compute[i];
-    if (modify->compute[i]->pressflag)
-      vlist_global[nvlist_global++] = modify->compute[i];
-    if (modify->compute[i]->pressatomflag & 1)
-      vlist_atom[nvlist_atom++] = modify->compute[i];
-    if (modify->compute[i]->pressatomflag & 2)
-      cvlist_atom[ncvlist_atom++] = modify->compute[i];
+  for (const auto &icompute : modify->get_compute_list()) {
+    if (icompute->peflag) elist_global.push_back(icompute);
+    if (icompute->peatomflag) elist_atom.push_back(icompute);
+    if (icompute->pressflag) vlist_global.push_back(icompute);
+    if (icompute->pressatomflag & 1) vlist_atom.push_back(icompute);
+    if (icompute->pressatomflag & 2) cvlist_atom.push_back(icompute);
   }
 }
 
@@ -816,16 +782,15 @@ void Min::ev_setup()
 
 void Min::ev_set(bigint ntimestep)
 {
-  int i,flag;
+  int flag;
 
   int eflag_global = 1;
-  for (i = 0; i < nelist_global; i++)
-    elist_global[i]->matchstep(ntimestep);
+  for (auto &icompute : elist_global) icompute->matchstep(ntimestep);
 
   flag = 0;
   int eflag_atom = 0;
-  for (i = 0; i < nelist_atom; i++)
-    if (elist_atom[i]->matchstep(ntimestep)) flag = 1;
+  for (auto &icompute : elist_atom)
+    if (icompute->matchstep(ntimestep)) flag = 1;
   if (flag) eflag_atom = ENERGY_ATOM;
 
   if (eflag_global) update->eflag_global = update->ntimestep;
@@ -834,20 +799,20 @@ void Min::ev_set(bigint ntimestep)
 
   flag = 0;
   int vflag_global = 0;
-  for (i = 0; i < nvlist_global; i++)
-    if (vlist_global[i]->matchstep(ntimestep)) flag = 1;
+  for (auto &icompute : vlist_global)
+    if (icompute->matchstep(ntimestep)) flag = 1;
   if (flag) vflag_global = virial_style;
 
   flag = 0;
   int vflag_atom = 0;
-  for (i = 0; i < nvlist_atom; i++)
-    if (vlist_atom[i]->matchstep(ntimestep)) flag = 1;
+  for (auto &icompute : vlist_atom)
+    if (icompute->matchstep(ntimestep)) flag = 1;
   if (flag) vflag_atom = VIRIAL_ATOM;
 
   flag = 0;
   int cvflag_atom = 0;
-  for (i = 0; i < ncvlist_atom; i++)
-    if (cvlist_atom[i]->matchstep(ntimestep)) flag = 1;
+  for (auto &icompute : cvlist_atom)
+    if (icompute->matchstep(ntimestep)) flag = 1;
   if (flag) cvflag_atom = VIRIAL_CENTROID;
 
   if (vflag_global) update->vflag_global = update->ntimestep;
diff --git a/src/min.h b/src/min.h
index 16629db69b..b94d937fa5 100644
--- a/src/min.h
+++ b/src/min.h
@@ -15,6 +15,7 @@
 #define LMP_MIN_H
 
 #include "pointers.h"    // IWYU pragma: export
+#include "compute.h"
 
 namespace LAMMPS_NS {
 
@@ -102,13 +103,8 @@ class Min : protected Pointers {
   int max_vdotf_negatif;         // maximum iteration with v.f > 0.0
   int abcflag;                   // when 1 use ABC-FIRE variant instead of FIRE, default 0
 
-  int nelist_global, nelist_atom;    // # of PE,virial computes to check
-  int nvlist_global, nvlist_atom, ncvlist_atom;
-  class Compute **elist_global;    // lists of PE,virial Computes
-  class Compute **elist_atom;
-  class Compute **vlist_global;
-  class Compute **vlist_atom;
-  class Compute **cvlist_atom;
+  // lists of PE,virial Computes
+  std::vector<Compute *> elist_global, elist_atom, vlist_global, vlist_atom, cvlist_atom;
 
   int triclinic;    // 0 if domain is orthog, 1 if triclinic
   int pairflag;
diff --git a/src/min_linesearch.cpp b/src/min_linesearch.cpp
index 24ba4c5c23..97dcca9d8a 100644
--- a/src/min_linesearch.cpp
+++ b/src/min_linesearch.cpp
@@ -47,7 +47,7 @@ using namespace LAMMPS_NS;
 #define BACKTRACK_SLOPE 0.4
 #define QUADRATIC_TOL 0.1
 //#define EMACH 1.0e-8
-#define EMACH 1.0e-8
+static constexpr double EMACH = 1.0e-8;
 #define EPS_QUAD 1.0e-28
 
 /* ---------------------------------------------------------------------- */
@@ -329,7 +329,7 @@ int MinLineSearch::linemin_quadratic(double eoriginal, double &alpha)
   double fdothall,fdothme,hme,hmax,hmaxall;
   double de_ideal,de;
   double delfh,engprev,relerr,alphaprev,fhprev,fh,alpha0;
-  double dot[2],dotall[2];
+  double dot,dotall;
   double *xatom,*x0atom,*fatom,*hatom;
   double alphamax;
 
@@ -417,10 +417,9 @@ int MinLineSearch::linemin_quadratic(double eoriginal, double &alpha)
 
     // compute new fh, alpha, delfh
 
-    dot[0] = dot[1] = 0.0;
+    dot = 0.0;
     for (i = 0; i < nvec; i++) {
-      dot[0] += fvec[i]*fvec[i];
-      dot[1] += fvec[i]*h[i];
+      dot += fvec[i]*h[i];
     }
     if (nextra_atom)
       for (m = 0; m < nextra_atom; m++) {
@@ -428,18 +427,16 @@ int MinLineSearch::linemin_quadratic(double eoriginal, double &alpha)
         hatom = hextra_atom[m];
         n = extra_nlen[m];
         for (i = 0; i < n; i++) {
-          dot[0] += fatom[i]*fatom[i];
-          dot[1] += fatom[i]*hatom[i];
+          dot += fatom[i]*hatom[i];
         }
       }
-    MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world);
+    MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world);
     if (nextra_global) {
       for (i = 0; i < nextra_global; i++) {
-        dotall[0] += fextra[i]*fextra[i];
-        dotall[1] += fextra[i]*hextra[i];
+        dotall += fextra[i]*hextra[i];
       }
     }
-    fh = dotall[1];
+    fh = dotall;
     if (output->thermo->normflag) fh /= atom->natoms;
 
     delfh = fh - fhprev;
diff --git a/src/min_quickmin.cpp b/src/min_quickmin.cpp
index 99d95be57d..c3730f2cd7 100644
--- a/src/min_quickmin.cpp
+++ b/src/min_quickmin.cpp
@@ -30,7 +30,7 @@ using namespace LAMMPS_NS;
 
 #define EPS_ENERGY 1.0e-8
 
-#define DELAYSTEP 5
+static constexpr int DELAYSTEP = 5;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/modify.cpp b/src/modify.cpp
index 427c4e259b..ba04c5969a 100644
--- a/src/modify.cpp
+++ b/src/modify.cpp
@@ -33,8 +33,8 @@
 using namespace LAMMPS_NS;
 using namespace FixConst;
 
-#define DELTA 4
-#define BIG 1.0e20
+static constexpr int DELTA = 4;
+static constexpr double BIG = 1.0e20;
 
 // template for factory function:
 // there will be one instance for each style keyword in the respective style_xxx.h files
diff --git a/src/modify.h b/src/modify.h
index 6ca4b4ad26..26e056d507 100644
--- a/src/modify.h
+++ b/src/modify.h
@@ -115,9 +115,17 @@ class Modify : protected Pointers {
   int find_fix(const std::string &);
   // new API
   Fix *get_fix_by_id(const std::string &) const;
-  Fix *get_fix_by_index(int idx) const { return ((idx >= 0) && (idx < nfix)) ? fix[idx] : nullptr; }
+  Fix *get_fix_by_index(int idx) const {
+    return ((idx >= 0) && (idx < nfix)) ? fix[idx] : nullptr;
+  }
   const std::vector<Fix *> get_fix_by_style(const std::string &) const;
   const std::vector<Fix *> &get_fix_list();
+  int get_fix_mask(Fix *ifix) const {
+    for (int i = 0; i < nfix; ++i) {
+      if (fix[i] == ifix) return fmask[i];
+    }
+    return 0;
+  }
 
   Compute *add_compute(int, char **, int trysuffix = 1);
   Compute *add_compute(const std::string &, int trysuffix = 1);
diff --git a/src/molecule.cpp b/src/molecule.cpp
index 6e2d3891d3..d0c8e793b3 100644
--- a/src/molecule.cpp
+++ b/src/molecule.cpp
@@ -31,25 +31,26 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 256
-#define EPSILON 1.0e-7
-#define BIG 1.0e20
+static constexpr int MAXLINE = 1024;
+static constexpr double EPSILON = 1.0e-7;
+static constexpr double BIG = 1.0e20;
 
-#define SINERTIA 0.4    // moment of inertia prefactor for sphere
+static constexpr double SINERTIA = 0.4;    // moment of inertia prefactor for sphere
 
 /* ---------------------------------------------------------------------- */
 
 Molecule::Molecule(LAMMPS *lmp, int narg, char **arg, int &index) :
     Pointers(lmp), id(nullptr), x(nullptr), type(nullptr), molecule(nullptr), q(nullptr),
-    radius(nullptr), rmass(nullptr), num_bond(nullptr), bond_type(nullptr), bond_atom(nullptr),
-    num_angle(nullptr), angle_type(nullptr), angle_atom1(nullptr), angle_atom2(nullptr),
-    angle_atom3(nullptr), num_dihedral(nullptr), dihedral_type(nullptr), dihedral_atom1(nullptr),
-    dihedral_atom2(nullptr), dihedral_atom3(nullptr), dihedral_atom4(nullptr),
-    num_improper(nullptr), improper_type(nullptr), improper_atom1(nullptr), improper_atom2(nullptr),
-    improper_atom3(nullptr), improper_atom4(nullptr), nspecial(nullptr), special(nullptr),
-    shake_flag(nullptr), shake_atom(nullptr), shake_type(nullptr), avec_body(nullptr),
-    ibodyparams(nullptr), dbodyparams(nullptr), fragmentmask(nullptr), dx(nullptr), dxcom(nullptr),
-    dxbody(nullptr), quat_external(nullptr), fp(nullptr), count(nullptr)
+    radius(nullptr), rmass(nullptr), mu(nullptr), num_bond(nullptr), bond_type(nullptr),
+    bond_atom(nullptr), num_angle(nullptr), angle_type(nullptr), angle_atom1(nullptr),
+    angle_atom2(nullptr), angle_atom3(nullptr), num_dihedral(nullptr), dihedral_type(nullptr),
+    dihedral_atom1(nullptr), dihedral_atom2(nullptr), dihedral_atom3(nullptr),
+    dihedral_atom4(nullptr), num_improper(nullptr), improper_type(nullptr), improper_atom1(nullptr),
+    improper_atom2(nullptr), improper_atom3(nullptr), improper_atom4(nullptr), nspecial(nullptr),
+    special(nullptr), shake_flag(nullptr), shake_atom(nullptr), shake_type(nullptr),
+    avec_body(nullptr), ibodyparams(nullptr), dbodyparams(nullptr), fragmentmask(nullptr),
+    dx(nullptr), dxcom(nullptr), dxbody(nullptr), quat_external(nullptr), fp(nullptr),
+    count(nullptr)
 {
   me = comm->me;
 
@@ -132,7 +133,7 @@ Molecule::Molecule(LAMMPS *lmp, int narg, char **arg, int &index) :
 
   // initialize all fields to empty
 
-  initialize();
+  Molecule::initialize();
 
   // scan file for sizes of all fields and allocate storage for them
 
@@ -141,28 +142,30 @@ Molecule::Molecule(LAMMPS *lmp, int narg, char **arg, int &index) :
     if (fp == nullptr)
       error->one(FLERR, "Cannot open molecule file {}: {}", arg[ifile], utils::getsyserror());
   }
-  read(0);
+  Molecule::read(0);
   if (me == 0) fclose(fp);
-  allocate();
+  Molecule::allocate();
 
   // read file again to populate all fields
 
   if (me == 0) fp = fopen(arg[ifile], "r");
-  read(1);
+  Molecule::read(1);
   if (me == 0) fclose(fp);
 
   // stats
 
+  if (title.empty()) title = "(no title)";
   if (me == 0)
     utils::logmesg(lmp,
-                   "Read molecule template {}:\n  {} molecules\n"
+                   "Read molecule template {}:\n{}\n"
+                   "  {} molecules\n"
                    "  {} fragments\n"
                    "  {} atoms with max type {}\n"
                    "  {} bonds with max type {}\n"
                    "  {} angles with max type {}\n"
                    "  {} dihedrals with max type {}\n"
                    "  {} impropers with max type {}\n",
-                   id, nmolecules, nfragments, natoms, ntypes, nbonds, nbondtypes, nangles,
+                   id, title, nmolecules, nfragments, natoms, ntypes, nbonds, nbondtypes, nangles,
                    nangletypes, ndihedrals, ndihedraltypes, nimpropers, nimpropertypes);
 }
 
@@ -413,7 +416,7 @@ void Molecule::compute_inertia()
 
 void Molecule::read(int flag)
 {
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
   char *eof;
 
   // skip 1st line of file
@@ -423,6 +426,8 @@ void Molecule::read(int flag)
     if (eof == nullptr) error->one(FLERR, "Unexpected end of molecule file");
   }
 
+  if (flag == 0) title = utils::trim(line);
+
   // read header lines
   // skip blank lines or lines that start with "#"
   // stop when read an unrecognized line
@@ -572,6 +577,12 @@ void Molecule::read(int flag)
         diameters(line);
       else
         skip_lines(natoms, line, keyword);
+    } else if (keyword == "Dipoles") {
+      muflag = 1;
+      if (flag)
+        dipoles(line);
+      else
+        skip_lines(natoms, line, keyword);
     } else if (keyword == "Masses") {
       rmassflag = 1;
       if (flag)
@@ -948,6 +959,40 @@ void Molecule::diameters(char *line)
   }
 }
 
+/* ----------------------------------------------------------------------
+   read charges from file
+------------------------------------------------------------------------- */
+
+void Molecule::dipoles(char *line)
+{
+  for (int i = 0; i < natoms; i++) count[i] = 0;
+  try {
+    for (int i = 0; i < natoms; i++) {
+      readline(line);
+
+      ValueTokenizer values(utils::trim_comment(line));
+      if ((int) values.count() != 4)
+        error->all(FLERR, "Invalid line in Dipoles section of molecule file: {}", line);
+
+      int iatom = values.next_int() - 1;
+      if (iatom < 0 || iatom >= natoms)
+        error->all(FLERR, "Invalid atom index in Dipoles section of molecule file");
+
+      count[iatom]++;
+      mu[iatom][0] = values.next_double();
+      mu[iatom][1] = values.next_double();
+      mu[iatom][2] = values.next_double();
+    }
+  } catch (TokenizerException &e) {
+    error->all(FLERR, "Invalid line in Dipoles section of molecule file: {}\n{}", e.what(), line);
+  }
+
+  for (int i = 0; i < natoms; i++) {
+    if (count[i] == 0)
+      error->all(FLERR, "Atom {} missing in Dipoles section of molecule file", i + 1);
+  }
+}
+
 /* ----------------------------------------------------------------------
    read masses from file
 ------------------------------------------------------------------------- */
@@ -1828,6 +1873,7 @@ void Molecule::check_attributes()
 
   int mismatch = 0;
   if (qflag && !atom->q_flag) mismatch = 1;
+  if (muflag && !atom->mu_flag) mismatch = 1;
   if (radiusflag && !atom->radius_flag) mismatch = 1;
   if (rmassflag && !atom->rmass_flag) mismatch = 1;
 
@@ -1869,6 +1915,7 @@ void Molecule::check_attributes()
 
 void Molecule::initialize()
 {
+  title.clear();
   natoms = 0;
   nbonds = nangles = ndihedrals = nimpropers = 0;
   ntypes = 0;
@@ -1880,7 +1927,7 @@ void Molecule::initialize()
   bond_per_atom = angle_per_atom = dihedral_per_atom = improper_per_atom = 0;
   maxspecial = 0;
 
-  xflag = typeflag = moleculeflag = fragmentflag = qflag = radiusflag = rmassflag = 0;
+  xflag = typeflag = moleculeflag = fragmentflag = qflag = radiusflag = muflag = rmassflag = 0;
   bondflag = angleflag = dihedralflag = improperflag = 0;
   nspecialflag = specialflag = 0;
   shakeflag = shakeflagflag = shakeatomflag = shaketypeflag = 0;
@@ -1943,6 +1990,7 @@ void Molecule::allocate()
       for (int j = 0; j < natoms; j++) fragmentmask[i][j] = 0;
   }
   if (qflag) memory->create(q, natoms, "molecule:q");
+  if (muflag) memory->create(mu, natoms, 3, "molecule:mu");
   if (radiusflag) memory->create(radius, natoms, "molecule:radius");
   if (rmassflag) memory->create(rmass, natoms, "molecule:rmass");
 
@@ -2086,7 +2134,7 @@ void Molecule::readline(char *line)
 
 std::string Molecule::parse_keyword(int flag, char *line)
 {
-  char line2[MAXLINE];
+  char line2[MAXLINE] = {'\0'};
   if (flag) {
 
     // read upto non-blank line plus 1 following line
@@ -2167,6 +2215,11 @@ void Molecule::print()
     for (int i = 0; i < natoms; i++)
       printf("    %d %g\n",i+1,radius[i]);
   }
+  if (muflag) {
+    printf(  "Dipoles:\n");
+    for (int i = 0; i < natoms; i++)
+      printf("    %d %g %g %g\n",i+1,mu[i][0],mu[i][1],mu[i][2]);
+  }
   if (rmassflag) {
     printf(  "Masses:\n");
     for (int i = 0; i < natoms; i++)
diff --git a/src/molecule.h b/src/molecule.h
index 06a1211ea3..faba036aab 100644
--- a/src/molecule.h
+++ b/src/molecule.h
@@ -25,6 +25,8 @@ class Molecule : protected Pointers {
                // else 0 if not first in set
   int last;    // 1 if last molecule in set, else 0
 
+  std::string title;    // title string of the molecule file
+
   // number of atoms,bonds,etc in molecule
   // nibody,ndbody = # of integer/double fields in body
 
@@ -41,7 +43,7 @@ class Molecule : protected Pointers {
 
   // 1 if attribute defined in file, 0 if not
 
-  int xflag, typeflag, moleculeflag, fragmentflag, qflag, radiusflag, rmassflag;
+  int xflag, typeflag, moleculeflag, fragmentflag, qflag, radiusflag, muflag, rmassflag;
   int bondflag, angleflag, dihedralflag, improperflag;
   int nspecialflag, specialflag;
   int shakeflag, shakeflagflag, shakeatomflag, shaketypeflag;
@@ -63,6 +65,7 @@ class Molecule : protected Pointers {
   double *q;           // charge on each atom
   double *radius;      // radius of each atom
   double *rmass;       // mass of each atom
+  double **mu;         // dipole vector of each atom
 
   int *num_bond;    // bonds, angles, dihedrals, impropers for each atom
   int **bond_type;
@@ -142,6 +145,7 @@ class Molecule : protected Pointers {
   void fragments(char *);
   void charges(char *);
   void diameters(char *);
+  void dipoles(char *);
   void masses(char *);
   void bonds(int, char *);
   void angles(int, char *);
diff --git a/src/nbin_multi.cpp b/src/nbin_multi.cpp
index 0a0f1557de..15c089bee7 100644
--- a/src/nbin_multi.cpp
+++ b/src/nbin_multi.cpp
@@ -26,7 +26,7 @@
 
 using namespace LAMMPS_NS;
 
-#define SMALL 1.0e-6
+static constexpr double SMALL = 1.0e-6;
 #define CUT2BIN_RATIO 100
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/nbin_standard.cpp b/src/nbin_standard.cpp
index 68cc860dbe..08ff537d71 100644
--- a/src/nbin_standard.cpp
+++ b/src/nbin_standard.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define SMALL 1.0e-6
+static constexpr double SMALL = 1.0e-6;
 #define CUT2BIN_RATIO 100
 
 /* ---------------------------------------------------------------------- */
diff --git a/src/neigh_list.cpp b/src/neigh_list.cpp
index 2d91eebdef..4bdd58eead 100644
--- a/src/neigh_list.cpp
+++ b/src/neigh_list.cpp
@@ -23,7 +23,7 @@
 
 using namespace LAMMPS_NS;
 
-#define PGDELTA 1
+static constexpr int PGDELTA = 1;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/neighbor.cpp b/src/neighbor.cpp
index ad509dce7b..662494ea7b 100644
--- a/src/neighbor.cpp
+++ b/src/neighbor.cpp
@@ -57,11 +57,11 @@
 using namespace LAMMPS_NS;
 using namespace NeighConst;
 
-#define RQDELTA 1
-#define EXDELTA 1
-#define DELTA_PERATOM 64
+static constexpr int RQDELTA = 1;
+static constexpr int EXDELTA = 1;
+static constexpr int DELTA_PERATOM = 64;
 
-#define BIG 1.0e20
+static constexpr double BIG = 1.0e20;
 
 enum{NONE,ALL,PARTIAL,TEMPLATE};
 
@@ -501,7 +501,7 @@ void Neighbor::init()
   // fixchecklist = other classes that can induce reneighboring in decide()
 
   fixchecklist.clear();
-  for (auto &ifix : modify->get_fix_list()) {
+  for (const auto &ifix : modify->get_fix_list()) {
     if (ifix->force_reneighbor) {
       fixchecklist.push_back(ifix);
       must_check = 1;
@@ -1596,10 +1596,16 @@ void Neighbor::init_topology()
 
   int bond_off = 0;
   int angle_off = 0;
-  for (i = 0; i < modify->nfix; i++)
-    if (utils::strmatch(modify->fix[i]->style,"^shake")
-        || utils::strmatch(modify->fix[i]->style,"^rattle"))
+  int dihedral_off = 0;
+  int improper_off = 0;
+
+  for (const auto &ifix : modify->get_fix_list()) {
+    if (utils::strmatch(ifix->style,"^shake") || utils::strmatch(ifix->style,"^rattle"))
       bond_off = angle_off = 1;
+    if (utils::strmatch(ifix->style,"gcmc"))
+      bond_off = angle_off = dihedral_off = improper_off = 1;
+  }
+
   if (force->bond)
     if (force->bond->partial_flag)
       bond_off = 1;
@@ -1620,7 +1626,6 @@ void Neighbor::init_topology()
     }
   }
 
-  int dihedral_off = 0;
   if (atom->avec->dihedrals_allow && atom->molecular == Atom::MOLECULAR) {
     for (i = 0; i < atom->nlocal; i++) {
       if (dihedral_off) break;
@@ -1629,7 +1634,6 @@ void Neighbor::init_topology()
     }
   }
 
-  int improper_off = 0;
   if (atom->avec->impropers_allow && atom->molecular == Atom::MOLECULAR) {
     for (i = 0; i < atom->nlocal; i++) {
       if (improper_off) break;
@@ -1638,10 +1642,6 @@ void Neighbor::init_topology()
     }
   }
 
-  for (i = 0; i < modify->nfix; i++)
-    if ((strcmp(modify->fix[i]->style,"gcmc") == 0))
-      bond_off = angle_off = dihedral_off = improper_off = 1;
-
   // sync on/off settings across all procs
 
   int onoff = bond_off;
@@ -1791,16 +1791,17 @@ void Neighbor::print_pairwise_info()
         out += fmt::format(", trim from ({})",rq->copylist+1);
       else
         out += fmt::format(", copy from ({})",rq->copylist+1);
-    } else if (rq->halffull)
+    } else if (rq->halffull) {
       if (rq->trim)
         out += fmt::format(", half/full trim from ({})",rq->halffulllist+1);
       else
         out += fmt::format(", half/full from ({})",rq->halffulllist+1);
-    else if (rq->skip)
+    } else if (rq->skip) {
       if (rq->trim)
         out += fmt::format(", skip trim from ({})",rq->skiplist+1);
       else
         out += fmt::format(", skip from ({})",rq->skiplist+1);
+    }
     out += "\n";
 
     // list of neigh list attributes
@@ -2015,6 +2016,7 @@ int Neighbor::choose_stencil(NeighRequest *rq)
     // require match of these request flags and mask bits
     // (!A != !B) is effectively a logical xor
 
+    if (!rq->intel != !(mask & NS_INTEL)) continue;
     if (!rq->ghost != !(mask & NS_GHOST)) continue;
     if (!rq->ssa != !(mask & NS_SSA)) continue;
 
diff --git a/src/neighbor.h b/src/neighbor.h
index ed99ae0af1..4807e90393 100644
--- a/src/neighbor.h
+++ b/src/neighbor.h
@@ -301,8 +301,9 @@ namespace NeighConst {
     NS_ORTHO = 1 << 6,
     NS_TRI = 1 << 7,
     NS_GHOST = 1 << 8,
-    NS_SSA = 1 << 9,
-    NS_MULTI_OLD = 1 << 10
+    NS_INTEL = 1 << 9,
+    NS_SSA = 1 << 10,
+    NS_MULTI_OLD = 1 << 11
   };
 
   enum {
diff --git a/src/npair.cpp b/src/npair.cpp
index c1615411c0..9af7767912 100644
--- a/src/npair.cpp
+++ b/src/npair.cpp
@@ -174,6 +174,9 @@ void NPair::copy_stencil_info()
 
   nstencil_multi = ns->nstencil_multi;
   stencil_multi = ns->stencil_multi;
+
+  flag_half_multi = ns->flag_half_multi;
+  flag_same_multi = ns->flag_same_multi;
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/npair.h b/src/npair.h
index 3eeb1d48f4..4abc7aabef 100644
--- a/src/npair.h
+++ b/src/npair.h
@@ -96,6 +96,8 @@ class NPair : protected Pointers {
   int *nstencil_multi_old;
   int **stencil_multi_old;
   double **distsq_multi_old;
+  bool **flag_half_multi;
+  bool **flag_same_multi;
 
   int **nstencil_multi;
   int ***stencil_multi;
diff --git a/src/npair_bin.cpp b/src/npair_bin.cpp
new file mode 100644
index 0000000000..d3d3415bc0
--- /dev/null
+++ b/src/npair_bin.cpp
@@ -0,0 +1,270 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "npair_bin.h"
+
+#include "atom.h"
+#include "atom_vec.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "molecule.h"
+#include "my_page.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+
+using namespace LAMMPS_NS;
+using namespace NeighConst;
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE, int ATOMONLY>
+NPairBin<HALF, NEWTON, TRI, SIZE, ATOMONLY>::NPairBin(LAMMPS *lmp) : NPair(lmp) {}
+
+/* ----------------------------------------------------------------------
+   Full:
+     binned neighbor list construction for all neighbors
+     every neighbor pair appears in list of both atoms i and j
+   Half + Newtoff:
+     binned neighbor list construction with partial Newton's 3rd law
+     each owned atom i checks own bin and other bins in stencil
+     pair stored once if i,j are both owned and i < j
+     pair stored by me if j is ghost (also stored by proc owning j)
+   Half + Newton:
+     binned neighbor list construction with full Newton's 3rd law
+     each owned atom i checks its own bin and other bins in Newton stencil
+     every pair stored exactly once by some processor
+------------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE, int ATOMONLY>
+void NPairBin<HALF, NEWTON, TRI, SIZE, ATOMONLY>::build(NeighList *list)
+{
+  int i, j, jh, k, n, itype, jtype, ibin, bin_start, which, imol, iatom, moltemplate;
+  tagint itag, jtag, tagprev;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq, radsum,cut,cutsq;
+  int *neighptr;
+
+  const double delta = 0.01 * force->angstrom;
+
+  double **x = atom->x;
+  double *radius = atom->radius;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  tagint *tag = atom->tag;
+  tagint *molecule = atom->molecule;
+  tagint **special = atom->special;
+  int **nspecial = atom->nspecial;
+  int nlocal = atom->nlocal;
+  if (includegroup) nlocal = atom->nfirst;
+
+  int *molindex = atom->molindex;
+  int *molatom = atom->molatom;
+  Molecule **onemols = atom->avec->onemols;
+  if (!ATOMONLY) {
+    if (molecular == Atom::TEMPLATE)
+      moltemplate = 1;
+    else
+      moltemplate = 0;
+  }
+
+  int history = list->history;
+  int mask_history = 1 << HISTBITS;
+
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+  MyPage<int> *ipage = list->ipage;
+
+  int inum = 0;
+  ipage->reset();
+
+  for (i = 0; i < nlocal; i++) {
+    n = 0;
+    neighptr = ipage->vget();
+
+    itag = tag[i];
+    itype = type[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    if (!ATOMONLY) {
+      if (moltemplate) {
+        imol = molindex[i];
+        iatom = molatom[i];
+        tagprev = tag[i] - iatom - 1;
+      }
+    }
+
+    ibin = atom2bin[i];
+
+    for (k = 0; k < nstencil; k++) {
+      bin_start = binhead[ibin + stencil[k]];
+      if (HALF && NEWTON && (!TRI)) {
+        if (k == 0) {
+          // Half neighbor list, newton on, orthonormal
+          // loop over rest of atoms in i's bin, ghosts are at end of linked list
+          bin_start = bins[i];
+        }
+      }
+
+      for (j = bin_start; j >= 0; j = bins[j]) {
+        if (!HALF) {
+          // Full neighbor list
+          // only skip i = j
+          if (i == j) continue;
+        } else if (!NEWTON) {
+          // Half neighbor list, newton off
+          // only store pair if i < j
+          // stores own/own pairs only once
+          // stores own/ghost pairs on both procs
+          if (j <= i) continue;
+        } else if (TRI) {
+          // for triclinic, bin stencil is full in all 3 dims
+          // must use itag/jtag to eliminate half the I/J interactions
+          // cannot use I/J exact coord comparision
+          //   b/c transforming orthog -> lambda -> orthog for ghost atoms
+          //   with an added PBC offset can shift all 3 coords by epsilon
+          if (j <= i) continue;
+          if (j >= nlocal) {
+            jtag = tag[j];
+            if (itag > jtag) {
+              if ((itag + jtag) % 2 == 0) continue;
+            } else if (itag < jtag) {
+              if ((itag + jtag) % 2 == 1) continue;
+            } else {
+              if (fabs(x[j][2] - ztmp) > delta) {
+                if (x[j][2] < ztmp) continue;
+              } else if (fabs(x[j][1] - ytmp) > delta) {
+                if (x[j][1] < ytmp) continue;
+              } else {
+                if (x[j][0] < xtmp) continue;
+              }
+            }
+          }
+        } else {
+          // Half neighbor list, newton on, orthonormal
+          // store every pair for every bin in stencil, except for i's bin
+
+          if (k == 0) {
+            // if j is owned atom, store it, since j is beyond i in linked list
+            // if j is ghost, only store if j coords are "above and to the "right" of i
+            if (j >= nlocal) {
+              if (x[j][2] < ztmp) continue;
+              if (x[j][2] == ztmp) {
+                if (x[j][1] < ytmp) continue;
+                if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+              }
+            }
+          }
+        }
+
+        jtype = type[j];
+        if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
+
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+
+        if (SIZE) {
+          radsum = radius[i] + radius[j];
+          cut = radsum + skin;
+          cutsq = cut * cut;
+
+          if (ATOMONLY) {
+            if (rsq <= cutsq) {
+              jh = j;
+              if (history && rsq < (radsum * radsum))
+                jh = jh ^ mask_history;
+              neighptr[n++] = jh;
+            }
+          } else {
+            if (rsq <= cutsq) {
+              jh = j;
+              if (history && rsq < (radsum * radsum))
+                jh = jh ^ mask_history;
+
+              if (molecular != Atom::ATOMIC) {
+                if (!moltemplate)
+                  which = find_special(special[i], nspecial[i], tag[j]);
+                else if (imol >= 0)
+                  which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                       tag[j] - tagprev);
+                else
+                  which = 0;
+                if (which == 0)
+                  neighptr[n++] = jh;
+                else if (domain->minimum_image_check(delx, dely, delz))
+                  neighptr[n++] = jh;
+                else if (which > 0)
+                  neighptr[n++] = jh ^ (which << SBBITS);
+              } else
+                neighptr[n++] = jh;
+            }
+          }
+        } else {
+          if (ATOMONLY) {
+            if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
+          } else {
+            if (rsq <= cutneighsq[itype][jtype]) {
+              if (molecular != Atom::ATOMIC) {
+                if (!moltemplate)
+                  which = find_special(special[i], nspecial[i], tag[j]);
+                else if (imol >= 0)
+                  which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                       tag[j] - tagprev);
+                else
+                  which = 0;
+                if (which == 0)
+                  neighptr[n++] = j;
+                else if (domain->minimum_image_check(delx, dely, delz))
+                  neighptr[n++] = j;
+                else if (which > 0)
+                  neighptr[n++] = j ^ (which << SBBITS);
+              } else
+                neighptr[n++] = j;
+            }
+          }
+        }
+      }
+    }
+
+    ilist[inum++] = i;
+    firstneigh[i] = neighptr;
+    numneigh[i] = n;
+    ipage->vgot(n);
+    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  }
+
+  list->inum = inum;
+  if (!HALF) list->gnum = 0;
+}
+
+namespace LAMMPS_NS {
+template class NPairBin<0,1,0,0,0>;
+template class NPairBin<1,0,0,0,0>;
+template class NPairBin<1,1,0,0,0>;
+template class NPairBin<1,1,1,0,0>;
+template class NPairBin<0,1,0,1,0>;
+template class NPairBin<1,0,0,1,0>;
+template class NPairBin<1,1,0,1,0>;
+template class NPairBin<1,1,1,1,0>;
+template class NPairBin<0,1,0,0,1>;
+template class NPairBin<1,0,0,0,1>;
+template class NPairBin<1,1,0,0,1>;
+template class NPairBin<1,1,1,0,1>;
+template class NPairBin<0,1,0,1,1>;
+template class NPairBin<1,0,0,1,1>;
+template class NPairBin<1,1,0,1,1>;
+template class NPairBin<1,1,1,1,1>;
+}
diff --git a/src/npair_bin.h b/src/npair_bin.h
new file mode 100644
index 0000000000..94b7c7077e
--- /dev/null
+++ b/src/npair_bin.h
@@ -0,0 +1,119 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NPAIR_CLASS
+// clang-format off
+typedef NPairBin<0, 1, 0, 0, 0> NPairFullBin;
+NPairStyle(full/bin,
+           NPairFullBin,
+           NP_FULL | NP_BIN | NP_MOLONLY |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBin<1, 0, 0, 0, 0> NPairHalfBinNewtoff;
+NPairStyle(half/bin/newtoff,
+           NPairHalfBinNewtoff,
+           NP_HALF | NP_BIN | NP_MOLONLY | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBin<1, 1, 0, 0, 0> NPairHalfBinNewton;
+NPairStyle(half/bin/newton,
+           NPairHalfBinNewton,
+           NP_HALF | NP_BIN | NP_MOLONLY | NP_NEWTON | NP_ORTHO);
+
+typedef NPairBin<1, 1, 1, 0, 0> NPairHalfBinNewtonTri;
+NPairStyle(half/bin/newton/tri,
+           NPairHalfBinNewtonTri,
+           NP_HALF | NP_BIN | NP_MOLONLY | NP_NEWTON | NP_TRI);
+
+typedef NPairBin<0, 1, 0, 1, 0> NPairFullSizeBin;
+NPairStyle(full/size/bin,
+           NPairFullSizeBin,
+           NP_FULL | NP_SIZE | NP_BIN | NP_MOLONLY |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBin<1, 0, 0, 1, 0> NPairHalfSizeBinNewtoff;
+NPairStyle(half/size/bin/newtoff,
+           NPairHalfSizeBinNewtoff,
+           NP_HALF | NP_SIZE | NP_BIN | NP_MOLONLY | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBin<1, 1, 0, 1, 0> NPairHalfSizeBinNewton;
+NPairStyle(half/size/bin/newton,
+           NPairHalfSizeBinNewton,
+           NP_HALF | NP_SIZE | NP_BIN | NP_MOLONLY | NP_NEWTON | NP_ORTHO);
+
+typedef NPairBin<1, 1, 1, 1, 0> NPairHalfSizeBinNewtonTri;
+NPairStyle(half/size/bin/newton/tri,
+           NPairHalfSizeBinNewtonTri,
+           NP_HALF | NP_SIZE | NP_BIN | NP_MOLONLY | NP_NEWTON | NP_TRI);
+
+typedef NPairBin<0, 1, 0, 0, 1> NPairFullBinAtomonly;
+NPairStyle(full/bin/atomonly,
+           NPairFullBinAtomonly,
+           NP_FULL | NP_BIN | NP_ATOMONLY |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBin<1, 0, 0, 0, 1> NPairHalfBinAtomonlyNewtoff;
+NPairStyle(half/bin/atomonly/newtoff,
+           NPairHalfBinAtomonlyNewtoff,
+           NP_HALF | NP_BIN | NP_ATOMONLY | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBin<1, 1, 0, 0, 1> NPairHalfBinAtomonlyNewton;
+NPairStyle(half/bin/atomonly/newton,
+           NPairHalfBinAtomonlyNewton,
+           NP_HALF | NP_BIN | NP_ATOMONLY | NP_NEWTON | NP_ORTHO);
+
+typedef NPairBin<1, 1, 1, 0, 1> NPairHalfBinAtomonlyNewtonTri;
+NPairStyle(half/bin/atomonly/newton/tri,
+           NPairHalfBinAtomonlyNewtonTri,
+           NP_HALF | NP_BIN | NP_ATOMONLY | NP_NEWTON | NP_TRI);
+
+typedef NPairBin<0, 1, 0, 1, 1> NPairFullSizeBinAtomonly;
+NPairStyle(full/size/bin/atomonly,
+           NPairFullSizeBinAtomonly,
+           NP_FULL | NP_SIZE | NP_BIN | NP_ATOMONLY |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBin<1, 0, 0, 1, 1> NPairHalfSizeBinAtomonlyNewtoff;
+NPairStyle(half/size/bin/atomonly/newtoff,
+           NPairHalfSizeBinAtomonlyNewtoff,
+           NP_HALF | NP_SIZE | NP_BIN | NP_ATOMONLY | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairBin<1, 1, 0, 1, 1> NPairHalfSizeBinAtomonlyNewton;
+NPairStyle(half/size/bin/atomonly/newton,
+           NPairHalfSizeBinAtomonlyNewton,
+           NP_HALF | NP_SIZE | NP_BIN | NP_ATOMONLY | NP_NEWTON | NP_ORTHO);
+
+typedef NPairBin<1, 1, 1, 1, 1> NPairHalfSizeBinAtomonlyNewtonTri;
+NPairStyle(half/size/bin/atomonly/newton/tri,
+           NPairHalfSizeBinAtomonlyNewtonTri,
+           NP_HALF | NP_SIZE | NP_BIN | NP_ATOMONLY | NP_NEWTON | NP_TRI);
+// clang-format on
+#else
+
+#ifndef LMP_NPAIR_BIN_H
+#define LMP_NPAIR_BIN_H
+
+#include "npair.h"
+
+namespace LAMMPS_NS {
+
+template<int HALF, int NEWTON, int TRI, int SIZE, int ATOMONLY>
+class NPairBin : public NPair {
+ public:
+  NPairBin(class LAMMPS *);
+  void build(class NeighList *) override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/npair_full_bin_ghost.cpp b/src/npair_bin_ghost.cpp
similarity index 51%
rename from src/npair_full_bin_ghost.cpp
rename to src/npair_bin_ghost.cpp
index fc2f325c48..8403bc8f3d 100644
--- a/src/npair_full_bin_ghost.cpp
+++ b/src/npair_bin_ghost.cpp
@@ -12,33 +12,46 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#include "npair_full_bin_ghost.h"
-#include "neigh_list.h"
+#include "npair_bin_ghost.h"
+
 #include "atom.h"
 #include "atom_vec.h"
-#include "molecule.h"
 #include "domain.h"
-#include "my_page.h"
 #include "error.h"
+#include "molecule.h"
+#include "my_page.h"
+#include "neigh_list.h"
+#include "neighbor.h"
 
 using namespace LAMMPS_NS;
+using namespace NeighConst;
 
 /* ---------------------------------------------------------------------- */
 
-NPairFullBinGhost::NPairFullBinGhost(LAMMPS *lmp) : NPair(lmp) {}
+template<int HALF>
+NPairBinGhost<HALF>::NPairBinGhost(LAMMPS *lmp) : NPair(lmp) {}
 
 /* ----------------------------------------------------------------------
-   binned neighbor list construction for all neighbors
-   include neighbors of ghost atoms, but no "special neighbors" for ghosts
-   every neighbor pair appears in list of both atoms i and j
+   Full:
+     binned neighbor list construction for all neighbors
+     include neighbors of ghost atoms, but no "special neighbors" for ghosts
+     every neighbor pair appears in list of both atoms i and j
+   Half + Newtoff:
+     binned neighbor list construction with partial Newton's 3rd law
+     include neighbors of ghost atoms, but no "special neighbors" for ghosts
+     owned and ghost atoms check own bin and other bins in stencil
+     pair stored once if i,j are both owned and i < j
+     pair stored by me if i owned and j ghost (also stored by proc owning j)
+     pair stored once if i,j are both ghost and i < j
 ------------------------------------------------------------------------- */
 
-void NPairFullBinGhost::build(NeighList *list)
+template<int HALF>
+void NPairBinGhost<HALF>::build(NeighList *list)
 {
-  int i,j,k,n,itype,jtype,ibin,which,imol,iatom,moltemplate;
+  int i, j, k, n, itype, jtype, ibin, bin_start, which, imol, iatom, moltemplate;
   tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int xbin,ybin,zbin,xbin2,ybin2,zbin2;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
+  int xbin, ybin, zbin, xbin2, ybin2, zbin2;
   int *neighptr;
 
   double **x = atom->x;
@@ -50,12 +63,15 @@ void NPairFullBinGhost::build(NeighList *list)
   int **nspecial = atom->nspecial;
   int nlocal = atom->nlocal;
   int nall = nlocal + atom->nghost;
+  if (includegroup) nlocal = atom->nfirst;
 
   int *molindex = atom->molindex;
   int *molatom = atom->molatom;
   Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
+  if (molecular == Atom::TEMPLATE)
+    moltemplate = 1;
+  else
+    moltemplate = 0;
 
   int *ilist = list->ilist;
   int *numneigh = list->numneigh;
@@ -66,7 +82,6 @@ void NPairFullBinGhost::build(NeighList *list)
   ipage->reset();
 
   // loop over owned & ghost atoms, storing neighbors
-
   for (i = 0; i < nall; i++) {
     n = 0;
     neighptr = ipage->vget();
@@ -81,45 +96,58 @@ void NPairFullBinGhost::build(NeighList *list)
       tagprev = tag[i] - iatom - 1;
     }
 
-    // loop over all atoms in surrounding bins in stencil including self
-    // when i is a ghost atom, must check if stencil bin is out of bounds
-    // skip i = j
-    // no molecular test when i = ghost atom
-
     if (i < nlocal) {
       ibin = atom2bin[i];
+
+      // loop over all atoms in surrounding bins in stencil including self
+      // when i is a ghost atom, must check if stencil bin is out of bounds
+      // no molecular test when i = ghost atom
       for (k = 0; k < nstencil; k++) {
-        for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-          if (i == j) continue;
+        bin_start = binhead[ibin + stencil[k]];
+        for (j = bin_start; j >= 0; j = bins[j]) {
+          if (HALF) {
+            // Half neighbor list, newton off
+            // only store pair if i < j
+            // stores own/own pairs only once
+            // stores own/ghost pairs on both procs
+            // stores ghost/ghost pairs only once
+            if (j <= i) continue;
+          } else {
+            // Full neighbor list
+            // only skip i = j
+            if (i == j) continue;
+          }
 
           jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
+          if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
 
           delx = xtmp - x[j][0];
           dely = ytmp - x[j][1];
           delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
+          rsq = delx * delx + dely * dely + delz * delz;
 
           if (rsq <= cutneighsq[itype][jtype]) {
             if (molecular != Atom::ATOMIC) {
               if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
+                which = find_special(special[i], nspecial[i], tag[j]);
               else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = j;
-              else if (domain->minimum_image_check(delx,dely,delz))
+                which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                     tag[j] - tagprev);
+              else
+                which = 0;
+              if (which == 0)
                 neighptr[n++] = j;
-              else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-            } else neighptr[n++] = j;
+              else if (domain->minimum_image_check(delx, dely, delz))
+                neighptr[n++] = j;
+              else if (which > 0)
+                neighptr[n++] = j ^ (which << SBBITS);
+            } else
+              neighptr[n++] = j;
           }
         }
       }
-
     } else {
-      ibin = coord2bin(x[i],xbin,ybin,zbin);
+      ibin = coord2bin(x[i], xbin, ybin, zbin);
       for (k = 0; k < nstencil; k++) {
         xbin2 = xbin + stencilxyz[k][0];
         ybin2 = ybin + stencilxyz[k][1];
@@ -127,16 +155,20 @@ void NPairFullBinGhost::build(NeighList *list)
         if (xbin2 < 0 || xbin2 >= mbinx ||
             ybin2 < 0 || ybin2 >= mbiny ||
             zbin2 < 0 || zbin2 >= mbinz) continue;
-        for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-          if (i == j) continue;
+        for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
+          if (HALF) {
+            if (j <= i) continue;
+          } else {
+            if (i == j) continue;
+          }
 
           jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
+          if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
 
           delx = xtmp - x[j][0];
           dely = ytmp - x[j][1];
           delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
+          rsq = delx * delx + dely * dely + delz * delz;
 
           if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
         }
@@ -147,10 +179,14 @@ void NPairFullBinGhost::build(NeighList *list)
     firstneigh[i] = neighptr;
     numneigh[i] = n;
     ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    if (ipage->status()) error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
   }
 
   list->inum = atom->nlocal;
   list->gnum = inum - atom->nlocal;
 }
+
+namespace LAMMPS_NS {
+template class NPairBinGhost<0>;
+template class NPairBinGhost<1>;
+}
diff --git a/src/npair_half_bin_newtoff_ghost.h b/src/npair_bin_ghost.h
similarity index 66%
rename from src/npair_half_bin_newtoff_ghost.h
rename to src/npair_bin_ghost.h
index c581ba86fe..f6cbcf2851 100644
--- a/src/npair_half_bin_newtoff_ghost.h
+++ b/src/npair_bin_ghost.h
@@ -13,22 +13,29 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
-NPairStyle(half/bin/newtoff/ghost,
-           NPairHalfBinNewtoffGhost,
+typedef NPairBinGhost<0> NPairFullBinGhost;
+NPairStyle(full/bin/ghost,
+           NPairFullBinGhost,
+           NP_FULL | NP_BIN | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI);
+
+typedef NPairBinGhost<1> NPairHalfBinGhostNewtoff;
+NPairStyle(half/bin/ghost/newtoff,
+           NPairHalfBinGhostNewtoff,
            NP_HALF | NP_BIN | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_BIN_NEWTOFF_GHOST_H
-#define LMP_NPAIR_HALF_BIN_NEWTOFF_GHOST_H
+#ifndef LMP_NPAIR_BIN_GHOST_H
+#define LMP_NPAIR_BIN_GHOST_H
 
 #include "npair.h"
 
 namespace LAMMPS_NS {
 
-class NPairHalfBinNewtoffGhost : public NPair {
+template<int HALF>
+class NPairBinGhost : public NPair {
  public:
-  NPairHalfBinNewtoffGhost(class LAMMPS *);
+  NPairBinGhost(class LAMMPS *);
   void build(class NeighList *) override;
 };
 
diff --git a/src/npair_full_bin.cpp b/src/npair_full_bin.cpp
deleted file mode 100644
index 47e0943221..0000000000
--- a/src/npair_full_bin.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_full_bin.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairFullBin::NPairFullBin(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction for all neighbors
-   every neighbor pair appears in list of both atoms i and j
-------------------------------------------------------------------------- */
-
-void NPairFullBin::build(NeighList *list)
-{
-  int i, j, k, n, itype, jtype, ibin, which, imol, iatom, moltemplate;
-  tagint tagprev;
-  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE)
-    moltemplate = 1;
-  else
-    moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in surrounding bins in stencil including self
-    // skip i = j
-
-    ibin = atom2bin[i];
-
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
-        if (i == j) continue;
-
-        jtype = type[j];
-        if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx * delx + dely * dely + delz * delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate) {
-              which = find_special(special[i], nspecial[i], tag[j]);
-            } else if (imol >= 0) {
-              const auto mol = onemols[imol];
-              which = find_special(mol->special[iatom], mol->nspecial[iatom], tag[j] - tagprev);
-            } else {
-              which = 0;
-            }
-            if (which == 0)
-              neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx, dely, delz))
-              neighptr[n++] = j;
-            else if (which > 0)
-              neighptr[n++] = j ^ (which << SBBITS);
-          } else
-            neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-  list->gnum = 0;
-}
diff --git a/src/npair_full_bin.h b/src/npair_full_bin.h
deleted file mode 100644
index 7ee37c8ec5..0000000000
--- a/src/npair_full_bin.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(full/bin,
-           NPairFullBin,
-           NP_FULL | NP_BIN | NP_MOLONLY |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_FULL_BIN_H
-#define LMP_NPAIR_FULL_BIN_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairFullBin : public NPair {
- public:
-  NPairFullBin(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_full_bin_atomonly.cpp b/src/npair_full_bin_atomonly.cpp
deleted file mode 100644
index 825b25fe56..0000000000
--- a/src/npair_full_bin_atomonly.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_full_bin_atomonly.h"
-
-#include "atom.h"
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairFullBinAtomonly::NPairFullBinAtomonly(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction for all neighbors
-   every neighbor pair appears in list of both atoms i and j
-------------------------------------------------------------------------- */
-
-void NPairFullBinAtomonly::build(NeighList *list)
-{
-  int i, j, k, n, itype, jtype, ibin;
-  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *molecule = atom->molecule;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    // loop over all atoms in surrounding bins in stencil including self
-    // skip i = j
-
-    ibin = atom2bin[i];
-
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
-        if (i == j) continue;
-
-        jtype = type[j];
-        if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx * delx + dely * dely + delz * delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-  list->gnum = 0;
-}
diff --git a/src/npair_full_bin_atomonly.h b/src/npair_full_bin_atomonly.h
deleted file mode 100644
index b30d0433e0..0000000000
--- a/src/npair_full_bin_atomonly.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(full/bin/atomonly,
-           NPairFullBinAtomonly,
-           NP_FULL | NP_BIN | NP_ATOMONLY |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_FULL_BIN_ATOMONLY_H
-#define LMP_NPAIR_FULL_BIN_ATOMONLY_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairFullBinAtomonly : public NPair {
- public:
-  NPairFullBinAtomonly(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_full_bin_ghost.h b/src/npair_full_bin_ghost.h
deleted file mode 100644
index 7fedb1e0c5..0000000000
--- a/src/npair_full_bin_ghost.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(full/bin/ghost,
-           NPairFullBinGhost,
-           NP_FULL | NP_BIN | NP_GHOST | NP_NEWTON | NP_NEWTOFF |
-           NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_FULL_BIN_GHOST_H
-#define LMP_NPAIR_FULL_BIN_GHOST_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairFullBinGhost : public NPair {
- public:
-  NPairFullBinGhost(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_full_multi.cpp b/src/npair_full_multi.cpp
deleted file mode 100644
index 5df1c5b200..0000000000
--- a/src/npair_full_multi.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_full_multi.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairFullMulti::NPairFullMulti(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction for all neighbors
-   multi stencil is icollection-jcollection dependent
-   every neighbor pair appears in list of both atoms i and j
-------------------------------------------------------------------------- */
-
-void NPairFullMulti::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,icollection,jcollection,ibin,jbin,which,ns,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  int js;
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-      if(icollection == jcollection) jbin = ibin;
-          else jbin = coord2bin(x[i], jcollection);
-
-      // loop over all atoms in surrounding bins in stencil including self
-      // skip i = j
-      // use full stencil for all collection combinations
-
-      s = stencil_multi[icollection][jcollection];
-      ns = nstencil_multi[icollection][jcollection];
-
-      for (k = 0; k < ns; k++) {
-            js = binhead_multi[jcollection][jbin + s[k]];
-            for (j = js; j >= 0; j = bins[j]) {
-              if (i == j) continue;
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-              delx = xtmp - x[j][0];
-              dely = ytmp - x[j][1];
-              delz = ztmp - x[j][2];
-              rsq = delx*delx + dely*dely + delz*delz;
-
-              if (rsq <= cutneighsq[itype][jtype]) {
-                if (molecular != Atom::ATOMIC) {
-                  if (!moltemplate)
-                        which = find_special(special[i],nspecial[i],tag[j]);
-                  else if (imol >= 0)
-                        which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-                  else which = 0;
-                  if (which == 0) neighptr[n++] = j;
-                  else if (domain->minimum_image_check(delx,dely,delz))
-                        neighptr[n++] = j;
-                  else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-                } else neighptr[n++] = j;
-              }
-            }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-  list->gnum = 0;
-}
diff --git a/src/npair_full_multi.h b/src/npair_full_multi.h
deleted file mode 100644
index c3c707a91d..0000000000
--- a/src/npair_full_multi.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(full/multi,
-           NPairFullMulti,
-           NP_FULL | NP_MULTI | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_FULL_MULTI_H
-#define LMP_NPAIR_FULL_MULTI_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairFullMulti : public NPair {
- public:
-  NPairFullMulti(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_full_multi_old.cpp b/src/npair_full_multi_old.cpp
deleted file mode 100644
index 2d529627f2..0000000000
--- a/src/npair_full_multi_old.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_full_multi_old.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairFullMultiOld::NPairFullMultiOld(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction for all neighbors
-   multi-type stencil is itype dependent and is distance checked
-   every neighbor pair appears in list of both atoms i and j
-------------------------------------------------------------------------- */
-
-void NPairFullMultiOld::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in other bins in stencil, including self
-    // skip if i,j neighbor cutoff is less than bin distance
-    // skip i = j
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-        if (i == j) continue;
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-  list->gnum = 0;
-}
diff --git a/src/npair_full_nsq.cpp b/src/npair_full_nsq.cpp
deleted file mode 100644
index c4ae539622..0000000000
--- a/src/npair_full_nsq.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_full_nsq.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "group.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairFullNsq::NPairFullNsq(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   N^2 search for all neighbors
-   every neighbor pair appears in list of both atoms i and j
-------------------------------------------------------------------------- */
-
-void NPairFullNsq::build(NeighList *list)
-{
-  int i, j, n, itype, jtype, which, bitmask, imol, iatom, moltemplate;
-  tagint tagprev;
-  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
-  if (includegroup) {
-    nlocal = atom->nfirst;
-    bitmask = group->bitmask[includegroup];
-  }
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE)
-    moltemplate = 1;
-  else
-    moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms, owned and ghost
-    // skip i = j
-
-    for (j = 0; j < nall; j++) {
-      if (includegroup && !(mask[j] & bitmask)) continue;
-      if (i == j) continue;
-      jtype = type[j];
-      if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate) {
-            which = find_special(special[i], nspecial[i], tag[j]);
-          } else if (imol >= 0) {
-            const auto mol = onemols[imol];
-            which = find_special(mol->special[iatom], mol->nspecial[iatom], tag[j] - tagprev);
-          } else {
-            which = 0;
-          }
-
-          if (which == 0)
-            neighptr[n++] = j;
-          else if (domain->minimum_image_check(delx, dely, delz))
-            neighptr[n++] = j;
-          else if (which > 0)
-            neighptr[n++] = j ^ (which << SBBITS);
-        } else
-          neighptr[n++] = j;
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-  list->gnum = 0;
-}
diff --git a/src/npair_full_nsq.h b/src/npair_full_nsq.h
deleted file mode 100644
index 0f6665826d..0000000000
--- a/src/npair_full_nsq.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(full/nsq,
-           NPairFullNsq,
-           NP_FULL | NP_NSQ | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_FULL_NSQ_H
-#define LMP_NPAIR_FULL_NSQ_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairFullNsq : public NPair {
- public:
-  NPairFullNsq(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_full_nsq_ghost.cpp b/src/npair_full_nsq_ghost.cpp
deleted file mode 100644
index 1ea118bc85..0000000000
--- a/src/npair_full_nsq_ghost.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_full_nsq_ghost.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairFullNsqGhost::NPairFullNsqGhost(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   N^2 search for all neighbors
-   include neighbors of ghost atoms, but no "special neighbors" for ghosts
-   every neighbor pair appears in list of both atoms i and j
-------------------------------------------------------------------------- */
-
-void NPairFullNsqGhost::build(NeighList *list)
-{
-  int i,j,n,itype,jtype,which,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  // loop over owned & ghost atoms, storing neighbors
-
-  for (i = 0; i < nall; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms, owned and ghost
-    // skip i = j
-    // no molecular test when i = ghost atom
-
-    if (i < nlocal) {
-      for (j = 0; j < nall; j++) {
-        if (i == j) continue;
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    } else {
-      for (j = 0; j < nall; j++) {
-        if (i == j) continue;
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = atom->nlocal;
-  list->gnum = inum - atom->nlocal;
-}
diff --git a/src/npair_full_nsq_ghost.h b/src/npair_full_nsq_ghost.h
deleted file mode 100644
index 0e26d8c5df..0000000000
--- a/src/npair_full_nsq_ghost.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(full/nsq/ghost,
-           NPairFullNsqGhost,
-           NP_FULL | NP_NSQ | NP_GHOST | NP_NEWTON | NP_NEWTOFF |
-           NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_FULL_NSQ_GHOST_H
-#define LMP_NPAIR_FULL_NSQ_GHOST_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairFullNsqGhost : public NPair {
- public:
-  NPairFullNsqGhost(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_bin_atomonly_newton.cpp b/src/npair_half_bin_atomonly_newton.cpp
deleted file mode 100644
index 316d7731fc..0000000000
--- a/src/npair_half_bin_atomonly_newton.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_bin_atomonly_newton.h"
-
-#include "atom.h"
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfBinAtomonlyNewton::NPairHalfBinAtomonlyNewton(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfBinAtomonlyNewton::build(NeighList *list)
-{
-  int i, j, k, n, itype, jtype, ibin;
-  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *molecule = atom->molecule;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    // loop over rest of atoms in i's bin, ghosts are at end of linked list
-    // if j is owned atom, store it, since j is beyond i in linked list
-    // if j is ghost, only store if j coords are "above and to the right" of i
-
-    for (j = bins[i]; j >= 0; j = bins[j]) {
-      if (j >= nlocal) {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      jtype = type[j];
-      if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
-    }
-
-    // loop over all atoms in other bins in stencil, store every pair
-
-    ibin = atom2bin[i];
-
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx * delx + dely * dely + delz * delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_bin_newtoff.cpp b/src/npair_half_bin_newtoff.cpp
deleted file mode 100644
index 09d3e2a682..0000000000
--- a/src/npair_half_bin_newtoff.cpp
+++ /dev/null
@@ -1,132 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_bin_newtoff.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfBinNewtoff::NPairHalfBinNewtoff(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with partial Newton's 3rd law
-   each owned atom i checks own bin and other bins in stencil
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfBinNewtoff::build(NeighList *list)
-{
-  int i, j, k, n, itype, jtype, ibin, which, imol, iatom, moltemplate;
-  tagint tagprev;
-  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE)
-    moltemplate = 1;
-  else
-    moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in other bins in stencil including self
-    // only store pair if i < j
-    // stores own/own pairs only once
-    // stores own/ghost pairs on both procs
-
-    ibin = atom2bin[i];
-
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
-        if (j <= i) continue;
-
-        jtype = type[j];
-        if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx * delx + dely * dely + delz * delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i], nspecial[i], tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
-                                   tag[j] - tagprev);
-            else
-              which = 0;
-            if (which == 0)
-              neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx, dely, delz))
-              neighptr[n++] = j;
-            else if (which > 0)
-              neighptr[n++] = j ^ (which << SBBITS);
-            // OLD: if (which >= 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else
-            neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-  list->inum = inum;
-}
diff --git a/src/npair_half_bin_newtoff.h b/src/npair_half_bin_newtoff.h
deleted file mode 100644
index ce551b1b95..0000000000
--- a/src/npair_half_bin_newtoff.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/bin/newtoff,
-           NPairHalfBinNewtoff,
-           NP_HALF | NP_BIN | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_BIN_NEWTOFF_H
-#define LMP_NPAIR_HALF_BIN_NEWTOFF_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfBinNewtoff : public NPair {
- public:
-  NPairHalfBinNewtoff(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_bin_newtoff_ghost.cpp b/src/npair_half_bin_newtoff_ghost.cpp
deleted file mode 100644
index 6672e8fd5b..0000000000
--- a/src/npair_half_bin_newtoff_ghost.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_bin_newtoff_ghost.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfBinNewtoffGhost::NPairHalfBinNewtoffGhost(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with partial Newton's 3rd law
-   include neighbors of ghost atoms, but no "special neighbors" for ghosts
-   owned and ghost atoms check own bin and other bins in stencil
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if i owned and j ghost (also stored by proc owning j)
-   pair stored once if i,j are both ghost and i < j
-------------------------------------------------------------------------- */
-
-void NPairHalfBinNewtoffGhost::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,ibin,which,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int xbin,ybin,zbin,xbin2,ybin2,zbin2;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nall; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in other bins in stencil including self
-    // when i is a ghost atom, must check if stencil bin is out of bounds
-    // only store pair if i < j
-    // stores own/own pairs only once
-    // stores own/ghost pairs with owned atom only, on both procs
-    // stores ghost/ghost pairs only once
-    // no molecular test when i = ghost atom
-
-    if (i < nlocal) {
-      ibin = atom2bin[i];
-
-      for (k = 0; k < nstencil; k++) {
-        for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-          if (j <= i) continue;
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-
-          if (rsq <= cutneighsq[itype][jtype]) {
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = j;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = j;
-              else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-            } else neighptr[n++] = j;
-          }
-        }
-      }
-
-    } else {
-      ibin = coord2bin(x[i],xbin,ybin,zbin);
-      for (k = 0; k < nstencil; k++) {
-        xbin2 = xbin + stencilxyz[k][0];
-        ybin2 = ybin + stencilxyz[k][1];
-        zbin2 = zbin + stencilxyz[k][2];
-        if (xbin2 < 0 || xbin2 >= mbinx ||
-            ybin2 < 0 || ybin2 >= mbiny ||
-            zbin2 < 0 || zbin2 >= mbinz) continue;
-        for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-          if (j <= i) continue;
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-
-          if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = atom->nlocal;
-  list->gnum = inum - atom->nlocal;
-}
diff --git a/src/npair_half_bin_newton.cpp b/src/npair_half_bin_newton.cpp
deleted file mode 100644
index 201b1d2e27..0000000000
--- a/src/npair_half_bin_newton.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_bin_newton.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfBinNewton::NPairHalfBinNewton(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfBinNewton::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,ibin,which,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over rest of atoms in i's bin, ghosts are at end of linked list
-    // if j is owned atom, store it, since j is beyond i in linked list
-    // if j is ghost, only store if j coords are "above and to the right" of i
-
-    for (j = bins[i]; j >= 0; j = bins[j]) {
-      if (j >= nlocal) {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          // OLD: if (which >= 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-      }
-    }
-
-    // loop over all atoms in other bins in stencil, store every pair
-
-    ibin = atom2bin[i];
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-            // OLD: if (which >= 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_bin_newton.h b/src/npair_half_bin_newton.h
deleted file mode 100644
index bbb4f825a0..0000000000
--- a/src/npair_half_bin_newton.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/bin/newton,
-           NPairHalfBinNewton,
-           NP_HALF | NP_BIN | NP_MOLONLY | NP_NEWTON | NP_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_BIN_NEWTON_H
-#define LMP_NPAIR_HALF_BIN_NEWTON_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfBinNewton : public NPair {
- public:
-  NPairHalfBinNewton(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_bin_newton_tri.cpp b/src/npair_half_bin_newton_tri.cpp
deleted file mode 100644
index d261363b0e..0000000000
--- a/src/npair_half_bin_newton_tri.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_bin_newton_tri.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfBinNewtonTri::NPairHalfBinNewtonTri(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfBinNewtonTri::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,ibin,which,imol,iatom,moltemplate;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  const double delta = 0.01 * force->angstrom;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itag = tag[i];
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in bins in stencil
-    // for triclinic, bin stencil is full in all 3 dims
-    // must use itag/jtag to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    ibin = atom2bin[i];
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-
-        if (j <= i) continue;
-        if (j >= nlocal) {
-          jtag = tag[j];
-          if (itag > jtag) {
-            if ((itag+jtag) % 2 == 0) continue;
-          } else if (itag < jtag) {
-            if ((itag+jtag) % 2 == 1) continue;
-          } else {
-            if (fabs(x[j][2]-ztmp) > delta) {
-              if (x[j][2] < ztmp) continue;
-            } else if (fabs(x[j][1]-ytmp) > delta) {
-              if (x[j][1] < ytmp) continue;
-            } else {
-              if (x[j][0] < xtmp) continue;
-            }
-          }
-        }
-
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_bin_newton_tri.h b/src/npair_half_bin_newton_tri.h
deleted file mode 100644
index fbc3703f64..0000000000
--- a/src/npair_half_bin_newton_tri.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/bin/newton/tri,
-           NPairHalfBinNewtonTri,
-           NP_HALF | NP_BIN | NP_NEWTON | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_BIN_NEWTON_TRI_H
-#define LMP_NPAIR_HALF_BIN_NEWTON_TRI_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfBinNewtonTri : public NPair {
- public:
-  NPairHalfBinNewtonTri(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_multi_newtoff.cpp b/src/npair_half_multi_newtoff.cpp
deleted file mode 100644
index 5cacd8015d..0000000000
--- a/src/npair_half_multi_newtoff.cpp
+++ /dev/null
@@ -1,146 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_multi_newtoff.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfMultiNewtoff::NPairHalfMultiNewtoff(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with partial Newton's 3rd law
-   multi stencil is icollection-jcollection dependent
-   each owned atom i checks own bin and other bins in stencil
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfMultiNewtoff::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,icollection,jcollection,ibin,jbin,which,ns,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  int js;
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-      if (icollection == jcollection) jbin = ibin;
-          else jbin = coord2bin(x[i], jcollection);
-
-      // loop over all atoms in other bins in stencil including self
-      // only store pair if i < j
-      // stores own/own pairs only once
-      // stores own/ghost pairs on both procs
-      // use full stencil for all collection combinations
-
-      s = stencil_multi[icollection][jcollection];
-      ns = nstencil_multi[icollection][jcollection];
-
-      for (k = 0; k < ns; k++) {
-            js = binhead_multi[jcollection][jbin + s[k]];
-            for (j = js; j >= 0; j = bins[j]) {
-              if (j <= i) continue;
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-              delx = xtmp - x[j][0];
-              dely = ytmp - x[j][1];
-              delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-
-          if (rsq <= cutneighsq[itype][jtype]) {
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = j;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = j;
-              else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-            } else neighptr[n++] = j;
-          }
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_multi_newtoff.h b/src/npair_half_multi_newtoff.h
deleted file mode 100644
index cfb3ba9e60..0000000000
--- a/src/npair_half_multi_newtoff.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/multi/newtoff,
-           NPairHalfMultiNewtoff,
-           NP_HALF | NP_MULTI | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_MULTI_NEWTOFF_H
-#define LMP_NPAIR_HALF_MULTI_NEWTOFF_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfMultiNewtoff : public NPair {
- public:
-  NPairHalfMultiNewtoff(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_multi_newton.cpp b/src/npair_half_multi_newton.cpp
deleted file mode 100644
index dad42ffb7f..0000000000
--- a/src/npair_half_multi_newton.cpp
+++ /dev/null
@@ -1,194 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_multi_newton.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfMultiNewton::NPairHalfMultiNewton(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with full Newton's 3rd law
-   multi stencil is icollection-jcollection dependent
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfMultiNewton::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,icollection,jcollection,ibin,jbin,which,ns,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  int js;
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == 2) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-      if(icollection == jcollection) jbin = ibin;
-          else jbin = coord2bin(x[i], jcollection);
-
-      // if same size: uses half stencil so check central bin
-      if(cutcollectionsq[icollection][icollection] == cutcollectionsq[jcollection][jcollection]){
-
-        if (icollection == jcollection) js = bins[i];
-        else js = binhead_multi[jcollection][jbin];
-
-        // if same collection,
-        //   if j is owned atom, store it, since j is beyond i in linked list
-        //   if j is ghost, only store if j coords are "above and to the right" of i
-
-        // if different collections,
-        //   if j is owned atom, store it if j > i
-        //   if j is ghost, only store if j coords are "above and to the right" of i
-
-            for (j = js; j >= 0; j = bins[j]) {
-          if((icollection != jcollection) && (j < i)) continue;
-
-              if (j >= nlocal) {
-                if (x[j][2] < ztmp) continue;
-                if (x[j][2] == ztmp) {
-                  if (x[j][1] < ytmp) continue;
-                  if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-                }
-              }
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-              delx = xtmp - x[j][0];
-              dely = ytmp - x[j][1];
-              delz = ztmp - x[j][2];
-              rsq = delx*delx + dely*dely + delz*delz;
-
-              if (rsq <= cutneighsq[itype][jtype]) {
-                if (molecular) {
-                  if (!moltemplate)
-                    which = find_special(special[i],nspecial[i],tag[j]);
-                  else if (imol >= 0)
-                    which = find_special(onemols[imol]->special[iatom],
-                               onemols[imol]->nspecial[iatom],
-                               tag[j]-tagprev);
-                  else which = 0;
-                  if (which == 0) neighptr[n++] = j;
-                  else if (domain->minimum_image_check(delx,dely,delz))
-                    neighptr[n++] = j;
-                  else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-                } else neighptr[n++] = j;
-              }
-            }
-      }
-
-      // for all collections, loop over all atoms in other bins in stencil, store every pair
-      // stencil is empty if i larger than j
-      // stencil is half if i same size as j
-      // stencil is full if i smaller than j
-
-          s = stencil_multi[icollection][jcollection];
-          ns = nstencil_multi[icollection][jcollection];
-
-          for (k = 0; k < ns; k++) {
-            js = binhead_multi[jcollection][jbin + s[k]];
-            for (j = js; j >= 0; j = bins[j]) {
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-              delx = xtmp - x[j][0];
-              dely = ytmp - x[j][1];
-              delz = ztmp - x[j][2];
-              rsq = delx*delx + dely*dely + delz*delz;
-
-              if (rsq <= cutneighsq[itype][jtype]) {
-                if (molecular != Atom::ATOMIC) {
-                    if (!moltemplate)
-                      which = find_special(special[i],nspecial[i],tag[j]);
-                    else if (imol >= 0)
-                      which = find_special(onemols[imol]->special[iatom],
-                                       onemols[imol]->nspecial[iatom],
-                                       tag[j]-tagprev);
-                    else which = 0;
-                    if (which == 0) neighptr[n++] = j;
-                    else if (domain->minimum_image_check(delx,dely,delz))
-                      neighptr[n++] = j;
-                    else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-                } else neighptr[n++] = j;
-              }
-            }
-          }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_multi_newton_tri.cpp b/src/npair_half_multi_newton_tri.cpp
deleted file mode 100644
index 24300f6929..0000000000
--- a/src/npair_half_multi_newton_tri.cpp
+++ /dev/null
@@ -1,179 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_multi_newton_tri.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-#include "neighbor.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfMultiNewtonTri::NPairHalfMultiNewtonTri(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   multi stencil is icollection-jcollection dependent
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfMultiNewtonTri::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,ibin,jbin,icollection,jcollection,which,ns,imol,iatom,moltemplate;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  int js;
-
-  const double delta = 0.01 * force->angstrom;
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == 2) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itag = tag[i];
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-
-      if (icollection == jcollection) jbin = ibin;
-      else jbin = coord2bin(x[i], jcollection);
-
-      // loop over all atoms in bins in stencil
-      // for triclinic:
-      //   stencil is empty if i larger than j
-      //   stencil is full if i smaller than j
-      //   stencil is full if i same size as j
-      // for i smaller than j:
-      //   must use itag/jtag to eliminate half the I/J interactions
-      //   cannot use I/J exact coord comparision
-      //     b/c transforming orthog -> lambda -> orthog for ghost atoms
-      //     with an added PBC offset can shift all 3 coords by epsilon
-
-      s = stencil_multi[icollection][jcollection];
-      ns = nstencil_multi[icollection][jcollection];
-
-      for (k = 0; k < ns; k++) {
-        js = binhead_multi[jcollection][jbin + s[k]];
-        for (j = js; j >= 0; j = bins[j]) {
-
-          // if same size (same collection), exclude half of interactions
-
-          if (cutcollectionsq[icollection][icollection] ==
-              cutcollectionsq[jcollection][jcollection]) {
-            if (j <= i) continue;
-            if (j >= nlocal) {
-              jtag = tag[j];
-              if (itag > jtag) {
-                if ((itag+jtag) % 2 == 0) continue;
-              } else if (itag < jtag) {
-                if ((itag+jtag) % 2 == 1) continue;
-              } else {
-                if (fabs(x[j][2]-ztmp) > delta) {
-                  if (x[j][2] < ztmp) continue;
-                } else if (fabs(x[j][1]-ytmp) > delta) {
-                  if (x[j][1] < ytmp) continue;
-                } else {
-                  if (x[j][0] < xtmp) continue;
-                }
-              }
-            }
-          }
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-
-          if (rsq <= cutneighsq[itype][jtype]) {
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = j;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = j;
-              else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-            } else neighptr[n++] = j;
-          }
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_multi_newton_tri.h b/src/npair_half_multi_newton_tri.h
deleted file mode 100644
index a26087465f..0000000000
--- a/src/npair_half_multi_newton_tri.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/multi/newton/tri,
-           NPairHalfMultiNewtonTri,
-           NP_HALF | NP_MULTI | NP_NEWTON | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_MULTI_NEWTON_TRI_H
-#define LMP_NPAIR_HALF_MULTI_NEWTON_TRI_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfMultiNewtonTri : public NPair {
- public:
-  NPairHalfMultiNewtonTri(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_multi_old_newtoff.cpp b/src/npair_half_multi_old_newtoff.cpp
deleted file mode 100644
index 69c9543838..0000000000
--- a/src/npair_half_multi_old_newtoff.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_multi_old_newtoff.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfMultiOldNewtoff::NPairHalfMultiOldNewtoff(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with partial Newton's 3rd law
-   each owned atom i checks own bin and other bins in stencil
-   multi-type stencil is itype dependent and is distance checked
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfMultiOldNewtoff::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in other bins in stencil including self
-    // only store pair if i < j
-    // skip if i,j neighbor cutoff is less than bin distance
-    // stores own/own pairs only once
-    // stores own/ghost pairs on both procs
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        if (j <= i) continue;
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_multi_old_newtoff.h b/src/npair_half_multi_old_newtoff.h
deleted file mode 100644
index d81d2d685a..0000000000
--- a/src/npair_half_multi_old_newtoff.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/multi/old/newtoff,
-           NPairHalfMultiOldNewtoff,
-           NP_HALF | NP_MULTI_OLD | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_MULTI_OLD_NEWTOFF_H
-#define LMP_NPAIR_HALF_MULTI_OLD_NEWTOFF_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfMultiOldNewtoff : public NPair {
- public:
-  NPairHalfMultiOldNewtoff(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_multi_old_newton.cpp b/src/npair_half_multi_old_newton.cpp
deleted file mode 100644
index e9556d50ff..0000000000
--- a/src/npair_half_multi_old_newton.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_multi_old_newton.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfMultiOldNewton::NPairHalfMultiOldNewton(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   multi-type stencil is itype dependent and is distance checked
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfMultiOldNewton::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over rest of atoms in i's bin, ghosts are at end of linked list
-    // if j is owned atom, store it, since j is beyond i in linked list
-    // if j is ghost, only store if j coords are "above and to the right" of i
-
-    for (j = bins[i]; j >= 0; j = bins[j]) {
-      if (j >= nlocal) {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-      }
-    }
-
-    // loop over all atoms in other bins in stencil, store every pair
-    // skip if i,j neighbor cutoff is less than bin distance
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_multi_old_newton.h b/src/npair_half_multi_old_newton.h
deleted file mode 100644
index 1b8bff5080..0000000000
--- a/src/npair_half_multi_old_newton.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/multi/old/newton,
-           NPairHalfMultiOldNewton,
-           NP_HALF | NP_MULTI_OLD | NP_NEWTON | NP_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_MULTI_OLD_NEWTON_H
-#define LMP_NPAIR_HALF_MULTI_OLD_NEWTON_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfMultiOldNewton : public NPair {
- public:
-  NPairHalfMultiOldNewton(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_multi_old_newton_tri.cpp b/src/npair_half_multi_old_newton_tri.cpp
deleted file mode 100644
index ce3149ebf5..0000000000
--- a/src/npair_half_multi_old_newton_tri.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_multi_old_newton_tri.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfMultiOldNewtonTri::NPairHalfMultiOldNewtonTri(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   multi-type stencil is itype dependent and is distance checked
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfMultiOldNewtonTri::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom,moltemplate;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  const double delta = 0.01 * force->angstrom;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itag = tag[i];
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in bins in stencil
-    // for triclinic, bin stencil is full in all 3 dims
-    // must use itag/jtag to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-
-        if (j <= i) continue;
-        if (j >= nlocal) {
-          jtag = tag[j];
-          if (itag > jtag) {
-            if ((itag+jtag) % 2 == 0) continue;
-          } else if (itag < jtag) {
-            if ((itag+jtag) % 2 == 1) continue;
-          } else {
-            if (fabs(x[j][2]-ztmp) > delta) {
-              if (x[j][2] < ztmp) continue;
-            } else if (fabs(x[j][1]-ytmp) > delta) {
-              if (x[j][1] < ytmp) continue;
-            } else {
-              if (x[j][0] < xtmp) continue;
-            }
-          }
-        }
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_multi_old_newton_tri.h b/src/npair_half_multi_old_newton_tri.h
deleted file mode 100644
index bb15724074..0000000000
--- a/src/npair_half_multi_old_newton_tri.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/multi/old/newton/tri,
-           NPairHalfMultiOldNewtonTri,
-           NP_HALF | NP_MULTI_OLD | NP_NEWTON | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_MULTI_OLD_NEWTON_TRI_H
-#define LMP_NPAIR_HALF_MULTI_OLD_NEWTON_TRI_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfMultiOldNewtonTri : public NPair {
- public:
-  NPairHalfMultiOldNewtonTri(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_nsq_newtoff.cpp b/src/npair_half_nsq_newtoff.cpp
deleted file mode 100644
index 8cc79b7013..0000000000
--- a/src/npair_half_nsq_newtoff.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_nsq_newtoff.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "group.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfNsqNewtoff::NPairHalfNsqNewtoff(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfNsqNewtoff::build(NeighList *list)
-{
-  int i, j, n, itype, jtype, which, bitmask, imol, iatom, moltemplate;
-  tagint tagprev;
-  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
-  if (includegroup) {
-    nlocal = atom->nfirst;
-    bitmask = group->bitmask[includegroup];
-  }
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE)
-    moltemplate = 1;
-  else
-    moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over remaining atoms, owned and ghost
-    // only store pair if i < j
-
-    for (j = i + 1; j < nall; j++) {
-      if (includegroup && !(mask[j] & bitmask)) continue;
-      jtype = type[j];
-      if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i], nspecial[i], tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
-                                 tag[j] - tagprev);
-          else
-            which = 0;
-          if (which == 0)
-            neighptr[n++] = j;
-          else if (domain->minimum_image_check(delx, dely, delz))
-            neighptr[n++] = j;
-          else if (which > 0)
-            neighptr[n++] = j ^ (which << SBBITS);
-        } else
-          neighptr[n++] = j;
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-  list->inum = inum;
-}
diff --git a/src/npair_half_nsq_newtoff.h b/src/npair_half_nsq_newtoff.h
deleted file mode 100644
index 157d9c3835..0000000000
--- a/src/npair_half_nsq_newtoff.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/nsq/newtoff,
-           NPairHalfNsqNewtoff,
-           NP_HALF | NP_NSQ | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_NSQ_NEWTOFF_H
-#define LMP_NPAIR_HALF_NSQ_NEWTOFF_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfNsqNewtoff : public NPair {
- public:
-  NPairHalfNsqNewtoff(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_nsq_newton.cpp b/src/npair_half_nsq_newton.cpp
deleted file mode 100644
index 4d5afbdd3e..0000000000
--- a/src/npair_half_nsq_newton.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_nsq_newton.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "group.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfNsqNewton::NPairHalfNsqNewton(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   N^2 / 2 search for neighbor pairs with full Newton's 3rd law
-   every pair stored exactly once by some processor
-   decision on ghost atoms based on itag,jtag tests
-------------------------------------------------------------------------- */
-
-void NPairHalfNsqNewton::build(NeighList *list)
-{
-  int i,j,n,itype,jtype,which,bitmask,imol,iatom,moltemplate;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr;
-
-  const double delta = 0.01 * force->angstrom;
-  const int triclinic = domain->triclinic;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
-  if (includegroup) {
-    nlocal = atom->nfirst;
-    bitmask = group->bitmask[includegroup];
-  }
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itag = tag[i];
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over remaining atoms, owned and ghost
-    // use itag/jtap comparision to eliminate half the interactions
-    // itag = jtag is possible for long cutoffs that include images of self
-    // for triclinic, must use delta to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision as for orthog
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    for (j = i+1; j < nall; j++) {
-      if (includegroup && !(mask[j] & bitmask)) continue;
-
-      if (j >= nlocal) {
-        jtag = tag[j];
-        if (itag > jtag) {
-          if ((itag+jtag) % 2 == 0) continue;
-        } else if (itag < jtag) {
-          if ((itag+jtag) % 2 == 1) continue;
-        } else if (triclinic) {
-          if (fabs(x[j][2]-ztmp) > delta) {
-            if (x[j][2] < ztmp) continue;
-          } else if (fabs(x[j][1]-ytmp) > delta) {
-            if (x[j][1] < ytmp) continue;
-          } else {
-            if (x[j][0] < xtmp) continue;
-          }
-        } else {
-          if (x[j][2] < ztmp) continue;
-          if (x[j][2] == ztmp) {
-            if (x[j][1] < ytmp) continue;
-            if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-          }
-        }
-      }
-
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_nsq_newton.h b/src/npair_half_nsq_newton.h
deleted file mode 100644
index 8f6952ff01..0000000000
--- a/src/npair_half_nsq_newton.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/nsq/newton,
-           NPairHalfNsqNewton,
-           NP_HALF | NP_NSQ | NP_NEWTON | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_NSQ_NEWTON_H
-#define LMP_NPAIR_HALF_NSQ_NEWTON_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfNsqNewton : public NPair {
- public:
-  NPairHalfNsqNewton(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_respa_bin_newtoff.cpp b/src/npair_half_respa_bin_newtoff.cpp
deleted file mode 100644
index 11d9e916e7..0000000000
--- a/src/npair_half_respa_bin_newtoff.cpp
+++ /dev/null
@@ -1,187 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_respa_bin_newtoff.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfRespaBinNewtoff::NPairHalfRespaBinNewtoff(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   multiple respa lists
-   binned neighbor list construction with partial Newton's 3rd law
-   each owned atom i checks own bin and surrounding bins in non-Newton stencil
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfRespaBinNewtoff::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*neighptr_inner,*neighptr_middle;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_inner = list->ilist_inner;
-  int *numneigh_inner = list->numneigh_inner;
-  int **firstneigh_inner = list->firstneigh_inner;
-  MyPage<int> *ipage_inner = list->ipage_inner;
-
-  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
-  MyPage<int> *ipage_middle;
-  int respamiddle = list->respamiddle;
-  if (respamiddle) {
-    ilist_middle = list->ilist_middle;
-    numneigh_middle = list->numneigh_middle;
-    firstneigh_middle = list->firstneigh_middle;
-    ipage_middle = list->ipage_middle;
-  }
-
-  int inum = 0;
-  int which = 0;
-  int minchange = 0;
-  ipage->reset();
-  ipage_inner->reset();
-  if (respamiddle) ipage_middle->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = n_inner = 0;
-    neighptr = ipage->vget();
-    neighptr_inner = ipage_inner->vget();
-    if (respamiddle) {
-      n_middle = 0;
-      neighptr_middle = ipage_middle->vget();
-    }
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    ibin = atom2bin[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in surrounding bins in stencil including self
-    // only store pair if i < j
-    // stores own/own pairs only once
-    // stores own/ghost pairs on both procs
-
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        if (j <= i) continue;
-
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-
-          if (rsq < cut_inner_sq) {
-            if (which == 0) neighptr_inner[n_inner++] = j;
-            else if (minchange) neighptr_inner[n_inner++] = j;
-            else if (which > 0)
-              neighptr_inner[n_inner++] = j ^ (which << SBBITS);
-          }
-
-          if (respamiddle &&
-              rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
-            if (which == 0) neighptr_middle[n_middle++] = j;
-            else if (minchange) neighptr_middle[n_middle++] = j;
-            else if (which > 0)
-              neighptr_middle[n_middle++] = j ^ (which << SBBITS);
-          }
-        }
-      }
-    }
-
-    ilist[inum] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    ilist_inner[inum] = i;
-    firstneigh_inner[i] = neighptr_inner;
-    numneigh_inner[i] = n_inner;
-    ipage_inner->vgot(n_inner);
-    if (ipage_inner->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (respamiddle) {
-      ilist_middle[inum] = i;
-      firstneigh_middle[i] = neighptr_middle;
-      numneigh_middle[i] = n_middle;
-      ipage_middle->vgot(n_middle);
-      if (ipage_middle->status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-    }
-
-    inum++;
-  }
-
-  list->inum = inum;
-  list->inum_inner = inum;
-  if (respamiddle) list->inum_middle = inum;
-}
diff --git a/src/npair_half_respa_bin_newton.cpp b/src/npair_half_respa_bin_newton.cpp
deleted file mode 100644
index 6f829660bd..0000000000
--- a/src/npair_half_respa_bin_newton.cpp
+++ /dev/null
@@ -1,233 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_respa_bin_newton.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfRespaBinNewton::NPairHalfRespaBinNewton(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   multiple respa lists
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfRespaBinNewton::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*neighptr_inner,*neighptr_middle;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_inner = list->ilist_inner;
-  int *numneigh_inner = list->numneigh_inner;
-  int **firstneigh_inner = list->firstneigh_inner;
-  MyPage<int> *ipage_inner = list->ipage_inner;
-
-  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
-  MyPage<int> *ipage_middle;
-  int respamiddle = list->respamiddle;
-  if (respamiddle) {
-    ilist_middle = list->ilist_middle;
-    numneigh_middle = list->numneigh_middle;
-    firstneigh_middle = list->firstneigh_middle;
-    ipage_middle = list->ipage_middle;
-  }
-
-  int inum = 0;
-  int which = 0;
-  int minchange = 0;
-  ipage->reset();
-  ipage_inner->reset();
-  if (respamiddle) ipage_middle->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = n_inner = 0;
-    neighptr = ipage->vget();
-    neighptr_inner = ipage_inner->vget();
-    if (respamiddle) {
-      n_middle = 0;
-      neighptr_middle = ipage_middle->vget();
-    }
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over rest of atoms in i's bin, ghosts are at end of linked list
-    // if j is owned atom, store it, since j is beyond i in linked list
-    // if j is ghost, only store if j coords are "above and to the right" of i
-
-    for (j = bins[i]; j >= 0; j = bins[j]) {
-      if (j >= nlocal) {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-
-        if (rsq < cut_inner_sq) {
-          if (which == 0) neighptr_inner[n_inner++] = j;
-          else if (minchange) neighptr_inner[n_inner++] = j;
-          else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS);
-        }
-
-        if (respamiddle &&
-            rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
-          if (which == 0) neighptr_middle[n_middle++] = j;
-          else if (minchange) neighptr_middle[n_middle++] = j;
-          else if (which > 0)
-            neighptr_middle[n_middle++] = j ^ (which << SBBITS);
-        }
-      }
-    }
-
-    // loop over all atoms in other bins in stencil, store every pair
-
-    ibin = atom2bin[i];
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-
-          if (rsq < cut_inner_sq) {
-            if (which == 0) neighptr_inner[n_inner++] = j;
-            else if (minchange) neighptr_inner[n_inner++] = j;
-            else if (which > 0)
-              neighptr_inner[n_inner++] = j ^ (which << SBBITS);
-          }
-
-          if (respamiddle &&
-              rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
-            if (which == 0) neighptr_middle[n_middle++] = j;
-            else if (minchange) neighptr_middle[n_middle++] = j;
-            else if (which > 0)
-              neighptr_middle[n_middle++] = j ^ (which << SBBITS);
-          }
-        }
-      }
-    }
-
-    ilist[inum] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    ilist_inner[inum] = i;
-    firstneigh_inner[i] = neighptr_inner;
-    numneigh_inner[i] = n_inner;
-    ipage_inner->vgot(n_inner);
-    if (ipage_inner->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (respamiddle) {
-      ilist_middle[inum] = i;
-      firstneigh_middle[i] = neighptr_middle;
-      numneigh_middle[i] = n_middle;
-      ipage_middle->vgot(n_middle);
-      if (ipage_middle->status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-    }
-
-    inum++;
-  }
-
-  list->inum = inum;
-  list->inum_inner = inum;
-  if (respamiddle) list->inum_middle = inum;
-}
diff --git a/src/npair_half_respa_bin_newton.h b/src/npair_half_respa_bin_newton.h
deleted file mode 100644
index 2cd68446a8..0000000000
--- a/src/npair_half_respa_bin_newton.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/respa/bin/newton,
-           NPairHalfRespaBinNewton,
-           NP_HALF | NP_RESPA | NP_BIN | NP_NEWTON | NP_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_RESPA_BIN_NEWTON_H
-#define LMP_NPAIR_HALF_RESPA_BIN_NEWTON_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfRespaBinNewton : public NPair {
- public:
-  NPairHalfRespaBinNewton(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_respa_bin_newton_tri.cpp b/src/npair_half_respa_bin_newton_tri.cpp
deleted file mode 100644
index 4cd4ead0fa..0000000000
--- a/src/npair_half_respa_bin_newton_tri.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_respa_bin_newton_tri.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfRespaBinNewtonTri::NPairHalfRespaBinNewtonTri(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   multiple respa lists
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfRespaBinNewtonTri::build(NeighList *list)
-{
-  int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom,moltemplate;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*neighptr_inner,*neighptr_middle;
-
-  const double delta = 0.01 * force->angstrom;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_inner = list->ilist_inner;
-  int *numneigh_inner = list->numneigh_inner;
-  int **firstneigh_inner = list->firstneigh_inner;
-  MyPage<int> *ipage_inner = list->ipage_inner;
-
-  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
-  MyPage<int> *ipage_middle;
-  int respamiddle = list->respamiddle;
-  if (respamiddle) {
-    ilist_middle = list->ilist_middle;
-    numneigh_middle = list->numneigh_middle;
-    firstneigh_middle = list->firstneigh_middle;
-    ipage_middle = list->ipage_middle;
-  }
-
-  int inum = 0;
-  int which = 0;
-  int minchange = 0;
-  ipage->reset();
-  ipage_inner->reset();
-  if (respamiddle) ipage_middle->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = n_inner = 0;
-    neighptr = ipage->vget();
-    neighptr_inner = ipage_inner->vget();
-    if (respamiddle) {
-      n_middle = 0;
-      neighptr_middle = ipage_middle->vget();
-    }
-
-    itag = tag[i];
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in bins in stencil
-    // for triclinic, bin stencil is full in all 3 dims
-    // must use itag/jtag to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    ibin = atom2bin[i];
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-
-        if (j <= i) continue;
-        if (j >= nlocal) {
-          jtag = tag[j];
-          if (itag > jtag) {
-            if ((itag+jtag) % 2 == 0) continue;
-          } else if (itag < jtag) {
-            if ((itag+jtag) % 2 == 1) continue;
-          } else {
-            if (fabs(x[j][2]-ztmp) > delta) {
-              if (x[j][2] < ztmp) continue;
-            } else if (fabs(x[j][1]-ytmp) > delta) {
-              if (x[j][1] < ytmp) continue;
-            } else {
-              if (x[j][0] < xtmp) continue;
-            }
-          }
-        }
-
-        jtype = type[j];
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-
-        if (rsq <= cutneighsq[itype][jtype]) {
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = j;
-            else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
-              neighptr[n++] = j;
-            else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-          } else neighptr[n++] = j;
-
-          if (rsq < cut_inner_sq) {
-            if (which == 0) neighptr_inner[n_inner++] = j;
-            else if (minchange) neighptr_inner[n_inner++] = j;
-            else if (which > 0)
-              neighptr_inner[n_inner++] = j ^ (which << SBBITS);
-          }
-
-          if (respamiddle &&
-              rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
-            if (which == 0) neighptr_middle[n_middle++] = j;
-            else if (minchange) neighptr_middle[n_middle++] = j;
-            else if (which > 0)
-              neighptr_middle[n_middle++] = j ^ (which << SBBITS);
-          }
-        }
-      }
-    }
-
-    ilist[inum] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    ilist_inner[inum] = i;
-    firstneigh_inner[i] = neighptr_inner;
-    numneigh_inner[i] = n_inner;
-    ipage_inner->vgot(n_inner);
-    if (ipage_inner->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (respamiddle) {
-      ilist_middle[inum] = i;
-      firstneigh_middle[i] = neighptr_middle;
-      numneigh_middle[i] = n_middle;
-      ipage_middle->vgot(n_middle);
-      if (ipage_middle->status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-    }
-
-    inum++;
-  }
-
-  list->inum = inum;
-  list->inum_inner = inum;
-  if (respamiddle) list->inum_middle = inum;
-}
diff --git a/src/npair_half_respa_bin_newton_tri.h b/src/npair_half_respa_bin_newton_tri.h
deleted file mode 100644
index 68289c2d37..0000000000
--- a/src/npair_half_respa_bin_newton_tri.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/respa/bin/newton/tri,
-           NPairHalfRespaBinNewtonTri,
-           NP_HALF | NP_RESPA | NP_BIN | NP_NEWTON | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_RESPA_BIN_NEWTON_TRI_H
-#define LMP_NPAIR_HALF_RESPA_BIN_NEWTON_TRI_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfRespaBinNewtonTri : public NPair {
- public:
-  NPairHalfRespaBinNewtonTri(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_respa_nsq_newtoff.cpp b/src/npair_half_respa_nsq_newtoff.cpp
deleted file mode 100644
index 7bbd9dbece..0000000000
--- a/src/npair_half_respa_nsq_newtoff.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_respa_nsq_newtoff.h"
-#include "neigh_list.h"
-#include "atom.h"
-#include "atom_vec.h"
-#include "group.h"
-#include "molecule.h"
-#include "domain.h"
-#include "my_page.h"
-#include "error.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfRespaNsqNewtoff::NPairHalfRespaNsqNewtoff(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   multiple respa lists
-   N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
-   pair added to list if atoms i and j are both owned and i < j
-   pair added if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfRespaNsqNewtoff::build(NeighList *list)
-{
-  int i,j,n,itype,jtype,n_inner,n_middle,bitmask,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*neighptr_inner,*neighptr_middle;
-
-  double **x = atom->x;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
-  if (includegroup) {
-    nlocal = atom->nfirst;
-    bitmask = group->bitmask[includegroup];
-  }
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_inner = list->ilist_inner;
-  int *numneigh_inner = list->numneigh_inner;
-  int **firstneigh_inner = list->firstneigh_inner;
-  MyPage<int> *ipage_inner = list->ipage_inner;
-
-  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
-  MyPage<int> *ipage_middle;
-  int respamiddle = list->respamiddle;
-  if (respamiddle) {
-    ilist_middle = list->ilist_middle;
-    numneigh_middle = list->numneigh_middle;
-    firstneigh_middle = list->firstneigh_middle;
-    ipage_middle = list->ipage_middle;
-  }
-
-  int inum = 0;
-  int which = 0;
-  int minchange = 0;
-  ipage->reset();
-  ipage_inner->reset();
-  if (respamiddle) ipage_middle->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = n_inner = 0;
-    neighptr = ipage->vget();
-    neighptr_inner = ipage_inner->vget();
-    if (respamiddle) {
-      n_middle = 0;
-      neighptr_middle = ipage_middle->vget();
-    }
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over remaining atoms, owned and ghost
-
-    for (j = i+1; j < nall; j++) {
-      if (includegroup && !(mask[j] & bitmask)) continue;
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-
-      if (rsq <= cutneighsq[itype][jtype]) {
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-
-        if (rsq < cut_inner_sq) {
-          if (which == 0) neighptr_inner[n_inner++] = j;
-          else if (minchange) neighptr_inner[n_inner++] = j;
-          else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS);
-        }
-
-        if (respamiddle && rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
-          if (which == 0) neighptr_middle[n_middle++] = j;
-          else if (minchange) neighptr_middle[n_middle++] = j;
-          else if (which > 0)
-            neighptr_middle[n_middle++] = j ^ (which << SBBITS);
-        }
-      }
-    }
-
-    ilist[inum] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    ilist_inner[inum] = i;
-    firstneigh_inner[i] = neighptr_inner;
-    numneigh_inner[i] = n_inner;
-    ipage_inner->vgot(n_inner);
-    if (ipage_inner->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (respamiddle) {
-      ilist_middle[inum] = i;
-      firstneigh_middle[i] = neighptr_middle;
-      numneigh_middle[i] = n_middle;
-      ipage_middle->vgot(n_middle);
-      if (ipage_middle->status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-    }
-
-    inum++;
-  }
-
-  list->inum = inum;
-  list->inum_inner = inum;
-  if (respamiddle) list->inum_middle = inum;
-}
diff --git a/src/npair_half_respa_nsq_newton.h b/src/npair_half_respa_nsq_newton.h
deleted file mode 100644
index 4a5ae23aef..0000000000
--- a/src/npair_half_respa_nsq_newton.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/respa/nsq/newton,
-           NPairHalfRespaNsqNewton,
-           NP_HALF | NP_RESPA | NP_NSQ | NP_NEWTON | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_RESPA_NSQ_NEWTON_H
-#define LMP_NPAIR_HALF_RESPA_NSQ_NEWTON_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfRespaNsqNewton : public NPair {
- public:
-  NPairHalfRespaNsqNewton(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_size_bin_newtoff.cpp b/src/npair_half_size_bin_newtoff.cpp
deleted file mode 100644
index b4842337ca..0000000000
--- a/src/npair_half_size_bin_newtoff.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_bin_newtoff.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeBinNewtoff::NPairHalfSizeBinNewtoff(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with partial Newton's 3rd law
-   each owned atom i checks own bin and surrounding bins in non-Newton stencil
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeBinNewtoff::build(NeighList *list)
-{
-  int i,j,jh,k,n,ibin,which,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int history = list->history;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int mask_history = 1 << HISTBITS;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    ibin = atom2bin[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in surrounding bins in stencil including self
-    // only store pair if i < j
-    // stores own/own pairs only once
-    // stores own/ghost pairs on both procs
-
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        if (j <= i) continue;
-        if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        radsum = radi + radius[j];
-        cutsq = (radsum+skin) * (radsum+skin);
-
-        if (rsq <= cutsq) {
-          jh = j;
-          if (history && rsq < radsum*radsum)
-            jh = jh ^ mask_history;
-
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = jh;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = jh;
-            else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-          } else neighptr[n++] = jh;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_size_bin_newtoff.h b/src/npair_half_size_bin_newtoff.h
deleted file mode 100644
index ac68f699ca..0000000000
--- a/src/npair_half_size_bin_newtoff.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/bin/newtoff,
-           NPairHalfSizeBinNewtoff,
-           NP_HALF | NP_SIZE | NP_BIN | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_BIN_NEWTOFF_H
-#define LMP_NPAIR_HALF_SIZE_BIN_NEWTOFF_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeBinNewtoff : public NPair {
- public:
-  NPairHalfSizeBinNewtoff(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_size_bin_newton.cpp b/src/npair_half_size_bin_newton.cpp
deleted file mode 100644
index 11ac30ed30..0000000000
--- a/src/npair_half_size_bin_newton.cpp
+++ /dev/null
@@ -1,176 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_bin_newton.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeBinNewton::NPairHalfSizeBinNewton(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeBinNewton::build(NeighList *list)
-{
-  int i,j,jh,k,n,ibin,which,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int history = list->history;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int mask_history = 1 << HISTBITS;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over rest of atoms in i's bin, ghosts are at end of linked list
-    // if j is owned atom, store it, since j is beyond i in linked list
-    // if j is ghost, only store if j coords are "above and to the right" of i
-
-    for (j = bins[i]; j >= 0; j = bins[j]) {
-      if (j >= nlocal) {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-      radsum = radi + radius[j];
-      cutsq = (radsum+skin) * (radsum+skin);
-
-      if (rsq <= cutsq) {
-        jh = j;
-        if (history && rsq < radsum*radsum)
-            jh = jh ^ mask_history;
-
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = jh;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = jh;
-          else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-        } else neighptr[n++] = jh;
-      }
-    }
-
-    // loop over all atoms in other bins in stencil, store every pair
-
-    ibin = atom2bin[i];
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-        if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        radsum = radi + radius[j];
-        cutsq = (radsum+skin) * (radsum+skin);
-
-        if (rsq <= cutsq) {
-          jh = j;
-          if (history && rsq < radsum*radsum)
-            jh = jh ^ mask_history;
-
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = jh;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = jh;
-            else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-          } else neighptr[n++] = jh;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_size_bin_newton_tri.cpp b/src/npair_half_size_bin_newton_tri.cpp
deleted file mode 100644
index 0d1a0a7329..0000000000
--- a/src/npair_half_size_bin_newton_tri.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_bin_newton_tri.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeBinNewtonTri::NPairHalfSizeBinNewtonTri(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeBinNewtonTri::build(NeighList *list)
-{
-  int i,j,jh,k,n,ibin,which,imol,iatom,moltemplate;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutsq;
-  int *neighptr;
-
-  const double delta = 0.01 * force->angstrom;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int history = list->history;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int mask_history = 1 << HISTBITS;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itag = tag[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in bins in stencil
-    // for triclinic, bin stencil is full in all 3 dims
-    // must use itag/jtag to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    ibin = atom2bin[i];
-    for (k = 0; k < nstencil; k++) {
-      for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) {
-
-        if (j <= i) continue;
-        if (j >= nlocal) {
-          jtag = tag[j];
-          if (itag > jtag) {
-            if ((itag+jtag) % 2 == 0) continue;
-          } else if (itag < jtag) {
-            if ((itag+jtag) % 2 == 1) continue;
-          } else {
-            if (fabs(x[j][2]-ztmp) > delta) {
-              if (x[j][2] < ztmp) continue;
-            } else if (fabs(x[j][1]-ytmp) > delta) {
-              if (x[j][1] < ytmp) continue;
-            } else {
-              if (x[j][0] < xtmp) continue;
-            }
-          }
-        }
-
-        if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        radsum = radi + radius[j];
-        cutsq = (radsum+skin) * (radsum+skin);
-
-        if (rsq <= cutsq) {
-          jh = j;
-          if (history && rsq < radsum*radsum)
-            jh = jh ^ mask_history;
-
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = jh;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = jh;
-            else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-          } else neighptr[n++] = jh;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_size_bin_newton_tri.h b/src/npair_half_size_bin_newton_tri.h
deleted file mode 100644
index ad35b7ac36..0000000000
--- a/src/npair_half_size_bin_newton_tri.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/bin/newton/tri,
-           NPairHalfSizeBinNewtonTri,
-           NP_HALF | NP_SIZE | NP_BIN | NP_NEWTON | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_BIN_NEWTON_TRI_H
-#define LMP_NPAIR_HALF_SIZE_BIN_NEWTON_TRI_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeBinNewtonTri : public NPair {
- public:
-  NPairHalfSizeBinNewtonTri(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_size_multi_newtoff.cpp b/src/npair_half_size_multi_newtoff.cpp
deleted file mode 100644
index 0c22a49ed0..0000000000
--- a/src/npair_half_size_multi_newtoff.cpp
+++ /dev/null
@@ -1,160 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-es   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_multi_newtoff.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeMultiNewtoff::NPairHalfSizeMultiNewtoff(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with partial Newton's 3rd law
-   multi stencil is icollection-jcollection dependent
-   each owned atom i checks own bin and other bins in stencil
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeMultiNewtoff::build(NeighList *list)
-{
-  int i,j,jh,k,n,itype,jtype,icollection,jcollection,ibin,jbin,ns;
-  int which,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutdistsq;
-  int *neighptr,*s;
-  int js;
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int history = list->history;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int mask_history = 1 << HISTBITS;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-      if (icollection == jcollection) jbin = ibin;
-          else jbin = coord2bin(x[i], jcollection);
-
-      // loop over all atoms in other bins in stencil including self
-      // only store pair if i < j
-      // stores own/own pairs only once
-      // stores own/ghost pairs on both procs
-      // use full stencil for all collection combinations
-
-      s = stencil_multi[icollection][jcollection];
-      ns = nstencil_multi[icollection][jcollection];
-
-      for (k = 0; k < ns; k++) {
-        js = binhead_multi[jcollection][jbin + s[k]];
-        for (j = js; j >= 0; j = bins[j]) {
-          if (j <= i) continue;
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-          radsum = radi + radius[j];
-          cutdistsq = (radsum+skin) * (radsum+skin);
-
-          if (rsq <= cutdistsq) {
-            jh = j;
-            if (history && rsq < radsum*radsum)
-              jh = jh ^ mask_history;
-
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = jh;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = jh;
-              else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-            } else neighptr[n++] = jh;
-          }
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_size_multi_newtoff.h b/src/npair_half_size_multi_newtoff.h
deleted file mode 100644
index 89ca0eae4e..0000000000
--- a/src/npair_half_size_multi_newtoff.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/multi/newtoff,
-           NPairHalfSizeMultiNewtoff,
-           NP_HALF | NP_SIZE | NP_MULTI | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_MULTI_NEWTOFF_H
-#define LMP_NPAIR_HALF_SIZE_MULTI_NEWTOFF_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeMultiNewtoff : public NPair {
- public:
-  NPairHalfSizeMultiNewtoff(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_size_multi_newton.cpp b/src/npair_half_size_multi_newton.cpp
deleted file mode 100644
index ff9df7e2cf..0000000000
--- a/src/npair_half_size_multi_newton.cpp
+++ /dev/null
@@ -1,213 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_multi_newton.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeMultiNewton::NPairHalfSizeMultiNewton(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with full Newton's 3rd law
-   multi stencil is icollection-jcollection dependent
-   each owned atom i checks its own bin and other bins in Newton stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeMultiNewton::build(NeighList *list)
-{
-  int i,j,jh,k,n,itype,jtype,icollection,jcollection,ibin,jbin,ns,js;
-  int which,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutdistsq;
-  int *neighptr,*s;
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int history = list->history;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int mask_history = 1 << HISTBITS;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-      if (icollection == jcollection) jbin = ibin;
-          else jbin = coord2bin(x[i], jcollection);
-
-      // if same size: uses half stencil so check central bin
-      if (cutcollectionsq[icollection][icollection] == cutcollectionsq[jcollection][jcollection]){
-
-        if (icollection == jcollection) js = bins[i];
-        else js = binhead_multi[jcollection][jbin];
-
-        // if same collection,
-        //   if j is owned atom, store it, since j is beyond i in linked list
-        //   if j is ghost, only store if j coords are "above and to the right" of i
-
-        // if different collections,
-        //   if j is owned atom, store it if j > i
-        //   if j is ghost, only store if j coords are "above and to the right" of i
-
-        for (j = js; j >= 0; j = bins[j]) {
-          if ((icollection != jcollection) && (j < i)) continue;
-
-          if (j >= nlocal) {
-            if (x[j][2] < ztmp) continue;
-            if (x[j][2] == ztmp) {
-              if (x[j][1] < ytmp) continue;
-              if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-            }
-          }
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-          radsum = radi + radius[j];
-          cutdistsq = (radsum+skin) * (radsum+skin);
-
-          if (rsq <= cutdistsq) {
-            jh = j;
-            if (history && rsq < radsum*radsum)
-              jh = jh ^ mask_history;
-
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = jh;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = jh;
-              else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-            } else neighptr[n++] = jh;
-          }
-        }
-      }
-
-      // for all collections, loop over all atoms in other bins in stencil, store every pair
-      // stencil is empty if i larger than j
-      // stencil is half if i same size as j
-      // stencil is full if i smaller than j
-
-      s = stencil_multi[icollection][jcollection];
-      ns = nstencil_multi[icollection][jcollection];
-
-      for (k = 0; k < ns; k++) {
-        js = binhead_multi[jcollection][jbin + s[k]];
-        for (j = js; j >= 0; j = bins[j]) {
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-          radsum = radi + radius[j];
-          cutdistsq = (radsum+skin) * (radsum+skin);
-
-          if (rsq <= cutdistsq) {
-            jh = j;
-            if (history && rsq < radsum*radsum)
-              jh = jh ^ mask_history;
-
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = jh;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = jh;
-              else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-            } else neighptr[n++] = jh;
-          }
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_size_multi_newton.h b/src/npair_half_size_multi_newton.h
deleted file mode 100644
index 2f892e37ee..0000000000
--- a/src/npair_half_size_multi_newton.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/multi/newton,
-           NPairHalfSizeMultiNewton,
-           NP_HALF | NP_SIZE | NP_MULTI | NP_NEWTON | NP_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_MULTI_NEWTON_H
-#define LMP_NPAIR_HALF_SIZE_MULTI_NEWTON_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeMultiNewton : public NPair {
- public:
-  NPairHalfSizeMultiNewton(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_size_multi_newton_tri.cpp b/src/npair_half_size_multi_newton_tri.cpp
deleted file mode 100644
index aa0d8e3f42..0000000000
--- a/src/npair_half_size_multi_newton_tri.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_multi_newton_tri.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeMultiNewtonTri::NPairHalfSizeMultiNewtonTri(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   multi stencil is icollection-jcollection dependent
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeMultiNewtonTri::build(NeighList *list)
-{
-  int i,j,jh,k,n,itype,jtype,icollection,jcollection,ibin,jbin,ns,js;
-  int which,imol,iatom,moltemplate;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutdistsq;
-  int *neighptr,*s;
-
-  const double delta = 0.01 * force->angstrom;
-
-  int *collection = neighbor->collection;
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int history = list->history;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int mask_history = 1 << HISTBITS;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itag = tag[i];
-    itype = type[i];
-    icollection = collection[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    ibin = atom2bin[i];
-
-    // loop through stencils for all collections
-
-    for (jcollection = 0; jcollection < ncollections; jcollection++) {
-
-      // if same collection use own bin
-
-      if (icollection == jcollection) jbin = ibin;
-      else jbin = coord2bin(x[i], jcollection);
-
-      // loop over all atoms in bins in stencil
-      // stencil is empty if i larger than j
-      // stencil is half if i same size as j
-      // stencil is full if i smaller than j
-      // if half: pairs for atoms j "below" i are excluded
-      // below = lower z or (equal z and lower y) or (equal zy and lower x)
-      //         (equal zyx and j <= i)
-      // latter excludes self-self interaction but allows superposed atoms
-
-      s = stencil_multi[icollection][jcollection];
-      ns = nstencil_multi[icollection][jcollection];
-
-      for (k = 0; k < ns; k++) {
-        js = binhead_multi[jcollection][jbin + s[k]];
-        for (j = js; j >= 0; j = bins[j]) {
-
-          // if same size (same collection), exclude half of interactions
-
-          if (cutcollectionsq[icollection][icollection] ==
-              cutcollectionsq[jcollection][jcollection]) {
-            if (j <= i) continue;
-            if (j >= nlocal) {
-              jtag = tag[j];
-              if (itag > jtag) {
-                if ((itag+jtag) % 2 == 0) continue;
-              } else if (itag < jtag) {
-                if ((itag+jtag) % 2 == 1) continue;
-              } else {
-                if (fabs(x[j][2]-ztmp) > delta) {
-                  if (x[j][2] < ztmp) continue;
-                } else if (fabs(x[j][1]-ytmp) > delta) {
-                  if (x[j][1] < ytmp) continue;
-                } else {
-                  if (x[j][0] < xtmp) continue;
-                }
-              }
-            }
-          }
-
-          jtype = type[j];
-          if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-          delx = xtmp - x[j][0];
-          dely = ytmp - x[j][1];
-          delz = ztmp - x[j][2];
-          rsq = delx*delx + dely*dely + delz*delz;
-          radsum = radi + radius[j];
-          cutdistsq = (radsum+skin) * (radsum+skin);
-
-          if (rsq <= cutdistsq) {
-            jh = j;
-            if (history && rsq < radsum*radsum)
-              jh = jh ^ mask_history;
-
-            if (molecular != Atom::ATOMIC) {
-              if (!moltemplate)
-                which = find_special(special[i],nspecial[i],tag[j]);
-              else if (imol >= 0)
-                which = find_special(onemols[imol]->special[iatom],
-                                     onemols[imol]->nspecial[iatom],
-                                     tag[j]-tagprev);
-              else which = 0;
-              if (which == 0) neighptr[n++] = jh;
-              else if (domain->minimum_image_check(delx,dely,delz))
-                neighptr[n++] = jh;
-              else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-            } else neighptr[n++] = jh;
-          }
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_size_multi_newton_tri.h b/src/npair_half_size_multi_newton_tri.h
deleted file mode 100644
index 70563147dc..0000000000
--- a/src/npair_half_size_multi_newton_tri.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/multi/newton/tri,
-           NPairHalfSizeMultiNewtonTri,
-           NP_HALF | NP_SIZE | NP_MULTI | NP_NEWTON | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_MULTI_NEWTON_TRI_H
-#define LMP_NPAIR_HALF_SIZE_MULTI_NEWTON_TRI_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeMultiNewtonTri : public NPair {
- public:
-  NPairHalfSizeMultiNewtonTri(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_size_multi_old_newtoff.cpp b/src/npair_half_size_multi_old_newtoff.cpp
deleted file mode 100644
index 158ddc0f10..0000000000
--- a/src/npair_half_size_multi_old_newtoff.cpp
+++ /dev/null
@@ -1,149 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_multi_old_newtoff.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeMultiOldNewtoff::NPairHalfSizeMultiOldNewtoff(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with partial Newton's 3rd law
-   each owned atom i checks own bin and other bins in stencil
-   multi-type stencil is itype dependent and is distance checked
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeMultiOldNewtoff::build(NeighList *list)
-{
-  int i,j,jh,k,n,itype,jtype,ibin,ns,which,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutdistsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int history = list->history;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int mask_history = 1 << HISTBITS;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in other bins in stencil including self
-    // only store pair if i < j
-    // skip if i,j neighbor cutoff is less than bin distance
-    // stores own/own pairs only once
-    // stores own/ghost pairs on both procs
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        if (j <= i) continue;
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        radsum = radi + radius[j];
-        cutdistsq = (radsum+skin) * (radsum+skin);
-
-        if (rsq <= cutdistsq) {
-          jh = j;
-          if (history && rsq < radsum*radsum)
-            jh = jh ^ mask_history;
-
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = jh;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = jh;
-            else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-          } else neighptr[n++] = jh;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_size_multi_old_newtoff.h b/src/npair_half_size_multi_old_newtoff.h
deleted file mode 100644
index 051d5d47de..0000000000
--- a/src/npair_half_size_multi_old_newtoff.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/multi/old/newtoff,
-           NPairHalfSizeMultiOldNewtoff,
-           NP_HALF | NP_SIZE | NP_MULTI_OLD | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_MULTI_OLD_NEWTOFF_H
-#define LMP_NPAIR_HALF_SIZE_MULTI_OLD_NEWTOFF_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeMultiOldNewtoff : public NPair {
- public:
-  NPairHalfSizeMultiOldNewtoff(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_size_multi_old_newton.cpp b/src/npair_half_size_multi_old_newton.cpp
deleted file mode 100644
index cbb75f568d..0000000000
--- a/src/npair_half_size_multi_old_newton.cpp
+++ /dev/null
@@ -1,187 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_multi_old_newton.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeMultiOldNewton::NPairHalfSizeMultiOldNewton(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   binned neighbor list construction with full Newton's 3rd law
-   each owned atom i checks its own bin and other bins in Newton stencil
-   multi-type stencil is itype dependent and is distance checked
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeMultiOldNewton::build(NeighList *list)
-{
-  int i,j,jh,k,n,itype,jtype,ibin,ns,which,imol,iatom,moltemplate;
-  tagint tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutdistsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int history = list->history;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int mask_history = 1 << HISTBITS;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over rest of atoms in i's bin, ghosts are at end of linked list
-    // if j is owned atom, store it, since j is beyond i in linked list
-    // if j is ghost, only store if j coords are "above and to the right" of i
-
-    for (j = bins[i]; j >= 0; j = bins[j]) {
-      if (j >= nlocal) {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-      radsum = radi + radius[j];
-      cutdistsq = (radsum+skin) * (radsum+skin);
-
-      if (rsq <= cutdistsq) {
-        jh = j;
-        if (history && rsq < radsum*radsum)
-          jh = jh ^ mask_history;
-
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = jh;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = jh;
-          else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-        } else neighptr[n++] = jh;
-      }
-    }
-
-    // loop over all atoms in other bins in stencil, store every pair
-    // skip if i,j neighbor cutoff is less than bin distance
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        radsum = radi + radius[j];
-        cutdistsq = (radsum+skin) * (radsum+skin);
-
-        if (rsq <= cutdistsq) {
-        if (history && rsq < radsum*radsum)
-            j = j ^ mask_history;
-
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_size_multi_old_newton.h b/src/npair_half_size_multi_old_newton.h
deleted file mode 100644
index 082e752e80..0000000000
--- a/src/npair_half_size_multi_old_newton.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/multi/old/newton,
-           NPairHalfSizeMultiOldNewton,
-           NP_HALF | NP_SIZE | NP_MULTI_OLD | NP_NEWTON | NP_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_MULTI_OLD_NEWTON_H
-#define LMP_NPAIR_HALF_SIZE_MULTI_OLD_NEWTON_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeMultiOldNewton : public NPair {
- public:
-  NPairHalfSizeMultiOldNewton(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_size_multi_old_newton_tri.cpp b/src/npair_half_size_multi_old_newton_tri.cpp
deleted file mode 100644
index 848a19aa39..0000000000
--- a/src/npair_half_size_multi_old_newton_tri.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_multi_old_newton_tri.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeMultiOldNewtonTri::NPairHalfSizeMultiOldNewtonTri(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   binned neighbor list construction with Newton's 3rd law for triclinic
-   each owned atom i checks its own bin and other bins in triclinic stencil
-   multi-type stencil is itype dependent and is distance checked
-   every pair stored exactly once by some processor
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeMultiOldNewtonTri::build(NeighList *list)
-{
-  int i,j,jh,k,n,itype,jtype,ibin,ns,which,imol,iatom,moltemplate;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutdistsq;
-  int *neighptr,*s;
-  double *cutsq,*distsq;
-
-  const double delta = 0.01 * force->angstrom;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  if (includegroup) nlocal = atom->nfirst;
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int history = list->history;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int mask_history = 1 << HISTBITS;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itag = tag[i];
-    itype = type[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over all atoms in bins in stencil
-    // for triclinic, bin stencil is full in all 3 dims
-    // must use itag/jtag to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    ibin = atom2bin[i];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    cutsq = cutneighsq[itype];
-    ns = nstencil_multi_old[itype];
-    for (k = 0; k < ns; k++) {
-      for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) {
-        jtype = type[j];
-        if (cutsq[jtype] < distsq[k]) continue;
-
-        if (j <= i) continue;
-        if (j >= nlocal) {
-          jtag = tag[j];
-          if (itag > jtag) {
-            if ((itag+jtag) % 2 == 0) continue;
-          } else if (itag < jtag) {
-            if ((itag+jtag) % 2 == 1) continue;
-          } else {
-            if (fabs(x[j][2]-ztmp) > delta) {
-              if (x[j][2] < ztmp) continue;
-            } else if (fabs(x[j][1]-ytmp) > delta) {
-              if (x[j][1] < ytmp) continue;
-            } else {
-              if (x[j][0] < xtmp) continue;
-            }
-          }
-        }
-
-        if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
-        radsum = radi + radius[j];
-        cutdistsq = (radsum+skin) * (radsum+skin);
-
-        if (rsq <= cutdistsq) {
-          jh = j;
-          if (history && rsq < radsum*radsum)
-            jh = jh ^ mask_history;
-
-          if (molecular != Atom::ATOMIC) {
-            if (!moltemplate)
-              which = find_special(special[i],nspecial[i],tag[j]);
-            else if (imol >= 0)
-              which = find_special(onemols[imol]->special[iatom],
-                                   onemols[imol]->nspecial[iatom],
-                                   tag[j]-tagprev);
-            else which = 0;
-            if (which == 0) neighptr[n++] = jh;
-            else if (domain->minimum_image_check(delx,dely,delz))
-              neighptr[n++] = jh;
-            else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-          } else neighptr[n++] = jh;
-        }
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_half_size_multi_old_newton_tri.h b/src/npair_half_size_multi_old_newton_tri.h
deleted file mode 100644
index 354832e07d..0000000000
--- a/src/npair_half_size_multi_old_newton_tri.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/multi/old/newton/tri,
-           NPairHalfSizeMultiOldNewtonTri,
-           NP_HALF | NP_SIZE | NP_MULTI_OLD | NP_NEWTON | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_MULTI_OLD_NEWTON_TRI_H
-#define LMP_NPAIR_HALF_SIZE_MULTI_OLD_NEWTON_TRI_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeMultiOldNewtonTri : public NPair {
- public:
-  NPairHalfSizeMultiOldNewtonTri(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_size_nsq_newtoff.cpp b/src/npair_half_size_nsq_newtoff.cpp
deleted file mode 100644
index acbb4cf7dd..0000000000
--- a/src/npair_half_size_nsq_newtoff.cpp
+++ /dev/null
@@ -1,137 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_nsq_newtoff.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "group.h"
-#include "molecule.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeNsqNewtoff::NPairHalfSizeNsqNewtoff(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
-   pair added to list if atoms i and j are both owned and i < j
-   pair added if j is ghost (also stored by proc owning j)
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeNsqNewtoff::build(NeighList *list)
-{
-  int i, j, jh, n, bitmask, which, imol, iatom, moltemplate;
-  tagint tagprev;
-  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
-  double radi, radsum, cutsq;
-  int *neighptr;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *tag = atom->tag;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
-  if (includegroup) {
-    nlocal = atom->nfirst;
-    bitmask = group->bitmask[includegroup];
-  }
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE)
-    moltemplate = 1;
-  else
-    moltemplate = 0;
-
-  int history = list->history;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int mask_history = 1 << HISTBITS;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over remaining atoms, owned and ghost
-
-    for (j = i + 1; j < nall; j++) {
-      if (includegroup && !(mask[j] & bitmask)) continue;
-      if (exclude && exclusion(i, j, type[i], type[j], mask, molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      radsum = radi + radius[j];
-      cutsq = (radsum + skin) * (radsum + skin);
-
-      if (rsq <= cutsq) {
-        jh = j;
-        if (history && rsq < radsum * radsum) jh = jh ^ mask_history;
-
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i], nspecial[i], tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
-                                 tag[j] - tagprev);
-          else
-            which = 0;
-          if (which == 0)
-            neighptr[n++] = jh;
-          else if (domain->minimum_image_check(delx, dely, delz))
-            neighptr[n++] = jh;
-          else if (which > 0)
-            neighptr[n++] = jh ^ (which << SBBITS);
-        } else
-          neighptr[n++] = jh;
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-  list->inum = inum;
-}
diff --git a/src/npair_half_size_nsq_newtoff.h b/src/npair_half_size_nsq_newtoff.h
deleted file mode 100644
index d84d7d62dd..0000000000
--- a/src/npair_half_size_nsq_newtoff.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(half/size/nsq/newtoff,
-           NPairHalfSizeNsqNewtoff,
-           NP_HALF | NP_SIZE | NP_NSQ | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALF_SIZE_NSQ_NEWTOFF_H
-#define LMP_NPAIR_HALF_SIZE_NSQ_NEWTOFF_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalfSizeNsqNewtoff : public NPair {
- public:
-  NPairHalfSizeNsqNewtoff(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_half_size_nsq_newton.cpp b/src/npair_half_size_nsq_newton.cpp
deleted file mode 100644
index ce0c7f9562..0000000000
--- a/src/npair_half_size_nsq_newton.cpp
+++ /dev/null
@@ -1,172 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_half_size_nsq_newton.h"
-
-#include "atom.h"
-#include "atom_vec.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "molecule.h"
-#include "group.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalfSizeNsqNewton::NPairHalfSizeNsqNewton(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   size particles
-   N^2 / 2 search for neighbor pairs with full Newton's 3rd law
-   pair added to list if atoms i and j are both owned and i < j
-   if j is ghost only me or other proc adds pair
-   decision based on itag,jtag tests
-------------------------------------------------------------------------- */
-
-void NPairHalfSizeNsqNewton::build(NeighList *list)
-{
-  int i,j,jh,n,bitmask,which,imol,iatom,moltemplate;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  double radi,radsum,cutsq;
-  int *neighptr;
-
-  const double delta = 0.01 * force->angstrom;
-  const int triclinic = domain->triclinic;
-
-  double **x = atom->x;
-  double *radius = atom->radius;
-  tagint *tag = atom->tag;
-  int *type = atom->type;
-  int *mask = atom->mask;
-  tagint *molecule = atom->molecule;
-  tagint **special = atom->special;
-  int **nspecial = atom->nspecial;
-  int nlocal = atom->nlocal;
-  int nall = nlocal + atom->nghost;
-  if (includegroup) {
-    nlocal = atom->nfirst;
-    bitmask = group->bitmask[includegroup];
-  }
-
-  int *molindex = atom->molindex;
-  int *molatom = atom->molatom;
-  Molecule **onemols = atom->avec->onemols;
-  if (molecular == Atom::TEMPLATE) moltemplate = 1;
-  else moltemplate = 0;
-
-  int history = list->history;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int mask_history = 1 << HISTBITS;
-
-  int inum = 0;
-  ipage->reset();
-
-  for (i = 0; i < nlocal; i++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    itag = tag[i];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    radi = radius[i];
-    if (moltemplate) {
-      imol = molindex[i];
-      iatom = molatom[i];
-      tagprev = tag[i] - iatom - 1;
-    }
-
-    // loop over remaining atoms, owned and ghost
-    // use itag/jtap comparision to eliminate half the interactions
-    // itag = jtag is possible for long cutoffs that include images of self
-    // for triclinic, must use delta to eliminate half the I/J interactions
-    // cannot use I/J exact coord comparision as for orthog
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    for (j = i+1; j < nall; j++) {
-      if (includegroup && !(mask[j] & bitmask)) continue;
-
-      if (j >= nlocal) {
-        jtag = tag[j];
-        if (itag > jtag) {
-          if ((itag+jtag) % 2 == 0) continue;
-        } else if (itag < jtag) {
-          if ((itag+jtag) % 2 == 1) continue;
-        } else if (triclinic) {
-          if (fabs(x[j][2]-ztmp) > delta) {
-            if (x[j][2] < ztmp) continue;
-          } else if (fabs(x[j][1]-ytmp) > delta) {
-            if (x[j][1] < ytmp) continue;
-          } else {
-            if (x[j][0] < xtmp) continue;
-          }
-        } else {
-          if (x[j][2] < ztmp) continue;
-          if (x[j][2] == ztmp) {
-            if (x[j][1] < ytmp) continue;
-            if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-          }
-        }
-      }
-
-      if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
-      radsum = radi + radius[j];
-      cutsq = (radsum+skin) * (radsum+skin);
-
-      if (rsq <= cutsq) {
-        jh = j;
-        if (history && rsq < radsum*radsum)
-          jh = jh ^ mask_history;
-
-        if (molecular != Atom::ATOMIC) {
-          if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
-          else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = jh;
-          else if (domain->minimum_image_check(delx,dely,delz))
-            neighptr[n++] = jh;
-          else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS);
-        } else neighptr[n++] = jh;
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_halffull.cpp b/src/npair_halffull.cpp
new file mode 100644
index 0000000000..b3d8d4f50e
--- /dev/null
+++ b/src/npair_halffull.cpp
@@ -0,0 +1,160 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "npair_halffull.h"
+
+#include "atom.h"
+#include "error.h"
+#include "force.h"
+#include "my_page.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<int NEWTON, int TRI, int TRIM>
+NPairHalffull<NEWTON, TRI, TRIM>::NPairHalffull(LAMMPS *lmp) : NPair(lmp) {}
+
+/* ----------------------------------------------------------------------
+   build half list from full list
+   pair stored once if i,j are both owned and i < j
+   works if full list is a skip list
+
+   Newtoff:
+     pair stored by me if j is ghost (also stored by proc owning j)
+     works for owned (non-ghost) list, also for ghost list
+     if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
+   Newton:
+     if j is ghost, only store if j coords are "above and to the right" of i
+     use i < j < nlocal to eliminate half the local/local interactions
+   Newton + Triclinic:
+     must use delta to eliminate half the local/ghost interactions
+     cannot use I/J exact coord comparision as for orthog
+       b/c transforming orthog -> lambda -> orthog for ghost atoms
+       with an added PBC offset can shift all 3 coords by epsilon
+------------------------------------------------------------------------- */
+
+template<int NEWTON, int TRI, int TRIM>
+void NPairHalffull<NEWTON, TRI, TRIM>::build(NeighList *list)
+{
+  int i, j, ii, jj, n, jnum, joriginal;
+  int *neighptr, *jlist;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
+
+  const double delta = 0.01 * force->angstrom;
+
+  double **x = atom->x;
+  int nlocal = atom->nlocal;
+
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+  MyPage<int> *ipage = list->ipage;
+
+  int *ilist_full = list->listfull->ilist;
+  int *numneigh_full = list->listfull->numneigh;
+  int **firstneigh_full = list->listfull->firstneigh;
+  int inum_full = list->listfull->inum;
+  if (!NEWTON)
+    if (list->ghost) inum_full += list->listfull->gnum;
+
+  int inum = 0;
+  ipage->reset();
+
+  double cutsq_custom = cutoff_custom * cutoff_custom;
+
+  // loop over atoms in full list
+
+  for (ii = 0; ii < inum_full; ii++) {
+    n = 0;
+    neighptr = ipage->vget();
+
+    // loop over parent full list
+
+    i = ilist_full[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+
+    jlist = firstneigh_full[i];
+    jnum = numneigh_full[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      joriginal = jlist[jj];
+      j = joriginal & NEIGHMASK;
+
+      if (NEWTON) {
+        if (j < nlocal) {
+          if (i > j) continue;
+        } else if (TRI) {
+          if (fabs(x[j][2]-ztmp) > delta) {
+            if (x[j][2] < ztmp) continue;
+          } else if (fabs(x[j][1]-ytmp) > delta) {
+            if (x[j][1] < ytmp) continue;
+          } else {
+            if (x[j][0] < xtmp) continue;
+          }
+        } else {
+          if (x[j][2] < ztmp) continue;
+          if (x[j][2] == ztmp) {
+            if (x[j][1] < ytmp) continue;
+            if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+          }
+        }
+
+        if (TRIM) {
+          delx = xtmp - x[j][0];
+          dely = ytmp - x[j][1];
+          delz = ztmp - x[j][2];
+          rsq = delx * delx + dely * dely + delz * delz;
+
+          if (rsq > cutsq_custom) continue;
+        }
+
+        neighptr[n++] = joriginal;
+      } else {
+        if (j > i) {
+          if (TRIM) {
+            delx = xtmp - x[j][0];
+            dely = ytmp - x[j][1];
+            delz = ztmp - x[j][2];
+            rsq = delx * delx + dely * dely + delz * delz;
+
+            if (rsq > cutsq_custom) continue;
+          }
+
+          neighptr[n++] = joriginal;
+        }
+      }
+    }
+
+    ilist[inum++] = i;
+    firstneigh[i] = neighptr;
+    numneigh[i] = n;
+    ipage->vgot(n);
+    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  }
+  list->inum = inum;
+  if (!NEWTON)
+    if (list->ghost) list->gnum = list->listfull->gnum;
+}
+
+namespace LAMMPS_NS {
+template class NPairHalffull<0,0,0>;
+template class NPairHalffull<1,0,0>;
+template class NPairHalffull<1,1,0>;
+template class NPairHalffull<0,0,1>;
+template class NPairHalffull<1,0,1>;
+template class NPairHalffull<1,1,1>;
+}
diff --git a/src/npair_halffull.h b/src/npair_halffull.h
new file mode 100644
index 0000000000..41d2e37dc4
--- /dev/null
+++ b/src/npair_halffull.h
@@ -0,0 +1,131 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NPAIR_CLASS
+// clang-format off
+typedef NPairHalffull<0, 0, 0> NPairHalffullNewtoff;
+NPairStyle(halffull/newtoff,
+           NPairHalffullNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI);
+
+typedef NPairHalffull<0, 0, 0> NPairHalffullNewtoff;
+NPairStyle(halffull/newtoff/skip,
+           NPairHalffullNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_SKIP);
+
+typedef NPairHalffull<0, 0, 0> NPairHalffullNewtoff;
+NPairStyle(halffull/newtoff/ghost,
+           NPairHalffullNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_GHOST);
+
+typedef NPairHalffull<0, 0, 0> NPairHalffullNewtoff;
+NPairStyle(halffull/newtoff/skip/ghost,
+           NPairHalffullNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST);
+
+typedef NPairHalffull<1, 0, 0> NPairHalffullNewton;
+NPairStyle(halffull/newton,
+           NPairHalffullNewton,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_ORTHO);
+
+typedef NPairHalffull<1, 1, 0> NPairHalffullNewtonTri;
+NPairStyle(halffull/newton/tri,
+           NPairHalffullNewtonTri,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_TRI);
+
+typedef NPairHalffull<1, 0, 0> NPairHalffullNewton;
+NPairStyle(halffull/newton/skip,
+           NPairHalffullNewton,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_ORTHO | NP_SKIP);
+
+typedef NPairHalffull<1, 1, 0> NPairHalffullNewtonTri;
+NPairStyle(halffull/newton/skip/tri,
+           NPairHalffullNewtonTri,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_TRI | NP_SKIP);
+
+typedef NPairHalffull<0, 0, 1> NPairHalffullTrimNewtoff;
+NPairStyle(halffull/trim/newtoff,
+           NPairHalffullTrimNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_TRIM);
+
+typedef NPairHalffull<0, 0, 1> NPairHalffullTrimNewtoff;
+NPairStyle(halffull/trim/newtoff/skip,
+           NPairHalffullTrimNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM);
+
+typedef NPairHalffull<0, 0, 1> NPairHalffullTrimNewtoff;
+NPairStyle(halffull/trim/newtoff/ghost,
+           NPairHalffullTrimNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM);
+
+typedef NPairHalffull<0, 0, 1> NPairHalffullTrimNewtoff;
+NPairStyle(halffull/trim/newtoff/skip/ghost,
+           NPairHalffullTrimNewtoff,
+           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
+           NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_TRIM);
+
+typedef NPairHalffull<1, 0, 1> NPairHalffullTrimNewton;
+NPairStyle(halffull/trim/newton,
+           NPairHalffullTrimNewton,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_ORTHO | NP_TRIM);
+
+typedef NPairHalffull<1, 1, 1> NPairHalffullTrimNewtonTri;
+NPairStyle(halffull/trim/newton/tri,
+           NPairHalffullTrimNewtonTri,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_TRI | NP_TRIM);
+
+typedef NPairHalffull<1, 0, 1> NPairHalffullTrimNewton;
+NPairStyle(halffull/trim/newton/skip,
+           NPairHalffullTrimNewton,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_ORTHO | NP_SKIP | NP_TRIM);
+
+typedef NPairHalffull<1, 1, 1> NPairHalffullTrimNewtonTri;
+NPairStyle(halffull/trim/newton/tri/skip,
+           NPairHalffullTrimNewtonTri,
+           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_TRI | NP_SKIP | NP_TRIM);
+// clang-format on
+#else
+
+#ifndef LMP_NPAIR_HALFFULL_H
+#define LMP_NPAIR_HALFFULL_H
+
+#include "npair.h"
+
+namespace LAMMPS_NS {
+
+template<int NEWTON, int TRI, int TRIM>
+class NPairHalffull : public NPair {
+ public:
+  NPairHalffull(class LAMMPS *);
+  void build(class NeighList *) override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/npair_halffull_newtoff.cpp b/src/npair_halffull_newtoff.cpp
deleted file mode 100644
index a5f0e1624a..0000000000
--- a/src/npair_halffull_newtoff.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_halffull_newtoff.h"
-
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalffullNewtoff::NPairHalffullNewtoff(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build half list from full list
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-   works if full list is a skip list
-   works for owned (non-ghost) list, also for ghost list
-   if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
-------------------------------------------------------------------------- */
-
-void NPairHalffullNewtoff::build(NeighList *list)
-{
-  int i, j, ii, jj, n, jnum, joriginal;
-  int *neighptr, *jlist;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_full = list->listfull->ilist;
-  int *numneigh_full = list->listfull->numneigh;
-  int **firstneigh_full = list->listfull->firstneigh;
-  int inum_full = list->listfull->inum;
-  if (list->ghost) inum_full += list->listfull->gnum;
-
-  int inum = 0;
-  ipage->reset();
-
-  // loop over atoms in full list
-
-  for (ii = 0; ii < inum_full; ii++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    // loop over parent full list
-
-    i = ilist_full[ii];
-    jlist = firstneigh_full[i];
-    jnum = numneigh_full[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-      if (j > i) neighptr[n++] = joriginal;
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-  if (list->ghost) list->gnum = list->listfull->gnum;
-}
diff --git a/src/npair_halffull_newtoff.h b/src/npair_halffull_newtoff.h
deleted file mode 100644
index d2b42f2370..0000000000
--- a/src/npair_halffull_newtoff.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(halffull/newtoff,
-           NPairHalffullNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
-           NP_ORTHO | NP_TRI);
-
-NPairStyle(halffull/newtoff/skip,
-           NPairHalffullNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_SKIP);
-
-NPairStyle(halffull/newtoff/ghost,
-           NPairHalffullNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_GHOST);
-
-NPairStyle(halffull/newtoff/skip/ghost,
-           NPairHalffullNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALFFULL_NEWTOFF_H
-#define LMP_NPAIR_HALFFULL_NEWTOFF_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalffullNewtoff : public NPair {
- public:
-  NPairHalffullNewtoff(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_halffull_newton.cpp b/src/npair_halffull_newton.cpp
deleted file mode 100644
index 12320c46f3..0000000000
--- a/src/npair_halffull_newton.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_halffull_newton.h"
-
-#include "atom.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalffullNewton::NPairHalffullNewton(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build half list from full list
-   pair stored once if i,j are both owned and i < j
-   if j is ghost, only store if j coords are "above and to the right" of i
-   works if full list is a skip list
-------------------------------------------------------------------------- */
-
-void NPairHalffullNewton::build(NeighList *list)
-{
-  int i, j, ii, jj, n, jnum, joriginal;
-  int *neighptr, *jlist;
-  double xtmp, ytmp, ztmp;
-
-  const double delta = 0.01 * force->angstrom;
-  const int triclinic = domain->triclinic;
-
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_full = list->listfull->ilist;
-  int *numneigh_full = list->listfull->numneigh;
-  int **firstneigh_full = list->listfull->firstneigh;
-  int inum_full = list->listfull->inum;
-
-  int inum = 0;
-  ipage->reset();
-
-  // loop over parent full list
-
-  for (ii = 0; ii < inum_full; ii++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    i = ilist_full[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    // loop over full neighbor list
-    // use i < j < nlocal to eliminate half the local/local interactions
-    // for triclinic, must use delta to eliminate half the local/ghost interactions
-    // cannot use I/J exact coord comparision as for orthog
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    jlist = firstneigh_full[i];
-    jnum = numneigh_full[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-
-      if (j < nlocal) {
-        if (i > j) continue;
-      } else if (triclinic) {
-        if (fabs(x[j][2]-ztmp) > delta) {
-          if (x[j][2] < ztmp) continue;
-        } else if (fabs(x[j][1]-ytmp) > delta) {
-          if (x[j][1] < ytmp) continue;
-        } else {
-          if (x[j][0] < xtmp) continue;
-        }
-      } else {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      neighptr[n++] = joriginal;
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-  list->inum = inum;
-}
diff --git a/src/npair_halffull_newton.h b/src/npair_halffull_newton.h
deleted file mode 100644
index 3a9462f8b4..0000000000
--- a/src/npair_halffull_newton.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(halffull/newton,
-           NPairHalffullNewton,
-           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_ORTHO | NP_TRI);
-
-NPairStyle(halffull/newton/skip,
-           NPairHalffullNewton,
-           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_ORTHO | NP_TRI | NP_SKIP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALFFULL_NEWTON_H
-#define LMP_NPAIR_HALFFULL_NEWTON_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalffullNewton : public NPair {
- public:
-  NPairHalffullNewton(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_halffull_trim_newtoff.cpp b/src/npair_halffull_trim_newtoff.cpp
deleted file mode 100644
index db97bf185a..0000000000
--- a/src/npair_halffull_trim_newtoff.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_halffull_trim_newtoff.h"
-
-#include "atom.h"
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalffullTrimNewtoff::NPairHalffullTrimNewtoff(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build half list from full list
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if j is ghost (also stored by proc owning j)
-   works if full list is a skip list
-   works for owned (non-ghost) list, also for ghost list
-   if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
-------------------------------------------------------------------------- */
-
-void NPairHalffullTrimNewtoff::build(NeighList *list)
-{
-  int i, j, ii, jj, n, jnum, joriginal;
-  int *neighptr, *jlist;
-  double xtmp, ytmp, ztmp;
-  double delx, dely, delz, rsq;
-
-  double **x = atom->x;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_full = list->listfull->ilist;
-  int *numneigh_full = list->listfull->numneigh;
-  int **firstneigh_full = list->listfull->firstneigh;
-  int inum_full = list->listfull->inum;
-  if (list->ghost) inum_full += list->listfull->gnum;
-
-  int inum = 0;
-  ipage->reset();
-
-  double cutsq_custom = cutoff_custom * cutoff_custom;
-
-  // loop over atoms in full list
-
-  for (ii = 0; ii < inum_full; ii++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    // loop over parent full list
-
-    i = ilist_full[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    jlist = firstneigh_full[i];
-    jnum = numneigh_full[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-      if (j > i) {
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx * delx + dely * dely + delz * delz;
-
-        if (rsq > cutsq_custom) continue;
-
-        neighptr[n++] = joriginal;
-      }
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-  if (list->ghost) list->gnum = list->listfull->gnum;
-}
diff --git a/src/npair_halffull_trim_newtoff.h b/src/npair_halffull_trim_newtoff.h
deleted file mode 100644
index ca7726c837..0000000000
--- a/src/npair_halffull_trim_newtoff.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(halffull/trim/newtoff,
-           NPairHalffullTrimNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_TRIM);
-
-NPairStyle(halffull/trim/newtoff/skip,
-           NPairHalffullTrimNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_SKIP);
-
-NPairStyle(halffull/trim/newtoff/ghost,
-           NPairHalffullTrimNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_GHOST);
-
-NPairStyle(halffull/trim/newtoff/skip/ghost,
-           NPairHalffullTrimNewtoff,
-           NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_HALF |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_SKIP | NP_GHOST);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALFFULL_TRIM_NEWTOFF_H
-#define LMP_NPAIR_HALFFULL_TRIM_NEWTOFF_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalffullTrimNewtoff : public NPair {
- public:
-  NPairHalffullTrimNewtoff(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_halffull_trim_newton.cpp b/src/npair_halffull_trim_newton.cpp
deleted file mode 100644
index 56cef00b25..0000000000
--- a/src/npair_halffull_trim_newton.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_halffull_trim_newton.h"
-
-#include "atom.h"
-#include "domain.h"
-#include "error.h"
-#include "force.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairHalffullTrimNewton::NPairHalffullTrimNewton(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build half list from full list
-   pair stored once if i,j are both owned and i < j
-   if j is ghost, only store if j coords are "above and to the right" of i
-   works if full list is a skip list
-------------------------------------------------------------------------- */
-
-void NPairHalffullTrimNewton::build(NeighList *list)
-{
-  int i, j, ii, jj, n, jnum, joriginal;
-  int *neighptr, *jlist;
-  double xtmp, ytmp, ztmp;
-  double delx, dely, delz, rsq;
-
-  const double delta = 0.01 * force->angstrom;
-  const int triclinic = domain->triclinic;
-
-  double **x = atom->x;
-  int nlocal = atom->nlocal;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_full = list->listfull->ilist;
-  int *numneigh_full = list->listfull->numneigh;
-  int **firstneigh_full = list->listfull->firstneigh;
-  int inum_full = list->listfull->inum;
-
-  int inum = 0;
-  ipage->reset();
-
-  double cutsq_custom = cutoff_custom * cutoff_custom;
-
-  // loop over parent full list
-
-  for (ii = 0; ii < inum_full; ii++) {
-    n = 0;
-    neighptr = ipage->vget();
-
-    i = ilist_full[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    // loop over full neighbor list
-    // use i < j < nlocal to eliminate half the local/local interactions
-    // for triclinic, must use delta to eliminate half the local/ghost interactions
-    // cannot use I/J exact coord comparision as for orthog
-    //   b/c transforming orthog -> lambda -> orthog for ghost atoms
-    //   with an added PBC offset can shift all 3 coords by epsilon
-
-    jlist = firstneigh_full[i];
-    jnum = numneigh_full[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-
-      if (j < nlocal) {
-        if (i > j) continue;
-      } else if (triclinic) {
-        if (fabs(x[j][2]-ztmp) > delta) {
-          if (x[j][2] < ztmp) continue;
-        } else if (fabs(x[j][1]-ytmp) > delta) {
-          if (x[j][1] < ytmp) continue;
-        } else {
-          if (x[j][0] < xtmp) continue;
-        }
-      } else {
-        if (x[j][2] < ztmp) continue;
-        if (x[j][2] == ztmp) {
-          if (x[j][1] < ytmp) continue;
-          if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
-        }
-      }
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-
-      if (rsq > cutsq_custom) continue;
-
-      neighptr[n++] = joriginal;
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-  list->inum = inum;
-}
diff --git a/src/npair_halffull_trim_newton.h b/src/npair_halffull_trim_newton.h
deleted file mode 100644
index 5eb5aa3cd3..0000000000
--- a/src/npair_halffull_trim_newton.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(halffull/trim/newton,
-           NPairHalffullTrimNewton,
-           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_ORTHO | NP_TRI | NP_TRIM);
-
-NPairStyle(halffull/trim/newton/skip,
-           NPairHalffullTrimNewton,
-           NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_ORTHO | NP_TRI | NP_TRIM | NP_SKIP);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_HALFFULL_TRIM_NEWTON_H
-#define LMP_NPAIR_HALFFULL_TRIM_NEWTON_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairHalffullTrimNewton : public NPair {
- public:
-  NPairHalffullTrimNewton(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_multi.cpp b/src/npair_multi.cpp
new file mode 100644
index 0000000000..b5f813c0a8
--- /dev/null
+++ b/src/npair_multi.cpp
@@ -0,0 +1,301 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "npair_multi.h"
+
+#include "atom.h"
+#include "atom_vec.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "molecule.h"
+#include "my_page.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+using namespace NeighConst;
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE, int ATOMONLY>
+NPairMulti<HALF, NEWTON, TRI, SIZE, ATOMONLY>::NPairMulti(LAMMPS *lmp) : NPair(lmp) {}
+
+/* ----------------------------------------------------------------------
+   multi stencil is icollection-jcollection dependent
+   Full:
+     binned neighbor list construction for all neighbors
+     every neighbor pair appears in list of both atoms i and j
+   Half + Newtoff:
+     binned neighbor list construction with partial Newton's 3rd law
+     each owned atom i checks own bin and other bins in stencil
+     pair stored once if i,j are both owned and i < j
+     pair stored by me if j is ghost (also stored by proc owning j)
+   Half + Newton:
+     binned neighbor list construction with full Newton's 3rd law
+     each owned atom i checks its own bin and other bins in Newton stencil
+     every pair stored exactly once by some processor
+------------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE, int ATOMONLY>
+void NPairMulti<HALF, NEWTON, TRI, SIZE, ATOMONLY>::build(NeighList *list)
+{
+  int i, j, jh, js, k, n, itype, jtype, ibin, jbin, icollection, jcollection, which, ns, imol, iatom, moltemplate;
+  tagint itag, jtag, tagprev;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq, radsum, cut, cutsq;
+  int *neighptr, *s;
+
+  const double delta = 0.01 * force->angstrom;
+
+  int *collection = neighbor->collection;
+  double **x = atom->x;
+  double *radius = atom->radius;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  tagint *tag = atom->tag;
+  tagint *molecule = atom->molecule;
+  tagint **special = atom->special;
+  int **nspecial = atom->nspecial;
+  int nlocal = atom->nlocal;
+  if (includegroup) nlocal = atom->nfirst;
+
+  int *molindex = atom->molindex;
+  int *molatom = atom->molatom;
+  Molecule **onemols = atom->avec->onemols;
+  if (!ATOMONLY) {
+    if (molecular == Atom::TEMPLATE)
+      moltemplate = 1;
+    else
+      moltemplate = 0;
+  }
+
+  int history = list->history;
+  int mask_history = 1 << HISTBITS;
+
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+  MyPage<int> *ipage = list->ipage;
+
+  int inum = 0;
+  ipage->reset();
+
+  for (i = 0; i < nlocal; i++) {
+    n = 0;
+    neighptr = ipage->vget();
+
+    itag = tag[i];
+    itype = type[i];
+    icollection = collection[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    if (!ATOMONLY) {
+      if (moltemplate) {
+        imol = molindex[i];
+        iatom = molatom[i];
+        tagprev = tag[i] - iatom - 1;
+      }
+    }
+
+    ibin = atom2bin[i];
+
+    // loop through stencils for all collections
+
+    for (jcollection = 0; jcollection < ncollections; jcollection++) {
+
+      // Use own bin for same collection
+      if (icollection == jcollection) jbin = ibin;
+      else jbin = coord2bin(x[i], jcollection);
+
+      s = stencil_multi[icollection][jcollection];
+      ns = nstencil_multi[icollection][jcollection];
+
+      for (k = 0; k < ns; k++) {
+        js = binhead_multi[jcollection][jbin + s[k]];
+
+        // For half-newton-ortho, first check self bin (k == 0, always half)
+        // if checking its own binlist, skip all before i in linked list
+        if (HALF && NEWTON && !TRI)
+          if ((k == 0) && (icollection == jcollection)) js = bins[i];
+
+        for (j = js; j >= 0; j = bins[j]) {
+          if (!HALF) {
+            // Full neighbor list, only uses full stencils
+            // only skip i = j
+            if (i == j) continue;
+          } else if (!NEWTON) {
+            // Half neighbor list, newton off, only uses full stencils
+            // only store pair if i < j
+            // stores own/own pairs only once
+            // stores own/ghost pairs on both procs
+            if (j <= i) continue;
+          } else if (TRI) {
+            // Half neighbor list, newton on, triclinic, only uses full stencils
+            // If different sizes -> full stencil (accept all, one-way search)
+            // If same size -> half stencil, exclude half of interactions
+            //     stencil is empty if i larger than j
+            //     stencil is full if i smaller than j
+            //     stencil is full if i same size as j
+            //   for i smaller than j:
+            //     must use itag/jtag to eliminate half the I/J interactions
+            //     cannot use I/J exact coord comparision
+            //       b/c transforming orthog -> lambda -> orthog for ghost atoms
+            //   with an added PBC offset can shift all 3 coords by epsilon
+
+            if (flag_same_multi[icollection][jcollection]) {
+              if (j <= i) continue;
+              if (j >= nlocal) {
+                jtag = tag[j];
+                if (itag > jtag) {
+                  if ((itag + jtag) % 2 == 0) continue;
+                } else if (itag < jtag) {
+                  if ((itag + jtag) % 2 == 1) continue;
+                } else {
+                  if (fabs(x[j][2] - ztmp) > delta) {
+                    if (x[j][2] < ztmp) continue;
+                  } else if (fabs(x[j][1] - ytmp) > delta) {
+                    if (x[j][1] < ytmp) continue;
+                  } else {
+                    if (x[j][0] < xtmp) continue;
+                  }
+                }
+              }
+            }
+          } else {
+            // Half neighbor list, newton on, orthonormal, uses a mix of stencils
+            // If different sizes -> full stencil (accept all, one-way search)
+            // If same size -> half stencil (first includes a self bin search)
+            if (k == 0 && flag_same_multi[icollection][jcollection]) {
+              // if same collection,
+              //   if j is owned atom, store it, since j is beyond i in linked list
+              //   if j is ghost, only store if j coords are "above and to the right" of i
+
+              // if different collections,
+              //   if j is owned atom, store it if j > i
+              //   if j is ghost, only store if j coords are "above and to the right" of i
+
+              if ((icollection != jcollection) && (j < i)) continue;
+
+              if (j >= nlocal) {
+                if (x[j][2] < ztmp) continue;
+                if (x[j][2] == ztmp) {
+                  if (x[j][1] < ytmp) continue;
+                  if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+                }
+              }
+            }
+          }
+
+          jtype = type[j];
+          if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
+
+          delx = xtmp - x[j][0];
+          dely = ytmp - x[j][1];
+          delz = ztmp - x[j][2];
+          rsq = delx * delx + dely * dely + delz * delz;
+
+          if (SIZE) {
+            radsum = radius[i] + radius[j];
+            cut = radsum + skin;
+            cutsq = cut * cut;
+
+            if (ATOMONLY) {
+              if (rsq <= cutsq) {
+                jh = j;
+                if (history && rsq < (radsum * radsum))
+                  jh = jh ^ mask_history;
+                neighptr[n++] = jh;
+              }
+            } else {
+              if (rsq <= cutsq) {
+                jh = j;
+                if (history && rsq < (radsum * radsum))
+                  jh = jh ^ mask_history;
+
+                if (molecular != Atom::ATOMIC) {
+                  if (!moltemplate)
+                    which = find_special(special[i], nspecial[i], tag[j]);
+                  else if (imol >= 0)
+                    which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                         tag[j] - tagprev);
+                  else
+                    which = 0;
+                  if (which == 0)
+                    neighptr[n++] = jh;
+                  else if (domain->minimum_image_check(delx, dely, delz))
+                    neighptr[n++] = jh;
+                  else if (which > 0)
+                    neighptr[n++] = jh ^ (which << SBBITS);
+                } else
+                  neighptr[n++] = jh;
+              }
+            }
+          } else {
+            if (ATOMONLY) {
+              if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
+            } else {
+              if (rsq <= cutneighsq[itype][jtype]) {
+                if (molecular != Atom::ATOMIC) {
+                  if (!moltemplate)
+                    which = find_special(special[i], nspecial[i], tag[j]);
+                  else if (imol >= 0)
+                    which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                         tag[j] - tagprev);
+                  else
+                    which = 0;
+                  if (which == 0)
+                    neighptr[n++] = j;
+                  else if (domain->minimum_image_check(delx, dely, delz))
+                    neighptr[n++] = j;
+                  else if (which > 0)
+                    neighptr[n++] = j ^ (which << SBBITS);
+                } else
+                  neighptr[n++] = j;
+              }
+            }
+          }
+        }
+      }
+    }
+
+    ilist[inum++] = i;
+    firstneigh[i] = neighptr;
+    numneigh[i] = n;
+    ipage->vgot(n);
+    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  }
+
+  list->inum = inum;
+  list->gnum = 0;
+}
+
+namespace LAMMPS_NS {
+template class NPairMulti<0,1,0,0,0>;
+template class NPairMulti<1,0,0,0,0>;
+template class NPairMulti<1,1,0,0,0>;
+template class NPairMulti<1,1,1,0,0>;
+template class NPairMulti<0,1,0,1,0>;
+template class NPairMulti<1,0,0,1,0>;
+template class NPairMulti<1,1,0,1,0>;
+template class NPairMulti<1,1,1,1,0>;
+template class NPairMulti<0,1,0,0,1>;
+template class NPairMulti<1,0,0,0,1>;
+template class NPairMulti<1,1,0,0,1>;
+template class NPairMulti<1,1,1,0,1>;
+template class NPairMulti<0,1,0,1,1>;
+template class NPairMulti<1,0,0,1,1>;
+template class NPairMulti<1,1,0,1,1>;
+template class NPairMulti<1,1,1,1,1>;
+}
diff --git a/src/npair_multi.h b/src/npair_multi.h
new file mode 100644
index 0000000000..a82352f840
--- /dev/null
+++ b/src/npair_multi.h
@@ -0,0 +1,119 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NPAIR_CLASS
+// clang-format off
+typedef NPairMulti<0, 1, 0, 0, 0> NPairFullMulti;
+NPairStyle(full/multi,
+           NPairFullMulti,
+           NP_FULL | NP_MULTI | NP_MOLONLY |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMulti<1, 0, 0, 0, 0> NPairHalfMultiNewtoff;
+NPairStyle(half/multi/newtoff,
+           NPairHalfMultiNewtoff,
+           NP_HALF | NP_MULTI | NP_MOLONLY | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMulti<1, 1, 0, 0, 0> NPairHalfMultiNewton;
+NPairStyle(half/multi/newton,
+           NPairHalfMultiNewton,
+           NP_HALF | NP_MULTI | NP_MOLONLY | NP_NEWTON | NP_ORTHO);
+
+typedef NPairMulti<1, 1, 1, 0, 0> NPairHalfMultiNewtonTri;
+NPairStyle(half/multi/newton/tri,
+           NPairHalfMultiNewtonTri,
+           NP_HALF | NP_MULTI | NP_MOLONLY | NP_NEWTON | NP_TRI);
+
+typedef NPairMulti<0, 1, 0, 1, 0> NPairFullSizeMulti;
+NPairStyle(full/size/multi,
+           NPairFullSizeMulti,
+           NP_FULL | NP_SIZE | NP_MULTI | NP_MOLONLY |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMulti<1, 0, 0, 1, 0> NPairHalfSizeMultiNewtoff;
+NPairStyle(half/size/multi/newtoff,
+           NPairHalfSizeMultiNewtoff,
+           NP_HALF | NP_SIZE | NP_MULTI | NP_MOLONLY | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMulti<1, 1, 0, 1, 0> NPairHalfSizeMultiNewton;
+NPairStyle(half/size/multi/newton,
+           NPairHalfSizeMultiNewton,
+           NP_HALF | NP_SIZE | NP_MULTI | NP_MOLONLY | NP_NEWTON | NP_ORTHO);
+
+typedef NPairMulti<1, 1, 1, 1, 0> NPairHalfSizeMultiNewtonTri;
+NPairStyle(half/size/multi/newton/tri,
+           NPairHalfSizeMultiNewtonTri,
+           NP_HALF | NP_SIZE | NP_MULTI | NP_MOLONLY | NP_NEWTON | NP_TRI);
+
+typedef NPairMulti<0, 1, 0, 0, 1> NPairFullMultiAtomonly;
+NPairStyle(full/multi/atomonly,
+           NPairFullMultiAtomonly,
+           NP_FULL | NP_MULTI | NP_ATOMONLY |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMulti<1, 0, 0, 0, 1> NPairHalfMultiAtomonlyNewtoff;
+NPairStyle(half/multi/atomonly/newtoff,
+           NPairHalfMultiAtomonlyNewtoff,
+           NP_HALF | NP_MULTI | NP_ATOMONLY | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMulti<1, 1, 0, 0, 1> NPairHalfMultiAtomonlyNewton;
+NPairStyle(half/multi/atomonly/newton,
+           NPairHalfMultiAtomonlyNewton,
+           NP_HALF | NP_MULTI | NP_ATOMONLY | NP_NEWTON | NP_ORTHO);
+
+typedef NPairMulti<1, 1, 1, 0, 1> NPairHalfMultiAtomonlyNewtonTri;
+NPairStyle(half/multi/atomonly/newton/tri,
+           NPairHalfMultiAtomonlyNewtonTri,
+           NP_HALF | NP_MULTI | NP_ATOMONLY | NP_NEWTON | NP_TRI);
+
+typedef NPairMulti<0, 1, 0, 1, 1> NPairFullSizeMultiAtomonly;
+NPairStyle(full/size/multi/atomonly,
+           NPairFullSizeMultiAtomonly,
+           NP_FULL | NP_SIZE | NP_MULTI | NP_ATOMONLY |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMulti<1, 0, 0, 1, 1> NPairHalfSizeMultiAtomonlyNewtoff;
+NPairStyle(half/size/multi/atomonly/newtoff,
+           NPairHalfSizeMultiAtomonlyNewtoff,
+           NP_HALF | NP_SIZE | NP_MULTI | NP_ATOMONLY | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMulti<1, 1, 0, 1, 1> NPairHalfSizeMultiAtomonlyNewton;
+NPairStyle(half/size/multi/atomonly/newton,
+           NPairHalfSizeMultiAtomonlyNewton,
+           NP_HALF | NP_SIZE | NP_MULTI | NP_ATOMONLY | NP_NEWTON | NP_ORTHO);
+
+typedef NPairMulti<1, 1, 1, 1, 1> NPairHalfSizeMultiAtomonlyNewtonTri;
+NPairStyle(half/size/multi/atomonly/newton/tri,
+           NPairHalfSizeMultiAtomonlyNewtonTri,
+           NP_HALF | NP_SIZE | NP_MULTI | NP_ATOMONLY | NP_NEWTON | NP_TRI);
+// clang-format on
+#else
+
+#ifndef LMP_NPAIR_MULTI_H
+#define LMP_NPAIR_MULTI_H
+
+#include "npair.h"
+
+namespace LAMMPS_NS {
+
+template<int HALF, int NEWTON, int TRI, int SIZE, int ATOMONLY>
+class NPairMulti : public NPair {
+ public:
+  NPairMulti(class LAMMPS *);
+  void build(class NeighList *) override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/npair_multi_old.cpp b/src/npair_multi_old.cpp
new file mode 100644
index 0000000000..a4ca1e7361
--- /dev/null
+++ b/src/npair_multi_old.cpp
@@ -0,0 +1,255 @@
+// clang-format off
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "npair_multi_old.h"
+
+#include "atom.h"
+#include "atom_vec.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "molecule.h"
+#include "my_page.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE>
+NPairMultiOld<HALF, NEWTON, TRI, SIZE>::NPairMultiOld(LAMMPS *lmp) : NPair(lmp) {}
+
+/* ----------------------------------------------------------------------
+  multi/old-type stencil is itype dependent and is distance checked
+  Full:
+    binned neighbor list construction for all neighbors
+    multi-type stencil is itype dependent and is distance checked
+    every neighbor pair appears in list of both atoms i and j
+  Half + newtoff:
+    binned neighbor list construction with partial Newton's 3rd law
+    each owned atom i checks own bin and other bins in stencil
+    multi-type stencil is itype dependent and is distance checked
+    pair stored once if i,j are both owned and i < j
+    pair stored by me if j is ghost (also stored by proc owning j)
+  Half + newton:
+    binned neighbor list construction with full Newton's 3rd law
+    each owned atom i checks its own bin and other bins in Newton stencil
+    multi-type stencil is itype dependent and is distance checked
+    every pair stored exactly once by some processor
+------------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE>
+void NPairMultiOld<HALF, NEWTON, TRI, SIZE>::build(NeighList *list)
+{
+  int i, j, jh, k, n, itype, jtype, ibin, bin_start, which, ns, imol, iatom, moltemplate;
+  tagint itag, jtag, tagprev;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq, radsum, cut, cutsq;
+  int *neighptr, *s;
+  double *cutnsq, *distsq;
+
+  const double delta = 0.01 * force->angstrom;
+
+  double **x = atom->x;
+  double *radius = atom->radius;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  tagint *tag = atom->tag;
+  tagint *molecule = atom->molecule;
+  tagint **special = atom->special;
+  int **nspecial = atom->nspecial;
+  int nlocal = atom->nlocal;
+  if (includegroup) nlocal = atom->nfirst;
+
+  int *molindex = atom->molindex;
+  int *molatom = atom->molatom;
+  Molecule **onemols = atom->avec->onemols;
+  if (molecular == Atom::TEMPLATE)
+    moltemplate = 1;
+  else
+    moltemplate = 0;
+
+  int history = list->history;
+  int mask_history = 1 << HISTBITS;
+
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+  MyPage<int> *ipage = list->ipage;
+
+  int inum = 0;
+  ipage->reset();
+
+  for (i = 0; i < nlocal; i++) {
+    n = 0;
+    neighptr = ipage->vget();
+
+    itag = tag[i];
+    itype = type[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    if (moltemplate) {
+      imol = molindex[i];
+      iatom = molatom[i];
+      tagprev = tag[i] - iatom - 1;
+    }
+
+    ibin = atom2bin[i];
+    s = stencil_multi_old[itype];
+    distsq = distsq_multi_old[itype];
+    cutnsq = cutneighsq[itype];
+    ns = nstencil_multi_old[itype];
+    for (k = 0; k < ns; k++) {
+      bin_start = binhead[ibin + s[k]];
+      if (HALF && NEWTON && (!TRI)) {
+        if (k == 0) {
+          // Half neighbor list, newton on, orthonormal
+          // loop over rest of atoms in i's bin, ghosts are at end of linked list
+          bin_start = bins[i];
+        }
+      }
+
+      for (j = bin_start; j >= 0; j = bins[j]) {
+        if (!HALF) {
+          // Full neighbor list
+          // only skip i = j
+          if (i == j) continue;
+        } else if (!NEWTON) {
+          // Half neighbor list, newton off
+          // only store pair if i < j
+          // stores own/own pairs only once
+          // stores own/ghost pairs on both procs
+          if (j <= i) continue;
+        } else if (TRI) {
+          // Half neighbor list, newton on, triclinic
+          // for triclinic, bin stencil is full in all 3 dims
+          // must use itag/jtag to eliminate half the I/J interactions
+          // cannot use I/J exact coord comparision
+          //   b/c transforming orthog -> lambda -> orthog for ghost atoms
+          //   with an added PBC offset can shift all 3 coords by epsilon
+          if (j <= i) continue;
+          if (j >= nlocal) {
+            jtag = tag[j];
+            if (itag > jtag) {
+              if ((itag + jtag) % 2 == 0) continue;
+            } else if (itag < jtag) {
+              if ((itag + jtag) % 2 == 1) continue;
+            } else {
+              if (fabs(x[j][2] - ztmp) > delta) {
+                if (x[j][2] < ztmp) continue;
+              } else if (fabs(x[j][1] - ytmp) > delta) {
+                if (x[j][1] < ytmp) continue;
+              } else {
+                if (x[j][0] < xtmp) continue;
+              }
+            }
+          }
+        } else {
+          // Half neighbor list, newton on, orthonormal
+          // store every pair for every bin in stencil,except for i's bin
+
+          if (k == 0) {
+            // if j is owned atom, store it, since j is beyond i in linked list
+            // if j is ghost, only store if j coords are "above and to the "right" of i
+            if (j >= nlocal) {
+              if (x[j][2] < ztmp) continue;
+              if (x[j][2] == ztmp) {
+                if (x[j][1] < ytmp) continue;
+                if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+              }
+            }
+          }
+        }
+
+        jtype = type[j];
+        if (cutnsq[jtype] < distsq[k]) continue;
+
+        if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
+
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+
+        if (SIZE) {
+          radsum = radius[i] + radius[j];
+          cut = radsum + skin;
+          cutsq = cut * cut;
+
+          if (rsq <= cutsq) {
+            jh = j;
+            if (history && rsq < radsum * radsum)
+              jh = jh ^ mask_history;
+
+            if (molecular != Atom::ATOMIC) {
+              if (!moltemplate)
+                which = find_special(special[i], nspecial[i], tag[j]);
+              else if (imol >= 0)
+                which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                     tag[j] - tagprev);
+              else
+                which = 0;
+              if (which == 0)
+                neighptr[n++] = jh;
+              else if (domain->minimum_image_check(delx, dely, delz))
+                neighptr[n++] = jh;
+              else if (which > 0)
+                neighptr[n++] = jh ^ (which << SBBITS);
+            } else
+              neighptr[n++] = jh;
+          }
+        } else {
+          if (rsq <= cutneighsq[itype][jtype]) {
+            if (molecular != Atom::ATOMIC) {
+              if (!moltemplate)
+                which = find_special(special[i], nspecial[i], tag[j]);
+              else if (imol >= 0)
+                which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                     tag[j] - tagprev);
+              else
+                which = 0;
+              if (which == 0)
+                neighptr[n++] = j;
+              else if (domain->minimum_image_check(delx, dely, delz))
+                neighptr[n++] = j;
+              else if (which > 0)
+                neighptr[n++] = j ^ (which << SBBITS);
+            } else
+              neighptr[n++] = j;
+          }
+        }
+      }
+    }
+
+    ilist[inum++] = i;
+    firstneigh[i] = neighptr;
+    numneigh[i] = n;
+    ipage->vgot(n);
+    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  }
+
+  list->inum = inum;
+  list->gnum = 0;
+}
+
+namespace LAMMPS_NS {
+template class NPairMultiOld<0,1,0,0>;
+template class NPairMultiOld<1,0,0,0>;
+template class NPairMultiOld<1,1,0,0>;
+template class NPairMultiOld<1,1,1,0>;
+template class NPairMultiOld<0,1,0,1>;
+template class NPairMultiOld<1,0,0,1>;
+template class NPairMultiOld<1,1,0,1>;
+template class NPairMultiOld<1,1,1,1>;
+}
diff --git a/src/npair_multi_old.h b/src/npair_multi_old.h
new file mode 100644
index 0000000000..f01844ed57
--- /dev/null
+++ b/src/npair_multi_old.h
@@ -0,0 +1,77 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NPAIR_CLASS
+// clang-format off
+typedef NPairMultiOld<0, 1, 0, 0> NPairFullMultiOld;
+NPairStyle(full/multi/old,
+           NPairFullMultiOld,
+           NP_FULL | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOld<1, 0, 0, 0> NPairHalfMultiOldNewtoff;
+NPairStyle(half/multi/old/newtoff,
+           NPairHalfMultiOldNewtoff,
+           NP_HALF | NP_MULTI_OLD | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOld<1, 1, 0, 0> NPairHalfMultiOldNewton;
+NPairStyle(half/multi/old/newton,
+           NPairHalfMultiOldNewton,
+           NP_HALF | NP_MULTI_OLD | NP_NEWTON | NP_ORTHO);
+
+typedef NPairMultiOld<1, 1, 1, 0> NPairHalfMultiOldNewtonTri;
+NPairStyle(half/multi/old/newton/tri,
+           NPairHalfMultiOldNewtonTri,
+           NP_HALF | NP_MULTI_OLD | NP_NEWTON | NP_TRI);
+
+typedef NPairMultiOld<0, 1, 0, 1> NPairFullSizeMultiOld;
+NPairStyle(full/size/multi/old,
+           NPairFullSizeMultiOld,
+           NP_FULL | NP_SIZE | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOld<1, 0, 0, 1> NPairHalfSizeMultiOldNewtoff;
+NPairStyle(half/size/multi/old/newtoff,
+           NPairHalfSizeMultiOldNewtoff,
+           NP_HALF | NP_SIZE | NP_MULTI_OLD | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairMultiOld<1, 1, 0, 1> NPairHalfSizeMultiOldNewton;
+NPairStyle(half/size/multi/old/newton,
+           NPairHalfSizeMultiOldNewton,
+           NP_HALF | NP_SIZE | NP_MULTI_OLD | NP_NEWTON | NP_ORTHO);
+
+typedef NPairMultiOld<1, 1, 1, 1> NPairHalfSizeMultiOldNewtonTri;
+NPairStyle(half/size/multi/old/newton/tri,
+           NPairHalfSizeMultiOldNewtonTri,
+           NP_HALF | NP_SIZE | NP_MULTI_OLD | NP_NEWTON | NP_TRI);
+// clang-format on
+#else
+
+#ifndef LMP_NPAIR_MULTI_OLD_H
+#define LMP_NPAIR_MULTI_OLD_H
+
+#include "npair.h"
+
+namespace LAMMPS_NS {
+
+template<int HALF, int NEWTON, int TRI, int SIZE>
+class NPairMultiOld : public NPair {
+ public:
+  NPairMultiOld(class LAMMPS *);
+  void build(class NeighList *) override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/npair_nsq.cpp b/src/npair_nsq.cpp
new file mode 100644
index 0000000000..c2f43d9cb0
--- /dev/null
+++ b/src/npair_nsq.cpp
@@ -0,0 +1,230 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "npair_nsq.h"
+
+#include "atom.h"
+#include "atom_vec.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "group.h"
+#include "molecule.h"
+#include "my_page.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+
+using namespace LAMMPS_NS;
+using namespace NeighConst;
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE>
+NPairNsq<HALF, NEWTON, TRI, SIZE>::NPairNsq(LAMMPS *lmp) : NPair(lmp) {}
+
+/* ----------------------------------------------------------------------
+   Full:
+     N^2 search for all neighbors
+     every neighbor pair appears in list of both atoms i and j
+   Half + Newtoff:
+     N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
+     pair stored once if i,j are both owned and i < j
+     pair stored by me if j is ghost (also stored by proc owning j)
+   Half + Newton:
+     N^2 / 2 search for neighbor pairs with full Newton's 3rd law
+     every pair stored exactly once by some processor
+     decision on ghost atoms based on itag, jtag tests
+   Half + Newton + Tri:
+     use itag/jtap comparision to eliminate half the interactions
+     for triclinic, must use delta to eliminate half the I/J interactions
+     cannot use I/J exact coord comparision as for orthog
+     b/c transforming orthog -> lambda -> orthog for ghost atoms
+     with an added PBC offset can shift all 3 coords by epsilon
+------------------------------------------------------------------------- */
+
+template<int HALF, int NEWTON, int TRI, int SIZE>
+void NPairNsq<HALF, NEWTON, TRI, SIZE>::build(NeighList *list)
+{
+  int i, j, jh, jstart, n, itype, jtype, which, bitmask, imol, iatom, moltemplate;
+  tagint itag, jtag, tagprev;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq, radsum, cut, cutsq;
+  int *neighptr;
+
+  const double delta = 0.01 * force->angstrom;
+
+  double **x = atom->x;
+  double *radius = atom->radius;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  tagint *tag = atom->tag;
+  tagint *molecule = atom->molecule;
+  tagint **special = atom->special;
+  int **nspecial = atom->nspecial;
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+  if (includegroup) {
+    nlocal = atom->nfirst;
+    bitmask = group->bitmask[includegroup];
+  }
+
+  int *molindex = atom->molindex;
+  int *molatom = atom->molatom;
+  Molecule **onemols = atom->avec->onemols;
+  if (molecular == Atom::TEMPLATE)
+    moltemplate = 1;
+  else
+    moltemplate = 0;
+
+  int history = list->history;
+  int mask_history = 1 << HISTBITS;
+
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+  MyPage<int> *ipage = list->ipage;
+
+  int inum = 0;
+  ipage->reset();
+
+  for (i = 0; i < nlocal; i++) {
+    n = 0;
+    neighptr = ipage->vget();
+
+    itag = tag[i];
+    itype = type[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    if (moltemplate) {
+      imol = molindex[i];
+      iatom = molatom[i];
+      tagprev = tag[i] - iatom - 1;
+    }
+
+    // Full: loop over all atoms, owned and ghost, skip i = j
+    // Half: loop over remaining atoms, owned and ghost
+    //   Newtoff: only store pair if i < j
+    //   Newton: itag = jtag is possible for long cutoffs that include images of self
+
+    if (!HALF) jstart = 0;
+    else jstart = i + 1;
+
+    for (j = jstart; j < nall; j++) {
+      if (includegroup && !(mask[j] & bitmask)) continue;
+
+      if (!HALF) {
+        // Full neighbor list
+        if (i == j) continue;
+      } else if (NEWTON) {
+        // Half neighbor list, newton on
+        if (j >= nlocal) {
+          jtag = tag[j];
+          if (itag > jtag) {
+            if ((itag + jtag) % 2 == 0) continue;
+          } else if (itag < jtag) {
+            if ((itag + jtag) % 2 == 1) continue;
+          } else if (TRI) {
+            if (fabs(x[j][2] - ztmp) > delta) {
+              if (x[j][2] < ztmp) continue;
+            } else if (fabs(x[j][1] - ytmp) > delta) {
+              if (x[j][1] < ytmp) continue;
+            } else {
+              if (x[j][0] < xtmp) continue;
+            }
+          } else {
+            if (x[j][2] < ztmp) continue;
+            if (x[j][2] == ztmp) {
+              if (x[j][1] < ytmp) continue;
+              if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+            }
+          }
+        }
+      }
+
+      jtype = type[j];
+      if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx * delx + dely * dely + delz * delz;
+
+      if (SIZE) {
+        radsum = radius[i] + radius[j];
+        cut = radsum + skin;
+        cutsq = cut * cut;
+
+        if (rsq <= cutsq) {
+          jh = j;
+          if (history && rsq < radsum * radsum) jh = jh ^ mask_history;
+
+          if (molecular != Atom::ATOMIC) {
+            if (!moltemplate)
+              which = find_special(special[i], nspecial[i], tag[j]);
+            else if (imol >= 0)
+              which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                   tag[j] - tagprev);
+            else
+              which = 0;
+            if (which == 0)
+              neighptr[n++] = jh;
+            else if (domain->minimum_image_check(delx, dely, delz))
+              neighptr[n++] = jh;
+            else if (which > 0)
+              neighptr[n++] = jh ^ (which << SBBITS);
+          } else
+            neighptr[n++] = jh;
+        }
+      } else {
+        if (rsq <= cutneighsq[itype][jtype]) {
+          if (molecular != Atom::ATOMIC) {
+            if (!moltemplate)
+              which = find_special(special[i], nspecial[i], tag[j]);
+            else if (imol >= 0)
+              which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                   tag[j] - tagprev);
+            else
+              which = 0;
+            if (which == 0)
+              neighptr[n++] = j;
+            else if (domain->minimum_image_check(delx, dely, delz))
+              neighptr[n++] = j;
+            else if (which > 0)
+              neighptr[n++] = j ^ (which << SBBITS);
+          } else
+            neighptr[n++] = j;
+        }
+      }
+    }
+
+    ilist[inum++] = i;
+    firstneigh[i] = neighptr;
+    numneigh[i] = n;
+    ipage->vgot(n);
+    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+  }
+
+  list->inum = inum;
+  if (!HALF) list->gnum = 0;
+}
+
+namespace LAMMPS_NS {
+template class NPairNsq<0,1,0,0>;
+template class NPairNsq<1,0,0,0>;
+template class NPairNsq<1,1,0,0>;
+template class NPairNsq<1,1,1,0>;
+template class NPairNsq<0,1,0,1>;
+template class NPairNsq<1,0,0,1>;
+template class NPairNsq<1,1,0,1>;
+template class NPairNsq<1,1,1,1>;
+}
diff --git a/src/npair_nsq.h b/src/npair_nsq.h
new file mode 100644
index 0000000000..7c4a22e1a7
--- /dev/null
+++ b/src/npair_nsq.h
@@ -0,0 +1,75 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NPAIR_CLASS
+// clang-format off
+typedef NPairNsq<0, 1, 0, 0> NPairFullNsq;
+NPairStyle(full/nsq,
+           NPairFullNsq,
+           NP_FULL | NP_NSQ | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairNsq<1, 0, 0, 0> NPairHalfNsqNewtoff;
+NPairStyle(half/nsq/newtoff,
+           NPairHalfNsqNewtoff,
+           NP_HALF | NP_NSQ | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairNsq<1, 1, 0, 0> NPairHalfNsqNewton;
+NPairStyle(half/nsq/newton,
+           NPairHalfNsqNewton,
+           NP_HALF | NP_NSQ | NP_NEWTON | NP_ORTHO);
+
+typedef NPairNsq<1, 1, 1, 0> NPairHalfNsqNewtonTri;
+NPairStyle(half/nsq/newton/tri,
+           NPairHalfNsqNewtonTri,
+           NP_HALF | NP_NSQ | NP_NEWTON | NP_TRI);
+
+typedef NPairNsq<0, 1, 0, 1> NPairFullSizeNsq;
+NPairStyle(full/size/nsq,
+           NPairFullSizeNsq,
+           NP_FULL | NP_SIZE | NP_NSQ | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairNsq<1, 0, 0, 1> NPairHalfSizeNsqNewtoff;
+NPairStyle(half/size/nsq/newtoff,
+           NPairHalfSizeNsqNewtoff,
+           NP_HALF | NP_SIZE | NP_NSQ | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairNsq<1, 1, 0, 1> NPairHalfSizeNsqNewton;
+NPairStyle(half/size/nsq/newton,
+           NPairHalfSizeNsqNewton,
+           NP_HALF | NP_SIZE | NP_NSQ | NP_NEWTON | NP_ORTHO);
+
+typedef NPairNsq<1, 1, 1, 1> NPairHalfSizeNsqNewtonTri;
+NPairStyle(half/size/nsq/newton/tri,
+           NPairHalfSizeNsqNewtonTri,
+           NP_HALF | NP_SIZE | NP_NSQ | NP_NEWTON | NP_TRI);
+// clang-format on
+#else
+
+#ifndef LMP_NPAIR_NSQ_H
+#define LMP_NPAIR_NSQ_H
+
+#include "npair.h"
+
+namespace LAMMPS_NS {
+
+template<int HALF, int NEWTON, int TRI, int SIZE>
+class NPairNsq : public NPair {
+ public:
+  NPairNsq(class LAMMPS *);
+  void build(class NeighList *) override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/npair_half_nsq_newtoff_ghost.cpp b/src/npair_nsq_ghost.cpp
similarity index 65%
rename from src/npair_half_nsq_newtoff_ghost.cpp
rename to src/npair_nsq_ghost.cpp
index cef9d5203c..47009deff7 100644
--- a/src/npair_half_nsq_newtoff_ghost.cpp
+++ b/src/npair_nsq_ghost.cpp
@@ -12,33 +12,42 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#include "npair_half_nsq_newtoff_ghost.h"
+#include "npair_nsq_ghost.h"
 #include "neigh_list.h"
 #include "atom.h"
 #include "atom_vec.h"
 #include "group.h"
 #include "molecule.h"
+#include "neighbor.h"
 #include "domain.h"
 #include "my_page.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
+using namespace NeighConst;
 
 /* ---------------------------------------------------------------------- */
 
-NPairHalfNsqNewtoffGhost::NPairHalfNsqNewtoffGhost(LAMMPS *lmp) : NPair(lmp) {}
+template<int HALF>
+NPairNsqGhost<HALF>::NPairNsqGhost(LAMMPS *lmp) : NPair(lmp) {}
 
 /* ----------------------------------------------------------------------
-   N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
-   include neighbors of ghost atoms, but no "special neighbors" for ghosts
-   pair stored once if i,j are both owned and i < j
-   pair stored by me if i owned and j ghost (also stored by proc owning j)
-   pair stored once if i,j are both ghost and i < j
+   Full:
+     N^2 search for all neighbors
+     include neighbors of ghost atoms, but no "special neighbors" for ghosts
+     every neighbor pair appears in list of both atoms i and j
+   Half + Newtoff:
+     N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
+     include neighbors of ghost atoms, but no "special neighbors" for ghosts
+     pair stored once if i,j are both owned and i < j
+     pair stored by me if i owned and j ghost (also stored by proc owning j)
+     pair stored once if i,j are both ghost and i < j
 ------------------------------------------------------------------------- */
 
-void NPairHalfNsqNewtoffGhost::build(NeighList *list)
+template<int HALF>
+void NPairNsqGhost<HALF>::build(NeighList *list)
 {
-  int i,j,n,itype,jtype,which,bitmask,imol,iatom,moltemplate;
+  int i,j,jstart,n,itype,jtype,which,bitmask,imol,iatom,moltemplate;
   tagint tagprev;
   double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
   int *neighptr;
@@ -72,7 +81,6 @@ void NPairHalfNsqNewtoffGhost::build(NeighList *list)
   ipage->reset();
 
   // loop over owned & ghost atoms, storing neighbors
-
   for (i = 0; i < nall; i++) {
     n = 0;
     neighptr = ipage->vget();
@@ -87,23 +95,33 @@ void NPairHalfNsqNewtoffGhost::build(NeighList *list)
       tagprev = tag[i] - iatom - 1;
     }
 
-    // loop over remaining atoms, owned and ghost
-    // only store pair if i < j
-    // stores own/own pairs only once
-    // stores own/ghost pairs with owned atom only, on both procs
-    // stores ghost/ghost pairs only once
+    // loop over all atoms, owned and ghost
+    // Full:
+    //   skip i = j
+    // Half:
+    //   only store pair if i < j
+    //   stores own/own pairs only once
+    //   stores own/ghost pairs with owned atom only, on both procs
+    //   stores ghost/ghost pairs only once
     // no molecular test when i = ghost atom
 
+    if (HALF) jstart = i + 1;
+    else jstart = 0;
+
     if (i < nlocal) {
-      for (j = i+1; j < nall; j++) {
-        if (includegroup && !(mask[j] & bitmask)) continue;
+      for (j = jstart; j < nall; j++) {
+        if (includegroup && !(mask[j] & bitmask)) continue; // JTC: missing in original full version
+        if (!HALF) {
+          if (i == j) continue;
+        }
+
         jtype = type[j];
         if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
 
         delx = xtmp - x[j][0];
         dely = ytmp - x[j][1];
         delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
+        rsq = delx * delx + dely * dely + delz * delz;
 
         if (rsq <= cutneighsq[itype][jtype]) {
           if (molecular != Atom::ATOMIC) {
@@ -121,19 +139,26 @@ void NPairHalfNsqNewtoffGhost::build(NeighList *list)
           } else neighptr[n++] = j;
         }
       }
-
     } else {
-      for (j = i+1; j < nall; j++) {
-        if (includegroup && !(mask[j] & bitmask)) continue;
+      for (j = jstart; j < nall; j++) {
+        if (includegroup && !(mask[j] & bitmask)) continue; // JTC: missing in original full version
+        if (!HALF) {
+          if (i == j) continue;
+        }
+
         jtype = type[j];
         if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
 
         delx = xtmp - x[j][0];
         dely = ytmp - x[j][1];
         delz = ztmp - x[j][2];
-        rsq = delx*delx + dely*dely + delz*delz;
+        rsq = delx * delx + dely * dely + delz * delz;
 
-        if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
+        if (HALF) {
+          if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j;
+        } else {
+          if (rsq <= cutneighghostsq[itype][jtype]) neighptr[n++] = j;
+        }
       }
     }
 
@@ -148,3 +173,8 @@ void NPairHalfNsqNewtoffGhost::build(NeighList *list)
   list->inum = atom->nlocal;
   list->gnum = inum - atom->nlocal;
 }
+
+namespace LAMMPS_NS {
+template class NPairNsqGhost<0>;
+template class NPairNsqGhost<1>;
+}
diff --git a/src/npair_half_nsq_newtoff_ghost.h b/src/npair_nsq_ghost.h
similarity index 71%
rename from src/npair_half_nsq_newtoff_ghost.h
rename to src/npair_nsq_ghost.h
index 86f3f9e36f..516f0bd929 100644
--- a/src/npair_half_nsq_newtoff_ghost.h
+++ b/src/npair_nsq_ghost.h
@@ -13,22 +13,29 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+typedef NPairNsqGhost<0> NPairFullNsqGhost;
+NPairStyle(full/nsq/ghost,
+           NPairFullNsqGhost,
+           NP_FULL | NP_NSQ | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI);
+
+typedef NPairNsqGhost<1> NPairHalfNsqNewtoffGhost;
 NPairStyle(half/nsq/newtoff/ghost,
            NPairHalfNsqNewtoffGhost,
            NP_HALF | NP_NSQ | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_NSQ_NEWTOFF_GHOST_H
-#define LMP_NPAIR_HALF_NSQ_NEWTOFF_GHOST_H
+#ifndef LMP_NPAIR_NSQ_GHOST_H
+#define LMP_NPAIR_NSQ_GHOST_H
 
 #include "npair.h"
 
 namespace LAMMPS_NS {
 
-class NPairHalfNsqNewtoffGhost : public NPair {
+template<int HALF>
+class NPairNsqGhost : public NPair {
  public:
-  NPairHalfNsqNewtoffGhost(class LAMMPS *);
+  NPairNsqGhost(class LAMMPS *);
   void build(class NeighList *) override;
 };
 
diff --git a/src/npair_respa_bin.cpp b/src/npair_respa_bin.cpp
new file mode 100644
index 0000000000..f2fb9f7486
--- /dev/null
+++ b/src/npair_respa_bin.cpp
@@ -0,0 +1,261 @@
+// clang-format off
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "npair_respa_bin.h"
+
+#include "atom.h"
+#include "atom_vec.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "molecule.h"
+#include "my_page.h"
+#include "neigh_list.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<int NEWTON, int TRI>
+NPairRespaBin<NEWTON, TRI>::NPairRespaBin(LAMMPS *lmp) : NPair(lmp) {}
+
+/* ----------------------------------------------------------------------
+   multiple respa lists
+   Newtoff
+     binned neighbor list construction with partial Newton's 3rd law
+     each owned atom i checks own bin and surrounding bins in non-Newton stencil
+     pair stored once if i,j are both owned and i < j
+     pair stored by me if j is ghost (also stored by proc owning j)
+   Newton
+     binned neighbor list construction with full Newton's 3rd law
+     each owned atom i checks its own bin and other bins in Newton stencil
+     every pair stored exactly once by some processor
+------------------------------------------------------------------------- */
+
+template<int NEWTON, int TRI>
+void NPairRespaBin<NEWTON, TRI>::build(NeighList *list)
+{
+  int i, j, k, n, itype, jtype, ibin, bin_start, n_inner, n_middle, imol, iatom, moltemplate;
+  tagint itag, jtag, tagprev;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
+  int *neighptr, *neighptr_inner, *neighptr_middle;
+
+  const double delta = 0.01 * force->angstrom;
+
+  double **x = atom->x;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  tagint *tag = atom->tag;
+  tagint *molecule = atom->molecule;
+  tagint **special = atom->special;
+  int **nspecial = atom->nspecial;
+  int nlocal = atom->nlocal;
+  if (includegroup) nlocal = atom->nfirst;
+
+  int *molindex = atom->molindex;
+  int *molatom = atom->molatom;
+  Molecule **onemols = atom->avec->onemols;
+  if (molecular == Atom::TEMPLATE)
+    moltemplate = 1;
+  else
+    moltemplate = 0;
+
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+  MyPage<int> *ipage = list->ipage;
+
+  int *ilist_inner = list->ilist_inner;
+  int *numneigh_inner = list->numneigh_inner;
+  int **firstneigh_inner = list->firstneigh_inner;
+  MyPage<int> *ipage_inner = list->ipage_inner;
+
+  int *ilist_middle, *numneigh_middle, **firstneigh_middle;
+  MyPage<int> *ipage_middle;
+  int respamiddle = list->respamiddle;
+  if (respamiddle) {
+    ilist_middle = list->ilist_middle;
+    numneigh_middle = list->numneigh_middle;
+    firstneigh_middle = list->firstneigh_middle;
+    ipage_middle = list->ipage_middle;
+  }
+
+  int inum = 0;
+  int which = 0;
+  int minchange = 0;
+  ipage->reset();
+  ipage_inner->reset();
+  if (respamiddle) ipage_middle->reset();
+
+  for (i = 0; i < nlocal; i++) {
+    n = n_inner = 0;
+    neighptr = ipage->vget();
+    neighptr_inner = ipage_inner->vget();
+    if (respamiddle) {
+      n_middle = 0;
+      neighptr_middle = ipage_middle->vget();
+    }
+
+    itag = tag[i];
+    itype = type[i];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    if (moltemplate) {
+      imol = molindex[i];
+      iatom = molatom[i];
+      tagprev = tag[i] - iatom - 1;
+    }
+
+    ibin = atom2bin[i];
+
+    for (k = 0; k < nstencil; k++) {
+      bin_start = binhead[ibin+stencil[k]];
+      if (NEWTON && (!TRI)) {
+        if (k == 0) {
+          // Half neighbor list, newton on, orthonormal
+          // loop over rest of atoms in i's bin, ghosts are at end of linked list
+          bin_start = bins[i];
+        }
+      }
+
+      for (j = bin_start; j >= 0; j = bins[j]) {
+        if (!NEWTON) {
+          // Half neighbor list, newton off
+          // only store pair if i < j
+          // stores own/own pairs only once
+          // stores own/ghost pairs on both procs
+          if (j <= i) continue;
+        } else if (TRI) {
+          // Half neighbor list, newton on, triclinic
+          // for triclinic, bin stencil is full in all 3 dims
+          // must use itag/jtag to eliminate half the I/J interactions
+          // cannot use I/J exact coord comparision
+          //   b/c transforming orthog -> lambda -> orthog for ghost atoms
+          //   with an added PBC offset can shift all 3 coords by epsilon
+          if (j <= i) continue;
+          if (j >= nlocal) {
+            jtag = tag[j];
+            if (itag > jtag) {
+              if ((itag + jtag) % 2 == 0) continue;
+            } else if (itag < jtag) {
+              if ((itag + jtag) % 2 == 1) continue;
+            } else {
+              if (fabs(x[j][2] - ztmp) > delta) {
+                if (x[j][2] < ztmp) continue;
+              } else if (fabs(x[j][1] - ytmp) > delta) {
+                if (x[j][1] < ytmp) continue;
+              } else {
+                if (x[j][0] < xtmp) continue;
+              }
+            }
+          }
+        } else {
+          // Half neighbor list, newton on, orthonormal
+          // store every pair for every bin in stencil,except for i's bin
+
+          if (k == 0) {
+            // if j is owned atom, store it, since j is beyond i in linked list
+            // if j is ghost, only store if j coords are "above and to the "right" of i
+            if (j >= nlocal) {
+              if (x[j][2] < ztmp) continue;
+              if (x[j][2] == ztmp) {
+                if (x[j][1] < ytmp) continue;
+                if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+              }
+            }
+          }
+        }
+
+        jtype = type[j];
+        if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
+
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+
+        if (rsq <= cutneighsq[itype][jtype]) {
+          if (molecular != Atom::ATOMIC) {
+            if (!moltemplate)
+              which = find_special(special[i], nspecial[i], tag[j]);
+            else if (imol >= 0)
+              which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                   tag[j] - tagprev);
+            else
+              which = 0;
+            if (which == 0)
+              neighptr[n++] = j;
+            else if ((minchange = domain->minimum_image_check(delx, dely, delz)))
+              neighptr[n++] = j;
+            else if (which > 0)
+              neighptr[n++] = j ^ (which << SBBITS);
+          } else
+            neighptr[n++] = j;
+
+          if (rsq < cut_inner_sq) {
+            if (which == 0)
+              neighptr_inner[n_inner++] = j;
+            else if (minchange)
+              neighptr_inner[n_inner++] = j;
+            else if (which > 0)
+              neighptr_inner[n_inner++] = j ^ (which << SBBITS);
+          }
+
+          if (respamiddle &&
+              rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
+            if (which == 0)
+              neighptr_middle[n_middle++] = j;
+            else if (minchange)
+              neighptr_middle[n_middle++] = j;
+            else if (which > 0)
+              neighptr_middle[n_middle++] = j ^ (which << SBBITS);
+          }
+        }
+      }
+    }
+
+    ilist[inum] = i;
+    firstneigh[i] = neighptr;
+    numneigh[i] = n;
+    ipage->vgot(n);
+    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+
+    ilist_inner[inum] = i;
+    firstneigh_inner[i] = neighptr_inner;
+    numneigh_inner[i] = n_inner;
+    ipage_inner->vgot(n_inner);
+    if (ipage_inner->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+
+    if (respamiddle) {
+      ilist_middle[inum] = i;
+      firstneigh_middle[i] = neighptr_middle;
+      numneigh_middle[i] = n_middle;
+      ipage_middle->vgot(n_middle);
+      if (ipage_middle->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
+    }
+
+    inum++;
+  }
+
+  list->inum = inum;
+  list->inum_inner = inum;
+  if (respamiddle) list->inum_middle = inum;
+}
+
+namespace LAMMPS_NS {
+template class NPairRespaBin<0,0>;
+template class NPairRespaBin<1,0>;
+template class NPairRespaBin<1,1>;
+}
diff --git a/src/npair_half_respa_bin_newtoff.h b/src/npair_respa_bin.h
similarity index 62%
rename from src/npair_half_respa_bin_newtoff.h
rename to src/npair_respa_bin.h
index a6d8b10de0..d721275eb4 100644
--- a/src/npair_half_respa_bin_newtoff.h
+++ b/src/npair_respa_bin.h
@@ -13,22 +13,34 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+typedef NPairRespaBin<0, 0> NPairHalfRespaBinNewtoff;
 NPairStyle(half/respa/bin/newtoff,
            NPairHalfRespaBinNewtoff,
            NP_HALF | NP_RESPA | NP_BIN | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairRespaBin<1, 0> NPairHalfRespaBinNewton;
+NPairStyle(half/respa/bin/newton,
+           NPairHalfRespaBinNewton,
+           NP_HALF | NP_RESPA | NP_BIN | NP_NEWTON | NP_ORTHO);
+
+typedef NPairRespaBin<1, 1> NPairHalfRespaBinNewtonTri;
+NPairStyle(half/respa/bin/newton/tri,
+           NPairHalfRespaBinNewtonTri,
+           NP_HALF | NP_RESPA | NP_BIN | NP_NEWTON | NP_TRI);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_RESPA_BIN_NEWTOFF_H
-#define LMP_NPAIR_HALF_RESPA_BIN_NEWTOFF_H
+#ifndef LMP_NPAIR_RESPA_BIN_H
+#define LMP_NPAIR_RESPA_BIN_H
 
 #include "npair.h"
 
 namespace LAMMPS_NS {
 
-class NPairHalfRespaBinNewtoff : public NPair {
+template<int NEWTON, int TRI>
+class NPairRespaBin : public NPair {
  public:
-  NPairHalfRespaBinNewtoff(class LAMMPS *);
+  NPairRespaBin(class LAMMPS *);
   void build(class NeighList *) override;
 };
 
diff --git a/src/npair_half_respa_nsq_newton.cpp b/src/npair_respa_nsq.cpp
similarity index 56%
rename from src/npair_half_respa_nsq_newton.cpp
rename to src/npair_respa_nsq.cpp
index ae56d62fb5..9ca166a491 100644
--- a/src/npair_half_respa_nsq_newton.cpp
+++ b/src/npair_respa_nsq.cpp
@@ -12,7 +12,7 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#include "npair_half_respa_nsq_newton.h"
+#include "npair_respa_nsq.h"
 
 #include "atom.h"
 #include "atom_vec.h"
@@ -28,26 +28,39 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-NPairHalfRespaNsqNewton::NPairHalfRespaNsqNewton(LAMMPS *lmp) : NPair(lmp) {}
+template<int NEWTON, int TRI>
+NPairRespaNsq<NEWTON, TRI>::NPairRespaNsq(LAMMPS *lmp) : NPair(lmp) {}
 
 /* ----------------------------------------------------------------------
    multiple respa lists
-   N^2 / 2 search for neighbor pairs with full Newton's 3rd law
-   pair added to list if atoms i and j are both owned and i < j
-   if j is ghost only me or other proc adds pair
-   decision based on itag,jtag tests
+   Newtoff
+     N^2 / 2 search for neighbor pairs with partial Newton's 3rd law
+     pair added to list if atoms i and j are both owned and i < j
+     pair added if j is ghost (also stored by proc owning j)
+  Newton
+     N^2 / 2 search for neighbor pairs with full Newton's 3rd law
+     pair added to list if atoms i and j are both owned and i < j
+     if j is ghost only me or other proc adds pair
+     decision based on itag,jtag tests
+     use itag/jtag comparision to eliminate half the interactions
+     itag = jtag is possible for long cutoffs that include images of self
+  Newton + Triclinic:
+     for triclinic, must use delta to eliminate half the I/J interactions
+     cannot use I/J exact coord comparision as for orthog
+     b/c transforming orthog -> lambda -> orthog for ghost atoms
+     with an added PBC offset can shift all 3 coords by epsilon
+
 ------------------------------------------------------------------------- */
 
-void NPairHalfRespaNsqNewton::build(NeighList *list)
+template<int NEWTON, int TRI>
+void NPairRespaNsq<NEWTON, TRI>::build(NeighList *list)
 {
-  int i,j,n,itype,jtype,n_inner,n_middle,bitmask;
-  int imol,iatom,moltemplate;
-  tagint itag,jtag,tagprev;
-  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
-  int *neighptr,*neighptr_inner,*neighptr_middle;
+  int i, j, n, itype, jtype, n_inner, n_middle, bitmask, imol, iatom, moltemplate;
+  tagint itag, jtag, tagprev;
+  double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
+  int *neighptr, *neighptr_inner, *neighptr_middle;
 
   const double delta = 0.01 * force->angstrom;
-  const int triclinic = domain->triclinic;
 
   double **x = atom->x;
   int *type = atom->type;
@@ -79,7 +92,7 @@ void NPairHalfRespaNsqNewton::build(NeighList *list)
   int **firstneigh_inner = list->firstneigh_inner;
   MyPage<int> *ipage_inner = list->ipage_inner;
 
-  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
+  int *ilist_middle, *numneigh_middle, **firstneigh_middle;
   MyPage<int> *ipage_middle;
   int respamiddle = list->respamiddle;
   if (respamiddle) {
@@ -105,8 +118,8 @@ void NPairHalfRespaNsqNewton::build(NeighList *list)
       neighptr_middle = ipage_middle->vget();
     }
 
-    itag = tag[i];
     itype = type[i];
+    itag = tag[i];
     xtmp = x[i][0];
     ytmp = x[i][1];
     ztmp = x[i][2];
@@ -124,65 +137,74 @@ void NPairHalfRespaNsqNewton::build(NeighList *list)
     //   b/c transforming orthog -> lambda -> orthog for ghost atoms
     //   with an added PBC offset can shift all 3 coords by epsilon
 
-    for (j = i+1; j < nall; j++) {
+    for (j = i + 1; j < nall; j++) {
       if (includegroup && !(mask[j] & bitmask)) continue;
 
-      if (j >= nlocal) {
-        jtag = tag[j];
-        if (itag > jtag) {
-          if ((itag+jtag) % 2 == 0) continue;
-        } else if (itag < jtag) {
-          if ((itag+jtag) % 2 == 1) continue;
-        } else if (triclinic) {
-          if (fabs(x[j][2]-ztmp) > delta) {
-            if (x[j][2] < ztmp) continue;
-          } else if (fabs(x[j][1]-ytmp) > delta) {
-            if (x[j][1] < ytmp) continue;
+      if (NEWTON) {
+        if (j >= nlocal) {
+          jtag = tag[j];
+          if (itag > jtag) {
+            if ((itag + jtag) % 2 == 0) continue;
+          } else if (itag < jtag) {
+            if ((itag + jtag) % 2 == 1) continue;
+          } else if (TRI) {
+            if (fabs(x[j][2] - ztmp) > delta) {
+              if (x[j][2] < ztmp) continue;
+            } else if (fabs(x[j][1] - ytmp) > delta) {
+              if (x[j][1] < ytmp) continue;
+            } else {
+              if (x[j][0] < xtmp) continue;
+            }
           } else {
-            if (x[j][0] < xtmp) continue;
-          }
-        } else {
-          if (x[j][2] < ztmp) continue;
-          if (x[j][2] == ztmp) {
-            if (x[j][1] < ytmp) continue;
-            if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+            if (x[j][2] < ztmp) continue;
+            if (x[j][2] == ztmp) {
+              if (x[j][1] < ytmp) continue;
+              if (x[j][1] == ytmp && x[j][0] < xtmp) continue;
+            }
           }
         }
       }
 
       jtype = type[j];
-      if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
+      if (exclude && exclusion(i, j, itype, jtype, mask, molecule)) continue;
 
       delx = xtmp - x[j][0];
       dely = ytmp - x[j][1];
       delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+      rsq = delx * delx + dely * dely + delz * delz;
 
       if (rsq <= cutneighsq[itype][jtype]) {
         if (molecular != Atom::ATOMIC) {
           if (!moltemplate)
-            which = find_special(special[i],nspecial[i],tag[j]);
+            which = find_special(special[i], nspecial[i], tag[j]);
           else if (imol >= 0)
-            which = find_special(onemols[imol]->special[iatom],
-                                 onemols[imol]->nspecial[iatom],
-                                 tag[j]-tagprev);
-          else which = 0;
-          if (which == 0) neighptr[n++] = j;
-          else if ((minchange = domain->minimum_image_check(delx,dely,delz)))
+            which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom],
+                                 tag[j] - tagprev);
+          else
+            which = 0;
+          if (which == 0)
             neighptr[n++] = j;
-          else if (which > 0) neighptr[n++] = j ^ (which << SBBITS);
-        } else neighptr[n++] = j;
+          else if ((minchange = domain->minimum_image_check(delx, dely, delz)))
+            neighptr[n++] = j;
+          else if (which > 0)
+            neighptr[n++] = j ^ (which << SBBITS);
+        } else
+          neighptr[n++] = j;
 
         if (rsq < cut_inner_sq) {
-          if (which == 0) neighptr_inner[n_inner++] = j;
-          else if (minchange) neighptr_inner[n_inner++] = j;
-          else if (which > 0) neighptr_inner[n_inner++] = j ^ (which << SBBITS);
+          if (which == 0)
+            neighptr_inner[n_inner++] = j;
+          else if (minchange)
+            neighptr_inner[n_inner++] = j;
+          else if (which > 0)
+            neighptr_inner[n_inner++] = j ^ (which << SBBITS);
         }
 
-        if (respamiddle &&
-            rsq < cut_middle_sq && rsq > cut_middle_inside_sq) {
-          if (which == 0) neighptr_middle[n_middle++] = j;
-          else if (minchange) neighptr_middle[n_middle++] = j;
+        if (respamiddle && (rsq < cut_middle_sq) && (rsq > cut_middle_inside_sq)) {
+          if (which == 0)
+            neighptr_middle[n_middle++] = j;
+          else if (minchange)
+            neighptr_middle[n_middle++] = j;
           else if (which > 0)
             neighptr_middle[n_middle++] = j ^ (which << SBBITS);
         }
@@ -193,23 +215,20 @@ void NPairHalfRespaNsqNewton::build(NeighList *list)
     firstneigh[i] = neighptr;
     numneigh[i] = n;
     ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
 
     ilist_inner[inum] = i;
     firstneigh_inner[i] = neighptr_inner;
     numneigh_inner[i] = n_inner;
     ipage_inner->vgot(n_inner);
-    if (ipage_inner->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    if (ipage_inner->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
 
     if (respamiddle) {
       ilist_middle[inum] = i;
       firstneigh_middle[i] = neighptr_middle;
       numneigh_middle[i] = n_middle;
       ipage_middle->vgot(n_middle);
-      if (ipage_middle->status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+      if (ipage_middle->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
     }
 
     inum++;
@@ -219,3 +238,9 @@ void NPairHalfRespaNsqNewton::build(NeighList *list)
   list->inum_inner = inum;
   if (respamiddle) list->inum_middle = inum;
 }
+
+namespace LAMMPS_NS {
+template class NPairRespaNsq<0,0>;
+template class NPairRespaNsq<1,0>;
+template class NPairRespaNsq<1,1>;
+}
diff --git a/src/npair_half_respa_nsq_newtoff.h b/src/npair_respa_nsq.h
similarity index 62%
rename from src/npair_half_respa_nsq_newtoff.h
rename to src/npair_respa_nsq.h
index e0f3ae8380..83bed2e8f2 100644
--- a/src/npair_half_respa_nsq_newtoff.h
+++ b/src/npair_respa_nsq.h
@@ -13,22 +13,34 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+typedef NPairRespaNsq<0,0> NPairHalfRespaNsqNewtoff;
 NPairStyle(half/respa/nsq/newtoff,
            NPairHalfRespaNsqNewtoff,
            NP_HALF | NP_RESPA | NP_NSQ | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairRespaNsq<1,0> NPairHalfRespaNsqNewton;
+NPairStyle(half/respa/nsq/newton,
+           NPairHalfRespaNsqNewton,
+           NP_HALF | NP_RESPA | NP_NSQ | NP_NEWTON | NP_ORTHO);
+
+typedef NPairRespaNsq<1,1> NPairHalfRespaNsqNewtonTri;
+NPairStyle(half/respa/nsq/newton/tri,
+           NPairHalfRespaNsqNewtonTri,
+           NP_HALF | NP_RESPA | NP_NSQ | NP_NEWTON | NP_TRI);
 // clang-format on
 #else
 
-#ifndef LMP_NPAIR_HALF_RESPA_NSQ_NEWTOFF_H
-#define LMP_NPAIR_HALF_RESPA_NSQ_NEWTOFF_H
+#ifndef LMP_NPAIR_RESPA_NSQ_H
+#define LMP_NPAIR_RESPA_NSQ_H
 
 #include "npair.h"
 
 namespace LAMMPS_NS {
 
-class NPairHalfRespaNsqNewtoff : public NPair {
+template<int NEWTON, int TRI>
+class NPairRespaNsq : public NPair {
  public:
-  NPairHalfRespaNsqNewtoff(class LAMMPS *);
+  NPairRespaNsq(class LAMMPS *);
   void build(class NeighList *) override;
 };
 
diff --git a/src/npair_skip.cpp b/src/npair_skip.cpp
index d9d4fa491f..6afb43bc16 100644
--- a/src/npair_skip.cpp
+++ b/src/npair_skip.cpp
@@ -22,7 +22,8 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-NPairSkip::NPairSkip(LAMMPS *lmp) : NPair(lmp) {}
+template<int TRIM>
+NPairSkipTemp<TRIM>::NPairSkipTemp(LAMMPS *lmp) : NPair(lmp) {}
 
 /* ----------------------------------------------------------------------
    build skip list for subset of types from parent list
@@ -32,7 +33,8 @@ NPairSkip::NPairSkip(LAMMPS *lmp) : NPair(lmp) {}
    if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
 ------------------------------------------------------------------------- */
 
-void NPairSkip::build(NeighList *list)
+template<int TRIM>
+void NPairSkipTemp<TRIM>::build(NeighList *list)
 {
   int i, j, ii, jj, n, itype, jnum, joriginal;
   int *neighptr, *jlist;
@@ -57,6 +59,11 @@ void NPairSkip::build(NeighList *list)
   int inum = 0;
   ipage->reset();
 
+  double **x = atom->x;
+  double xtmp, ytmp, ztmp;
+  double delx, dely, delz, rsq;
+  double cutsq_custom = cutoff_custom * cutoff_custom;
+
   // loop over atoms in other list
   // skip I atom entirely if iskip is set for type[I]
   // skip I,J pair if ijskip is set for type[I],type[J]
@@ -66,6 +73,12 @@ void NPairSkip::build(NeighList *list)
     itype = type[i];
     if (iskip[itype]) continue;
 
+    if (TRIM) {
+      xtmp = x[i][0];
+      ytmp = x[i][1];
+      ztmp = x[i][2];
+    }
+
     n = 0;
     neighptr = ipage->vget();
 
@@ -78,6 +91,15 @@ void NPairSkip::build(NeighList *list)
       joriginal = jlist[jj];
       j = joriginal & NEIGHMASK;
       if (ijskip[itype][type[j]]) continue;
+
+      if (TRIM) {
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+        if (rsq > cutsq_custom) continue;
+      }
+
       neighptr[n++] = joriginal;
     }
 
@@ -100,3 +122,8 @@ void NPairSkip::build(NeighList *list)
     list->gnum = inum - num;
   }
 }
+
+namespace LAMMPS_NS {
+template class NPairSkipTemp<0>;
+template class NPairSkipTemp<1>;
+}
diff --git a/src/npair_skip.h b/src/npair_skip.h
index 4e85174730..cb0d201555 100644
--- a/src/npair_skip.h
+++ b/src/npair_skip.h
@@ -13,17 +13,46 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+typedef NPairSkipTemp<0> NPairSkip;
 NPairStyle(skip,
            NPairSkip,
            NP_SKIP | NP_HALF | NP_FULL |
            NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
            NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
 
+typedef NPairSkipTemp<0> NPairSkip;
 NPairStyle(skip/ghost,
            NPairSkip,
            NP_SKIP | NP_HALF | NP_FULL |
            NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
            NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_GHOST);
+
+typedef NPairSkipTemp<0> NPairSkipSize;
+NPairStyle(skip/half/size,
+           NPairSkipSize,
+           NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairSkipTemp<1> NPairSkipTrim;
+NPairStyle(skip/trim,
+           NPairSkipTrim,
+           NP_SKIP | NP_HALF | NP_FULL |
+           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM);
+
+typedef NPairSkipTemp<1> NPairSkipTrim;
+NPairStyle(skip/ghost/trim,
+           NPairSkipTrim,
+           NP_SKIP | NP_HALF | NP_FULL |
+           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM);
+
+typedef NPairSkipTemp<1> NPairSkipTrimSize;
+NPairStyle(skip/trim/half/size,
+           NPairSkipTrimSize,
+           NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM);
+
 // clang-format on
 #else
 
@@ -34,9 +63,10 @@ NPairStyle(skip/ghost,
 
 namespace LAMMPS_NS {
 
-class NPairSkip : public NPair {
+template<int TRIM>
+class NPairSkipTemp : public NPair {
  public:
-  NPairSkip(class LAMMPS *);
+  NPairSkipTemp(class LAMMPS *);
   void build(class NeighList *) override;
 };
 
diff --git a/src/npair_skip_respa.cpp b/src/npair_skip_respa.cpp
index 022c0d5f60..4c3dda91eb 100644
--- a/src/npair_skip_respa.cpp
+++ b/src/npair_skip_respa.cpp
@@ -23,7 +23,8 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-NPairSkipRespa::NPairSkipRespa(LAMMPS *lmp) : NPair(lmp) {}
+template<int TRIM>
+NPairSkipRespaTemp<TRIM>::NPairSkipRespaTemp(LAMMPS *lmp) : NPair(lmp) {}
 
 /* ----------------------------------------------------------------------
    build skip list for subset of types from parent list
@@ -31,10 +32,11 @@ NPairSkipRespa::NPairSkipRespa(LAMMPS *lmp) : NPair(lmp) {}
    this is for respa lists, copy the inner/middle values from parent
 ------------------------------------------------------------------------- */
 
-void NPairSkipRespa::build(NeighList *list)
+template<int TRIM>
+void NPairSkipRespaTemp<TRIM>::build(NeighList *list)
 {
-  int i,j,ii,jj,n,itype,jnum,joriginal,n_inner,n_middle;
-  int *neighptr,*jlist,*neighptr_inner,*neighptr_middle;
+  int i, j, ii, jj, n, itype, jnum, joriginal, n_inner, n_middle;
+  int *neighptr, *jlist, *neighptr_inner, *neighptr_middle;
 
   int *type = atom->type;
 
@@ -58,9 +60,9 @@ void NPairSkipRespa::build(NeighList *list)
   int *numneigh_inner_skip = list->listskip->numneigh_inner;
   int **firstneigh_inner_skip = list->listskip->firstneigh_inner;
 
-  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
+  int *ilist_middle, *numneigh_middle, **firstneigh_middle;
   MyPage<int> *ipage_middle;
-  int *numneigh_middle_skip,**firstneigh_middle_skip;
+  int *numneigh_middle_skip, **firstneigh_middle_skip;
   int respamiddle = list->respamiddle;
   if (respamiddle) {
     ilist_middle = list->ilist_middle;
@@ -76,6 +78,11 @@ void NPairSkipRespa::build(NeighList *list)
   ipage_inner->reset();
   if (respamiddle) ipage_middle->reset();
 
+  double **x = atom->x;
+  double xtmp, ytmp, ztmp;
+  double delx, dely, delz, rsq;
+  double cutsq_custom = cutoff_custom * cutoff_custom;
+
   // loop over atoms in other list
   // skip I atom entirely if iskip is set for type[I]
   // skip I,J pair if ijskip is set for type[I],type[J]
@@ -85,6 +92,12 @@ void NPairSkipRespa::build(NeighList *list)
     itype = type[i];
     if (iskip[itype]) continue;
 
+    if (TRIM) {
+      xtmp = x[i][0];
+      ytmp = x[i][1];
+      ztmp = x[i][2];
+    }
+
     n = n_inner = 0;
     neighptr = ipage->vget();
     neighptr_inner = ipage_inner->vget();
@@ -102,6 +115,15 @@ void NPairSkipRespa::build(NeighList *list)
       joriginal = jlist[jj];
       j = joriginal & NEIGHMASK;
       if (ijskip[itype][type[j]]) continue;
+
+      if (TRIM) {
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+        if (rsq > cutsq_custom) continue;
+      }
+
       neighptr[n++] = joriginal;
     }
 
@@ -114,6 +136,15 @@ void NPairSkipRespa::build(NeighList *list)
       joriginal = jlist[jj];
       j = joriginal & NEIGHMASK;
       if (ijskip[itype][type[j]]) continue;
+
+      if (TRIM) {
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+        if (rsq > cutsq_custom) continue;
+      }
+
       neighptr_inner[n_inner++] = joriginal;
     }
 
@@ -127,6 +158,15 @@ void NPairSkipRespa::build(NeighList *list)
         joriginal = jlist[jj];
         j = joriginal & NEIGHMASK;
         if (ijskip[itype][type[j]]) continue;
+
+        if (TRIM) {
+          delx = xtmp - x[j][0];
+          dely = ytmp - x[j][1];
+          delz = ztmp - x[j][2];
+          rsq = delx * delx + dely * dely + delz * delz;
+          if (rsq > cutsq_custom) continue;
+        }
+
         neighptr_middle[n_middle++] = joriginal;
       }
     }
@@ -135,23 +175,20 @@ void NPairSkipRespa::build(NeighList *list)
     firstneigh[i] = neighptr;
     numneigh[i] = n;
     ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
 
     ilist_inner[inum] = i;
     firstneigh_inner[i] = neighptr_inner;
     numneigh_inner[i] = n_inner;
     ipage_inner->vgot(n);
-    if (ipage_inner->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    if (ipage_inner->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
 
     if (respamiddle) {
       ilist_middle[inum] = i;
       firstneigh_middle[i] = neighptr_middle;
       numneigh_middle[i] = n_middle;
       ipage_middle->vgot(n);
-      if (ipage_middle->status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+      if (ipage_middle->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
     }
 
     inum++;
@@ -161,3 +198,8 @@ void NPairSkipRespa::build(NeighList *list)
   list->inum_inner = inum;
   if (respamiddle) list->inum_middle = inum;
 }
+
+namespace LAMMPS_NS {
+template class NPairSkipRespaTemp<0>;
+template class NPairSkipRespaTemp<1>;
+}
diff --git a/src/npair_skip_respa.h b/src/npair_skip_respa.h
index 822fcc290b..af25e84faf 100644
--- a/src/npair_skip_respa.h
+++ b/src/npair_skip_respa.h
@@ -13,11 +13,20 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+typedef NPairSkipRespaTemp<0> NPairSkipRespa;
 NPairStyle(skip/half/respa,
            NPairSkipRespa,
            NP_SKIP | NP_RESPA | NP_HALF | NP_FULL |
            NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
            NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairSkipRespaTemp<1> NPairSkipTrimRespa;
+NPairStyle(skip/trim/half/respa,
+           NPairSkipTrimRespa,
+           NP_SKIP | NP_RESPA | NP_HALF | NP_FULL |
+           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM);
+
 // clang-format on
 #else
 
@@ -28,9 +37,10 @@ NPairStyle(skip/half/respa,
 
 namespace LAMMPS_NS {
 
-class NPairSkipRespa : public NPair {
+template<int TRIM>
+class NPairSkipRespaTemp : public NPair {
  public:
-  NPairSkipRespa(class LAMMPS *);
+  NPairSkipRespaTemp(class LAMMPS *);
   void build(class NeighList *) override;
 };
 
diff --git a/src/npair_skip_size.cpp b/src/npair_skip_size.cpp
deleted file mode 100644
index 22883b4e60..0000000000
--- a/src/npair_skip_size.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_skip_size.h"
-
-#include "atom.h"
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairSkipSize::NPairSkipSize(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build skip list for subset of types from parent list
-   iskip and ijskip flag which atom types and type pairs to skip
-------------------------------------------------------------------------- */
-
-void NPairSkipSize::build(NeighList *list)
-{
-  int i, j, ii, jj, n, itype, jnum, joriginal;
-  int *neighptr, *jlist;
-
-  int *type = atom->type;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_skip = list->listskip->ilist;
-  int *numneigh_skip = list->listskip->numneigh;
-  int **firstneigh_skip = list->listskip->firstneigh;
-  int inum_skip = list->listskip->inum;
-
-  int *iskip = list->iskip;
-  int **ijskip = list->ijskip;
-
-  int inum = 0;
-  ipage->reset();
-
-  // loop over atoms in other list
-  // skip I atom entirely if iskip is set for type[I]
-  // skip I,J pair if ijskip is set for type[I],type[J]
-
-  for (ii = 0; ii < inum_skip; ii++) {
-    i = ilist_skip[ii];
-    itype = type[i];
-    if (iskip[itype]) continue;
-
-    n = 0;
-    neighptr = ipage->vget();
-
-    // loop over parent non-skip size list
-
-    jlist = firstneigh_skip[i];
-    jnum = numneigh_skip[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-      if (ijskip[itype][type[j]]) continue;
-      neighptr[n++] = joriginal;
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_skip_size.h b/src/npair_skip_size.h
deleted file mode 100644
index 9c2f23447b..0000000000
--- a/src/npair_skip_size.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(skip/half/size,
-           NPairSkipSize,
-           NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_SKIP_SIZE_H
-#define LMP_NPAIR_SKIP_SIZE_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairSkipSize : public NPair {
- public:
-  NPairSkipSize(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_skip_size_off2on.cpp b/src/npair_skip_size_off2on.cpp
index f1b6d2f4fb..89e633b238 100644
--- a/src/npair_skip_size_off2on.cpp
+++ b/src/npair_skip_size_off2on.cpp
@@ -22,7 +22,8 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-NPairSkipSizeOff2on::NPairSkipSizeOff2on(LAMMPS *lmp) : NPair(lmp) {}
+template<int TRIM>
+NPairSkipSizeOff2onTemp<TRIM>::NPairSkipSizeOff2onTemp(LAMMPS *lmp) : NPair(lmp) {}
 
 /* ----------------------------------------------------------------------
    build skip list for subset of types from parent list
@@ -30,7 +31,8 @@ NPairSkipSizeOff2on::NPairSkipSizeOff2on(LAMMPS *lmp) : NPair(lmp) {}
    parent non-skip list used newton off, this skip list is newton on
 ------------------------------------------------------------------------- */
 
-void NPairSkipSizeOff2on::build(NeighList *list)
+template<int TRIM>
+void NPairSkipSizeOff2onTemp<TRIM>::build(NeighList *list)
 {
   int i, j, ii, jj, n, itype, jnum, joriginal;
   tagint itag, jtag;
@@ -56,6 +58,11 @@ void NPairSkipSizeOff2on::build(NeighList *list)
   int inum = 0;
   ipage->reset();
 
+  double **x = atom->x;
+  double xtmp, ytmp, ztmp;
+  double delx, dely, delz, rsq;
+  double cutsq_custom = cutoff_custom * cutoff_custom;
+
   // loop over atoms in other list
   // skip I atom entirely if iskip is set for type[I]
   // skip I,J pair if ijskip is set for type[I],type[J]
@@ -66,6 +73,12 @@ void NPairSkipSizeOff2on::build(NeighList *list)
     if (iskip[itype]) continue;
     itag = tag[i];
 
+    if (TRIM) {
+      xtmp = x[i][0];
+      ytmp = x[i][1];
+      ztmp = x[i][2];
+    }
+
     n = 0;
     neighptr = ipage->vget();
 
@@ -84,6 +97,14 @@ void NPairSkipSizeOff2on::build(NeighList *list)
       jtag = tag[j];
       if (j >= nlocal && jtag < itag) continue;
 
+      if (TRIM) {
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+        if (rsq > cutsq_custom) continue;
+      }
+
       neighptr[n++] = joriginal;
     }
 
@@ -95,3 +116,8 @@ void NPairSkipSizeOff2on::build(NeighList *list)
   }
   list->inum = inum;
 }
+
+namespace LAMMPS_NS {
+template class NPairSkipSizeOff2onTemp<0>;
+template class NPairSkipSizeOff2onTemp<1>;
+}
diff --git a/src/npair_skip_size_off2on.h b/src/npair_skip_size_off2on.h
index faed76f6c3..b86100ae20 100644
--- a/src/npair_skip_size_off2on.h
+++ b/src/npair_skip_size_off2on.h
@@ -13,11 +13,19 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+typedef NPairSkipSizeOff2onTemp<0> NPairSkipSizeOff2on;
 NPairStyle(skip/size/off2on,
            NPairSkipSizeOff2on,
            NP_SKIP | NP_SIZE | NP_OFF2ON | NP_HALF |
            NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
            NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI);
+
+typedef NPairSkipSizeOff2onTemp<1> NPairSkipTrimSizeOff2on;
+NPairStyle(skip/trim/size/off2on,
+           NPairSkipTrimSizeOff2on,
+           NP_SKIP | NP_SIZE | NP_OFF2ON | NP_HALF |
+           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
+           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM);
 // clang-format on
 #else
 
@@ -28,9 +36,10 @@ NPairStyle(skip/size/off2on,
 
 namespace LAMMPS_NS {
 
-class NPairSkipSizeOff2on : public NPair {
+template<int TRIM>
+class NPairSkipSizeOff2onTemp : public NPair {
  public:
-  NPairSkipSizeOff2on(class LAMMPS *);
+  NPairSkipSizeOff2onTemp(class LAMMPS *);
   void build(class NeighList *) override;
 };
 
diff --git a/src/npair_skip_size_off2on_oneside.cpp b/src/npair_skip_size_off2on_oneside.cpp
index 3300ef1526..7682b90d95 100644
--- a/src/npair_skip_size_off2on_oneside.cpp
+++ b/src/npair_skip_size_off2on_oneside.cpp
@@ -24,7 +24,8 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-NPairSkipSizeOff2onOneside::NPairSkipSizeOff2onOneside(LAMMPS *lmp) :
+template<int TRIM>
+NPairSkipSizeOff2onOnesideTemp<TRIM>::NPairSkipSizeOff2onOnesideTemp(LAMMPS *lmp) :
   NPair(lmp) {}
 
 /* ----------------------------------------------------------------------
@@ -34,10 +35,11 @@ NPairSkipSizeOff2onOneside::NPairSkipSizeOff2onOneside(LAMMPS *lmp) :
      this skip list is newton on and onesided
 ------------------------------------------------------------------------- */
 
-void NPairSkipSizeOff2onOneside::build(NeighList *list)
+template<int TRIM>
+void NPairSkipSizeOff2onOnesideTemp<TRIM>::build(NeighList *list)
 {
-  int i,j,ii,jj,itype,jnum,joriginal,flip,tmp;
-  int *surf,*jlist;
+  int i, j, ii, jj, itype, jnum, joriginal, flip, tmp;
+  int *surf, *jlist;
 
   int *type = atom->type;
   int nlocal = atom->nlocal;
@@ -61,6 +63,11 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list)
   int inum = 0;
   ipage->reset();
 
+  double **x = atom->x;
+  double xtmp, ytmp, ztmp;
+  double delx, dely, delz, rsq;
+  double cutsq_custom = cutoff_custom * cutoff_custom;
+
   // two loops over parent list required, one to count, one to store
   // because onesided constraint means pair I,J may be stored with I or J
   // so don't know in advance how much space to alloc for each atom's neighs
@@ -76,6 +83,12 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list)
     itype = type[i];
     if (iskip[itype]) continue;
 
+    if (TRIM) {
+      xtmp = x[i][0];
+      ytmp = x[i][1];
+      ztmp = x[i][2];
+    }
+
     // loop over parent non-skip size list
 
     jlist = firstneigh_skip[i];
@@ -86,6 +99,14 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list)
       j = joriginal & NEIGHMASK;
       if (ijskip[itype][type[j]]) continue;
 
+      if (TRIM) {
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+        if (rsq > cutsq_custom) continue;
+      }
+
       // flip I,J if necessary to satisfy onesided constraint
       // do not keep if I is now ghost
 
@@ -107,8 +128,7 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list)
   for (i = 0; i < nlocal; i++) {
     if (numneigh[i] == 0) continue;
     firstneigh[i] = ipage->get(numneigh[i]);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
+    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
   }
 
   // second loop over atoms in other list to store neighbors
@@ -122,6 +142,12 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list)
     itype = type[i];
     if (iskip[itype]) continue;
 
+    if (TRIM) {
+      xtmp = x[i][0];
+      ytmp = x[i][1];
+      ztmp = x[i][2];
+    }
+
     // loop over parent non-skip size list and optionally its history info
 
     jlist = firstneigh_skip[i];
@@ -132,6 +158,14 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list)
       j = joriginal & NEIGHMASK;
       if (ijskip[itype][type[j]]) continue;
 
+      if (TRIM) {
+        delx = xtmp - x[j][0];
+        dely = ytmp - x[j][1];
+        delz = ztmp - x[j][2];
+        rsq = delx * delx + dely * dely + delz * delz;
+        if (rsq > cutsq_custom) continue;
+      }
+
       // flip I,J if necessary to satisfy onesided constraint
       // do not keep if I is now ghost
 
@@ -158,3 +192,8 @@ void NPairSkipSizeOff2onOneside::build(NeighList *list)
 
   list->inum = inum;
 }
+
+namespace LAMMPS_NS {
+template class NPairSkipSizeOff2onOnesideTemp<0>;
+template class NPairSkipSizeOff2onOnesideTemp<1>;
+}
diff --git a/src/npair_skip_size_off2on_oneside.h b/src/npair_skip_size_off2on_oneside.h
index 48eccf7faf..a5259ef04b 100644
--- a/src/npair_skip_size_off2on_oneside.h
+++ b/src/npair_skip_size_off2on_oneside.h
@@ -13,11 +13,19 @@
 
 #ifdef NPAIR_CLASS
 // clang-format off
+typedef NPairSkipSizeOff2onOnesideTemp<0> NPairSkipSizeOff2onOneside;
 NPairStyle(skip/size/off2on/oneside,
            NPairSkipSizeOff2onOneside,
            NP_SKIP | NP_SIZE | NP_OFF2ON | NP_ONESIDE | NP_HALF |
            NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF |
            NP_ORTHO | NP_TRI);
+
+typedef NPairSkipSizeOff2onOnesideTemp<1> NPairSkipTrimSizeOff2onOneside;
+NPairStyle(skip/trim/size/off2on/oneside,
+           NPairSkipTrimSizeOff2onOneside,
+           NP_SKIP | NP_SIZE | NP_OFF2ON | NP_ONESIDE | NP_HALF |
+           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF |
+           NP_ORTHO | NP_TRI | NP_TRIM);
 // clang-format on
 #else
 
@@ -28,9 +36,10 @@ NPairStyle(skip/size/off2on/oneside,
 
 namespace LAMMPS_NS {
 
-class NPairSkipSizeOff2onOneside : public NPair {
+template<int TRIM>
+class NPairSkipSizeOff2onOnesideTemp : public NPair {
  public:
-  NPairSkipSizeOff2onOneside(class LAMMPS *);
+  NPairSkipSizeOff2onOnesideTemp(class LAMMPS *);
   void build(class NeighList *) override;
 };
 
diff --git a/src/npair_skip_trim.cpp b/src/npair_skip_trim.cpp
deleted file mode 100644
index a286a7e19e..0000000000
--- a/src/npair_skip_trim.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_skip_trim.h"
-
-#include "atom.h"
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairSkipTrim::NPairSkipTrim(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build skip list for subset of types from parent list
-   works for half and full lists
-   works for owned (non-ghost) list, also for ghost list
-   iskip and ijskip flag which atom types and type pairs to skip
-   if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
-------------------------------------------------------------------------- */
-
-void NPairSkipTrim::build(NeighList *list)
-{
-  int i, j, ii, jj, n, itype, jnum, joriginal;
-  int *neighptr, *jlist;
-
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_skip = list->listskip->ilist;
-  int *numneigh_skip = list->listskip->numneigh;
-  int **firstneigh_skip = list->listskip->firstneigh;
-  int num_skip = list->listskip->inum;
-  if (list->ghost) num_skip += list->listskip->gnum;
-
-  int *iskip = list->iskip;
-  int **ijskip = list->ijskip;
-
-  int inum = 0;
-  ipage->reset();
-
-  double **x = atom->x;
-  double xtmp, ytmp, ztmp;
-  double delx, dely, delz, rsq;
-  double cutsq_custom = cutoff_custom * cutoff_custom;
-
-  // loop over atoms in other list
-  // skip I atom entirely if iskip is set for type[I]
-  // skip I,J pair if ijskip is set for type[I],type[J]
-
-  for (ii = 0; ii < num_skip; ii++) {
-    i = ilist_skip[ii];
-    itype = type[i];
-    if (iskip[itype]) continue;
-
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    n = 0;
-    neighptr = ipage->vget();
-
-    // loop over parent non-skip list
-
-    jlist = firstneigh_skip[i];
-    jnum = numneigh_skip[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-      if (ijskip[itype][type[j]]) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      if (rsq > cutsq_custom) continue;
-
-      neighptr[n++] = joriginal;
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-  if (list->ghost) {
-    int num = 0;
-    for (i = 0; i < inum; i++)
-      if (ilist[i] < nlocal)
-        num++;
-      else
-        break;
-    list->inum = num;
-    list->gnum = inum - num;
-  }
-}
diff --git a/src/npair_skip_trim.h b/src/npair_skip_trim.h
deleted file mode 100644
index f2a26d654e..0000000000
--- a/src/npair_skip_trim.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(skip/trim,
-           NPairSkipTrim,
-           NP_SKIP | NP_HALF | NP_FULL |
-           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM);
-
-NPairStyle(skip/ghost/trim,
-           NPairSkipTrim,
-           NP_SKIP | NP_HALF | NP_FULL |
-           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_SKIP_TRIM_H
-#define LMP_NPAIR_SKIP_TRIM_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairSkipTrim : public NPair {
- public:
-  NPairSkipTrim(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_skip_trim_respa.cpp b/src/npair_skip_trim_respa.cpp
deleted file mode 100644
index 7dd040ca0a..0000000000
--- a/src/npair_skip_trim_respa.cpp
+++ /dev/null
@@ -1,193 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_skip_trim_respa.h"
-
-#include "atom.h"
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairSkipTrimRespa::NPairSkipTrimRespa(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build skip list for subset of types from parent list
-   iskip and ijskip flag which atom types and type pairs to skip
-   this is for respa lists, copy the inner/middle values from parent
-------------------------------------------------------------------------- */
-
-void NPairSkipTrimRespa::build(NeighList *list)
-{
-  int i,j,ii,jj,n,itype,jnum,joriginal,n_inner,n_middle;
-  int *neighptr,*jlist,*neighptr_inner,*neighptr_middle;
-
-  int *type = atom->type;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_skip = list->listskip->ilist;
-  int *numneigh_skip = list->listskip->numneigh;
-  int **firstneigh_skip = list->listskip->firstneigh;
-  int inum_skip = list->listskip->inum;
-
-  int *iskip = list->iskip;
-  int **ijskip = list->ijskip;
-
-  int *ilist_inner = list->ilist_inner;
-  int *numneigh_inner = list->numneigh_inner;
-  int **firstneigh_inner = list->firstneigh_inner;
-  MyPage<int> *ipage_inner = list->ipage_inner;
-  int *numneigh_inner_skip = list->listskip->numneigh_inner;
-  int **firstneigh_inner_skip = list->listskip->firstneigh_inner;
-
-  int *ilist_middle,*numneigh_middle,**firstneigh_middle;
-  MyPage<int> *ipage_middle;
-  int *numneigh_middle_skip,**firstneigh_middle_skip;
-  int respamiddle = list->respamiddle;
-  if (respamiddle) {
-    ilist_middle = list->ilist_middle;
-    numneigh_middle = list->numneigh_middle;
-    firstneigh_middle = list->firstneigh_middle;
-    ipage_middle = list->ipage_middle;
-    numneigh_middle_skip = list->listskip->numneigh_middle;
-    firstneigh_middle_skip = list->listskip->firstneigh_middle;
-  }
-
-  int inum = 0;
-  ipage->reset();
-  ipage_inner->reset();
-  if (respamiddle) ipage_middle->reset();
-
-  double **x = atom->x;
-  double xtmp, ytmp, ztmp;
-  double delx, dely, delz, rsq;
-  double cutsq_custom = cutoff_custom * cutoff_custom;
-
-  // loop over atoms in other list
-  // skip I atom entirely if iskip is set for type[I]
-  // skip I,J pair if ijskip is set for type[I],type[J]
-
-  for (ii = 0; ii < inum_skip; ii++) {
-    i = ilist_skip[ii];
-    itype = type[i];
-    if (iskip[itype]) continue;
-
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    n = n_inner = 0;
-    neighptr = ipage->vget();
-    neighptr_inner = ipage_inner->vget();
-    if (respamiddle) {
-      n_middle = 0;
-      neighptr_middle = ipage_middle->vget();
-    }
-
-    // loop over parent outer rRESPA list
-
-    jlist = firstneigh_skip[i];
-    jnum = numneigh_skip[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-      if (ijskip[itype][type[j]]) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      if (rsq > cutsq_custom) continue;
-
-      neighptr[n++] = joriginal;
-    }
-
-    // loop over parent inner rRESPA list
-
-    jlist = firstneigh_inner_skip[i];
-    jnum = numneigh_inner_skip[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-      if (ijskip[itype][type[j]]) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      if (rsq > cutsq_custom) continue;
-
-      neighptr_inner[n_inner++] = joriginal;
-    }
-
-    // loop over parent middle rRESPA list
-
-    if (respamiddle) {
-      jlist = firstneigh_middle_skip[i];
-      jnum = numneigh_middle_skip[i];
-
-      for (jj = 0; jj < jnum; jj++) {
-        joriginal = jlist[jj];
-        j = joriginal & NEIGHMASK;
-        if (ijskip[itype][type[j]]) continue;
-
-        delx = xtmp - x[j][0];
-        dely = ytmp - x[j][1];
-        delz = ztmp - x[j][2];
-        rsq = delx * delx + dely * dely + delz * delz;
-        if (rsq > cutsq_custom) continue;
-
-        neighptr_middle[n_middle++] = joriginal;
-      }
-    }
-
-    ilist[inum] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    ilist_inner[inum] = i;
-    firstneigh_inner[i] = neighptr_inner;
-    numneigh_inner[i] = n_inner;
-    ipage_inner->vgot(n);
-    if (ipage_inner->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-
-    if (respamiddle) {
-      ilist_middle[inum] = i;
-      firstneigh_middle[i] = neighptr_middle;
-      numneigh_middle[i] = n_middle;
-      ipage_middle->vgot(n);
-      if (ipage_middle->status())
-        error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-    }
-
-    inum++;
-  }
-
-  list->inum = inum;
-  list->inum_inner = inum;
-  if (respamiddle) list->inum_middle = inum;
-}
diff --git a/src/npair_skip_trim_respa.h b/src/npair_skip_trim_respa.h
deleted file mode 100644
index dcfe71c28d..0000000000
--- a/src/npair_skip_trim_respa.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(skip/trim/half/respa,
-           NPairSkipTrimRespa,
-           NP_SKIP | NP_RESPA | NP_HALF | NP_FULL |
-           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_SKIP_TRIM_RESPA_H
-#define LMP_NPAIR_SKIP_TRIM_RESPA_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairSkipTrimRespa : public NPair {
- public:
-  NPairSkipTrimRespa(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_skip_trim_size.cpp b/src/npair_skip_trim_size.cpp
deleted file mode 100644
index fab70a78b5..0000000000
--- a/src/npair_skip_trim_size.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_skip_trim_size.h"
-
-#include "atom.h"
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairSkipTrimSize::NPairSkipTrimSize(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build skip list for subset of types from parent list
-   iskip and ijskip flag which atom types and type pairs to skip
-------------------------------------------------------------------------- */
-
-void NPairSkipTrimSize::build(NeighList *list)
-{
-  int i, j, ii, jj, n, itype, jnum, joriginal;
-  int *neighptr, *jlist;
-
-  int *type = atom->type;
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_skip = list->listskip->ilist;
-  int *numneigh_skip = list->listskip->numneigh;
-  int **firstneigh_skip = list->listskip->firstneigh;
-  int inum_skip = list->listskip->inum;
-
-  int *iskip = list->iskip;
-  int **ijskip = list->ijskip;
-
-  int inum = 0;
-  ipage->reset();
-
-  double **x = atom->x;
-  double xtmp, ytmp, ztmp;
-  double delx, dely, delz, rsq;
-  double cutsq_custom = cutoff_custom * cutoff_custom;
-
-  // loop over atoms in other list
-  // skip I atom entirely if iskip is set for type[I]
-  // skip I,J pair if ijskip is set for type[I],type[J]
-
-  for (ii = 0; ii < inum_skip; ii++) {
-    i = ilist_skip[ii];
-    itype = type[i];
-    if (iskip[itype]) continue;
-
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    n = 0;
-    neighptr = ipage->vget();
-
-    // loop over parent non-skip size list
-
-    jlist = firstneigh_skip[i];
-    jnum = numneigh_skip[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-      if (ijskip[itype][type[j]]) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      if (rsq > cutsq_custom) continue;
-
-      neighptr[n++] = joriginal;
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_skip_trim_size.h b/src/npair_skip_trim_size.h
deleted file mode 100644
index 3b536860ca..0000000000
--- a/src/npair_skip_trim_size.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(skip/trim/half/size,
-           NPairSkipTrimSize,
-           NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_SKIP_TRIM_SIZE_H
-#define LMP_NPAIR_SKIP_TRIM_SIZE_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairSkipTrimSize : public NPair {
- public:
-  NPairSkipTrimSize(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_skip_trim_size_off2on.cpp b/src/npair_skip_trim_size_off2on.cpp
deleted file mode 100644
index 3e9a1e5f63..0000000000
--- a/src/npair_skip_trim_size_off2on.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_skip_trim_size_off2on.h"
-
-#include "atom.h"
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairSkipTrimSizeOff2on::NPairSkipTrimSizeOff2on(LAMMPS *lmp) : NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build skip list for subset of types from parent list
-   iskip and ijskip flag which atom types and type pairs to skip
-   parent non-skip list used newton off, this skip list is newton on
-------------------------------------------------------------------------- */
-
-void NPairSkipTrimSizeOff2on::build(NeighList *list)
-{
-  int i, j, ii, jj, n, itype, jnum, joriginal;
-  tagint itag, jtag;
-  int *neighptr, *jlist;
-
-  tagint *tag = atom->tag;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_skip = list->listskip->ilist;
-  int *numneigh_skip = list->listskip->numneigh;
-  int **firstneigh_skip = list->listskip->firstneigh;
-  int inum_skip = list->listskip->inum;
-
-  int *iskip = list->iskip;
-  int **ijskip = list->ijskip;
-
-  int inum = 0;
-  ipage->reset();
-
-  double **x = atom->x;
-  double xtmp, ytmp, ztmp;
-  double delx, dely, delz, rsq;
-  double cutsq_custom = cutoff_custom * cutoff_custom;
-
-  // loop over atoms in other list
-  // skip I atom entirely if iskip is set for type[I]
-  // skip I,J pair if ijskip is set for type[I],type[J]
-
-  for (ii = 0; ii < inum_skip; ii++) {
-    i = ilist_skip[ii];
-    itype = type[i];
-    if (iskip[itype]) continue;
-    itag = tag[i];
-
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    n = 0;
-    neighptr = ipage->vget();
-
-    // loop over parent non-skip size list and optionally its history info
-
-    jlist = firstneigh_skip[i];
-    jnum = numneigh_skip[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-      if (ijskip[itype][type[j]]) continue;
-
-      // only keep I,J when J = ghost if Itag < Jtag
-
-      jtag = tag[j];
-      if (j >= nlocal && jtag < itag) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      if (rsq > cutsq_custom) continue;
-
-      neighptr[n++] = joriginal;
-    }
-
-    ilist[inum++] = i;
-    firstneigh[i] = neighptr;
-    numneigh[i] = n;
-    ipage->vgot(n);
-    if (ipage->status()) error->one(FLERR, "Neighbor list overflow, boost neigh_modify one");
-  }
-  list->inum = inum;
-}
diff --git a/src/npair_skip_trim_size_off2on.h b/src/npair_skip_trim_size_off2on.h
deleted file mode 100644
index 6e52082329..0000000000
--- a/src/npair_skip_trim_size_off2on.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(skip/trim/size/off2on,
-           NPairSkipTrimSizeOff2on,
-           NP_SKIP | NP_SIZE | NP_OFF2ON | NP_HALF |
-           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD |
-           NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_TRIM);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_SKIP_TRIM_SIZE_OFF2ON_H
-#define LMP_NPAIR_SKIP_TRIM_SIZE_OFF2ON_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairSkipTrimSizeOff2on : public NPair {
- public:
-  NPairSkipTrimSizeOff2on(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_skip_trim_size_off2on_oneside.cpp b/src/npair_skip_trim_size_off2on_oneside.cpp
deleted file mode 100644
index 9d43ac8087..0000000000
--- a/src/npair_skip_trim_size_off2on_oneside.cpp
+++ /dev/null
@@ -1,185 +0,0 @@
-// clang-format off
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "npair_skip_trim_size_off2on_oneside.h"
-
-#include "atom.h"
-#include "domain.h"
-#include "error.h"
-#include "my_page.h"
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NPairSkipTrimSizeOff2onOneside::NPairSkipTrimSizeOff2onOneside(LAMMPS *lmp) :
-  NPair(lmp) {}
-
-/* ----------------------------------------------------------------------
-   build skip list for subset of types from parent list
-   iskip and ijskip flag which atom types and type pairs to skip
-   parent non-skip list used newton off and was not onesided,
-     this skip list is newton on and onesided
-------------------------------------------------------------------------- */
-
-void NPairSkipTrimSizeOff2onOneside::build(NeighList *list)
-{
-  int i,j,ii,jj,itype,jnum,joriginal,flip,tmp;
-  int *surf,*jlist;
-
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-
-  int *ilist = list->ilist;
-  int *numneigh = list->numneigh;
-  int **firstneigh = list->firstneigh;
-  MyPage<int> *ipage = list->ipage;
-
-  int *ilist_skip = list->listskip->ilist;
-  int *numneigh_skip = list->listskip->numneigh;
-  int **firstneigh_skip = list->listskip->firstneigh;
-  int inum_skip = list->listskip->inum;
-
-  int *iskip = list->iskip;
-  int **ijskip = list->ijskip;
-
-  if (domain->dimension == 2) surf = atom->line;
-  else surf = atom->tri;
-
-  int inum = 0;
-  ipage->reset();
-
-  double **x = atom->x;
-  double xtmp, ytmp, ztmp;
-  double delx, dely, delz, rsq;
-  double cutsq_custom = cutoff_custom * cutoff_custom;
-
-  // two loops over parent list required, one to count, one to store
-  // because onesided constraint means pair I,J may be stored with I or J
-  // so don't know in advance how much space to alloc for each atom's neighs
-
-  // first loop over atoms in other list to count neighbors
-  // skip I atom entirely if iskip is set for type[I]
-  // skip I,J pair if ijskip is set for type[I],type[J]
-
-  for (i = 0; i < nlocal; i++) numneigh[i] = 0;
-
-  for (ii = 0; ii < inum_skip; ii++) {
-    i = ilist_skip[ii];
-    itype = type[i];
-    if (iskip[itype]) continue;
-
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    // loop over parent non-skip size list
-
-    jlist = firstneigh_skip[i];
-    jnum = numneigh_skip[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-      if (ijskip[itype][type[j]]) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      if (rsq > cutsq_custom) continue;
-
-      // flip I,J if necessary to satisfy onesided constraint
-      // do not keep if I is now ghost
-
-      if (surf[i] >= 0) {
-        if (j >= nlocal) continue;
-        tmp = i;
-        i = j;
-        j = tmp;
-        flip = 1;
-      } else flip = 0;
-
-      numneigh[i]++;
-      if (flip) i = j;
-    }
-  }
-
-  // allocate all per-atom neigh list chunks
-
-  for (i = 0; i < nlocal; i++) {
-    if (numneigh[i] == 0) continue;
-    firstneigh[i] = ipage->get(numneigh[i]);
-    if (ipage->status())
-      error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
-  }
-
-  // second loop over atoms in other list to store neighbors
-  // skip I atom entirely if iskip is set for type[I]
-  // skip I,J pair if ijskip is set for type[I],type[J]
-
-  for (i = 0; i < nlocal; i++) numneigh[i] = 0;
-
-  for (ii = 0; ii < inum_skip; ii++) {
-    i = ilist_skip[ii];
-    itype = type[i];
-    if (iskip[itype]) continue;
-
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-
-    // loop over parent non-skip size list and optionally its history info
-
-    jlist = firstneigh_skip[i];
-    jnum = numneigh_skip[i];
-
-    for (jj = 0; jj < jnum; jj++) {
-      joriginal = jlist[jj];
-      j = joriginal & NEIGHMASK;
-      if (ijskip[itype][type[j]]) continue;
-
-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx * delx + dely * dely + delz * delz;
-      if (rsq > cutsq_custom) continue;
-
-      // flip I,J if necessary to satisfy onesided constraint
-      // do not keep if I is now ghost
-
-      if (surf[i] >= 0) {
-        if (j >= nlocal) continue;
-        tmp = i;
-        i = j;
-        j = tmp;
-        flip = 1;
-      } else flip = 0;
-
-      // store j in neigh list, not joriginal, like other neigh methods
-      // OK, b/c there is no special list flagging for surfs
-
-      firstneigh[i][numneigh[i]] = j;
-      numneigh[i]++;
-      if (flip) i = j;
-    }
-
-    // only add atom I to ilist if it has neighbors
-
-    if (numneigh[i]) ilist[inum++] = i;
-  }
-
-  list->inum = inum;
-}
diff --git a/src/npair_skip_trim_size_off2on_oneside.h b/src/npair_skip_trim_size_off2on_oneside.h
deleted file mode 100644
index 27861123dd..0000000000
--- a/src/npair_skip_trim_size_off2on_oneside.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NPAIR_CLASS
-// clang-format off
-NPairStyle(skip/trim/size/off2on/oneside,
-           NPairSkipTrimSizeOff2onOneside,
-           NP_SKIP | NP_SIZE | NP_OFF2ON | NP_ONESIDE | NP_HALF |
-           NP_NSQ | NP_BIN | NP_MULTI | NP_MULTI_OLD | NP_NEWTON | NP_NEWTOFF |
-           NP_ORTHO | NP_TRI | NP_TRIM);
-// clang-format on
-#else
-
-#ifndef LMP_NPAIR_SKIP_TRIM_SIZE_OFF2ON_ONESIDE_H
-#define LMP_NPAIR_SKIP_TRIM_SIZE_OFF2ON_ONESIDE_H
-
-#include "npair.h"
-
-namespace LAMMPS_NS {
-
-class NPairSkipTrimSizeOff2onOneside : public NPair {
- public:
-  NPairSkipTrimSizeOff2onOneside(class LAMMPS *);
-  void build(class NeighList *) override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/npair_trim.cpp b/src/npair_trim.cpp
index f026466f92..1b25646185 100644
--- a/src/npair_trim.cpp
+++ b/src/npair_trim.cpp
@@ -12,6 +12,7 @@
 ------------------------------------------------------------------------- */
 
 #include "npair_trim.h"
+
 #include "atom.h"
 #include "error.h"
 #include "my_page.h"
diff --git a/src/nstencil.cpp b/src/nstencil.cpp
index 5bbcb1210d..5d2bf5d239 100644
--- a/src/nstencil.cpp
+++ b/src/nstencil.cpp
@@ -84,6 +84,7 @@ NStencil::NStencil(LAMMPS *lmp) : Pointers(lmp)
 
   flag_half_multi = nullptr;
   flag_skip_multi = nullptr;
+  flag_same_multi = nullptr;
   bin_collection_multi = nullptr;
 
   maxcollections = 0;
@@ -122,6 +123,7 @@ NStencil::~NStencil()
     memory->destroy(maxstencil_multi);
     memory->destroy(flag_half_multi);
     memory->destroy(flag_skip_multi);
+    memory->destroy(flag_same_multi);
     memory->destroy(bin_collection_multi);
 
     memory->destroy(stencil_sx_multi);
@@ -289,6 +291,7 @@ void NStencil::create_setup()
       memory->destroy(maxstencil_multi);
       memory->destroy(flag_half_multi);
       memory->destroy(flag_skip_multi);
+      memory->destroy(flag_same_multi);
       memory->destroy(bin_collection_multi);
       memory->destroy(stencil_sx_multi);
       memory->destroy(stencil_sy_multi);
@@ -307,6 +310,8 @@ void NStencil::create_setup()
                      "neighstencil:flag_half_multi");
       memory->create(flag_skip_multi, n, n,
                      "neighstencil:flag_skip_multi");
+      memory->create(flag_same_multi, n, n,
+                     "neighstencil:flag_same_multi");
       memory->create(bin_collection_multi, n, n,
                      "neighstencil:bin_collection_multi");
 
diff --git a/src/nstencil.h b/src/nstencil.h
index 6ae7f05dfb..dcb5219a3f 100644
--- a/src/nstencil.h
+++ b/src/nstencil.h
@@ -45,6 +45,7 @@ class NStencil : protected Pointers {
   // Arrays to store options for multi itype-jtype stencils
   bool **flag_half_multi;    // flag creation of a half stencil for icollection-jcollection
   bool **flag_skip_multi;    // skip creation of icollection-jcollection stencils (for newton on)
+  bool **flag_same_multi;    // flag same size collection (doesn't always correspond to a half, e.g. newton + tri)
   int **bin_collection_multi;    // what collection to use for bin information
 
   NStencil(class LAMMPS *);
diff --git a/src/nstencil_bin.cpp b/src/nstencil_bin.cpp
new file mode 100644
index 0000000000..ccefa16978
--- /dev/null
+++ b/src/nstencil_bin.cpp
@@ -0,0 +1,76 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "nstencil_bin.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int DIM_3D, int TRI>
+NStencilBin<HALF, DIM_3D, TRI>::NStencilBin(LAMMPS *lmp) : NStencil(lmp) {}
+
+/* ----------------------------------------------------------------------
+   create stencil based on bin geometry and cutoff
+------------------------------------------------------------------------- */
+
+template<int HALF, int DIM_3D, int TRI>
+void NStencilBin<HALF, DIM_3D, TRI>::create()
+{
+  int i, j, k;
+
+  // For half stencils, only the upper plane is needed
+  // for triclinic, need to use full stencil in all dims
+  //   not a half stencil in y
+  // b/c transforming orthog -> lambda -> orthog for ghost atoms
+  //   with an added PBC offset can shift both coords by epsilon
+  // thus for an I/J owned/ghost pair, the xy coords
+  //   and bin assignments can be different on I proc vs J proc
+
+  int sy_min = sy;
+  int sz_min = sz;
+  if ((!TRI) && HALF && (!DIM_3D)) sy_min = 0;
+  if ((!TRI) && HALF && DIM_3D) sz_min = 0;
+
+  nstencil = 0;
+
+  // Half and ortho stencils include central bin first
+  // This preserves the historical order of the neighbor list
+  // as the old npair classes used to separately parse the central bin first
+  if (HALF && (!TRI)) stencil[nstencil++] = 0;
+
+  for (k = -sz_min; k <= sz; k++) {
+    for (j = -sy_min; j <= sy; j++) {
+      for (i = -sx; i <= sx; i++) {
+
+        // Now only include "upper right" bins for half and ortho stencils
+        if (HALF && (!DIM_3D) && (!TRI))
+          if (j <= 0 && (j != 0 || i <= 0)) continue;
+        if (HALF && DIM_3D && (!TRI))
+          if (k <= 0 && j <= 0 && (j != 0 || i <= 0)) continue;
+
+        if (bin_distance(i, j, k) < cutneighmaxsq)
+          stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
+      }
+    }
+  }
+}
+
+namespace LAMMPS_NS {
+template class NStencilBin<0,0,0>;
+template class NStencilBin<0,1,0>;
+template class NStencilBin<1,0,0>;
+template class NStencilBin<1,0,1>;
+template class NStencilBin<1,1,0>;
+template class NStencilBin<1,1,1>;
+}
diff --git a/src/nstencil_bin.h b/src/nstencil_bin.h
new file mode 100644
index 0000000000..889725dd1a
--- /dev/null
+++ b/src/nstencil_bin.h
@@ -0,0 +1,65 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NSTENCIL_CLASS
+// clang-format off
+typedef NStencilBin<0, 0, 0> NStencilFullBin2d;
+NStencilStyle(full/bin/2d,
+              NStencilFullBin2d,
+              NS_FULL | NS_BIN | NS_2D | NS_ORTHO | NS_TRI);
+
+typedef NStencilBin<0, 1, 0> NStencilFullBin3d;
+NStencilStyle(full/bin/3d,
+              NStencilFullBin3d,
+              NS_FULL | NS_BIN | NS_3D | NS_ORTHO | NS_TRI);
+
+typedef NStencilBin<1, 0, 0> NStencilHalfBin2d;
+NStencilStyle(half/bin/2d,
+              NStencilHalfBin2d,
+              NS_HALF | NS_BIN | NS_2D | NS_ORTHO);
+
+typedef NStencilBin<1, 0, 1> NStencilHalfBin2dTri;
+NStencilStyle(half/bin/2d/tri,
+              NStencilHalfBin2dTri,
+              NS_HALF | NS_BIN | NS_2D | NS_TRI);
+
+typedef NStencilBin<1, 1, 0> NStencilHalfBin3d;
+NStencilStyle(half/bin/3d,
+              NStencilHalfBin3d,
+              NS_HALF | NS_BIN | NS_3D | NS_ORTHO);
+
+typedef NStencilBin<1, 1, 1> NStencilHalfBin3dTri;
+NStencilStyle(half/bin/3d/tri,
+              NStencilHalfBin3dTri,
+              NS_HALF | NS_BIN | NS_3D | NS_TRI);
+// clang-format on
+#else
+
+#ifndef LMP_NSTENCIL_BIN_H
+#define LMP_NSTENCIL_BIN_H
+
+#include "nstencil.h"
+
+namespace LAMMPS_NS {
+
+template<int HALF, int DIM_3D, int TRI>
+class NStencilBin : public NStencil {
+ public:
+  NStencilBin(class LAMMPS *);
+  void create() override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/nstencil_full_bin_2d.cpp b/src/nstencil_full_bin_2d.cpp
deleted file mode 100644
index cbcdc6e797..0000000000
--- a/src/nstencil_full_bin_2d.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_full_bin_2d.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilFullBin2d::NStencilFullBin2d(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ----------------------------------------------------------------------
-   create stencil based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilFullBin2d::create()
-{
-  int i, j;
-
-  nstencil = 0;
-
-  for (j = -sy; j <= sy; j++)
-    for (i = -sx; i <= sx; i++)
-      if (bin_distance(i, j, 0) < cutneighmaxsq) stencil[nstencil++] = j * mbinx + i;
-}
diff --git a/src/nstencil_full_bin_2d.h b/src/nstencil_full_bin_2d.h
deleted file mode 100644
index c3cdbb3b9b..0000000000
--- a/src/nstencil_full_bin_2d.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(full/bin/2d,
-              NStencilFullBin2d,
-              NS_FULL | NS_BIN | NS_2D | NS_ORTHO | NS_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_FULL_BIN_2D_H
-#define LMP_NSTENCIL_FULL_BIN_2D_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilFullBin2d : public NStencil {
- public:
-  NStencilFullBin2d(class LAMMPS *);
-  void create() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_full_bin_3d.cpp b/src/nstencil_full_bin_3d.cpp
deleted file mode 100644
index e305abc764..0000000000
--- a/src/nstencil_full_bin_3d.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_full_bin_3d.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilFullBin3d::NStencilFullBin3d(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ----------------------------------------------------------------------
-   create stencil based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilFullBin3d::create()
-{
-  int i, j, k;
-
-  nstencil = 0;
-
-  for (k = -sz; k <= sz; k++)
-    for (j = -sy; j <= sy; j++)
-      for (i = -sx; i <= sx; i++)
-        if (bin_distance(i, j, k) < cutneighmaxsq)
-          stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
-}
diff --git a/src/nstencil_full_bin_3d.h b/src/nstencil_full_bin_3d.h
deleted file mode 100644
index 73da08b840..0000000000
--- a/src/nstencil_full_bin_3d.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(full/bin/3d,
-              NStencilFullBin3d,
-              NS_FULL | NS_BIN | NS_3D | NS_ORTHO | NS_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_FULL_BIN_3D_H
-#define LMP_NSTENCIL_FULL_BIN_3D_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilFullBin3d : public NStencil {
- public:
-  NStencilFullBin3d(class LAMMPS *);
-  void create() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_full_ghost_bin_2d.cpp b/src/nstencil_full_ghost_bin_2d.cpp
deleted file mode 100644
index 0429624cb1..0000000000
--- a/src/nstencil_full_ghost_bin_2d.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_full_ghost_bin_2d.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilFullGhostBin2d::NStencilFullGhostBin2d(LAMMPS *lmp) : NStencil(lmp)
-{
-  xyzflag = 1;
-}
-
-/* ----------------------------------------------------------------------
-   create stencil based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilFullGhostBin2d::create()
-{
-  int i, j;
-
-  nstencil = 0;
-
-  for (j = -sy; j <= sy; j++)
-    for (i = -sx; i <= sx; i++)
-      if (bin_distance(i, j, 0) < cutneighmaxsq) {
-        stencilxyz[nstencil][0] = i;
-        stencilxyz[nstencil][1] = j;
-        stencilxyz[nstencil][2] = 0;
-        stencil[nstencil++] = j * mbinx + i;
-      }
-}
diff --git a/src/nstencil_full_multi_2d.cpp b/src/nstencil_full_multi_2d.cpp
deleted file mode 100644
index b49e245d25..0000000000
--- a/src/nstencil_full_multi_2d.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_full_multi_2d.h"
-
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilFullMulti2d::NStencilFullMulti2d(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ---------------------------------------------------------------------- */
-
-void NStencilFullMulti2d::set_stencil_properties()
-{
-  int n = ncollections;
-  int i, j;
-
-  // Always look up neighbor using full stencil and neighbor's bin
-
-  for (i = 0; i < n; i++) {
-    for (j = 0; j < n; j++) {
-      flag_half_multi[i][j] = false;
-      flag_skip_multi[i][j] = false;
-      bin_collection_multi[i][j] = j;
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   create stencils based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilFullMulti2d::create()
-{
-  int icollection, jcollection, bin_collection, i, j, ns;
-  int n = ncollections;
-  double cutsq;
-
-  for (icollection = 0; icollection < n; icollection++) {
-    for (jcollection = 0; jcollection < n; jcollection++) {
-      if (flag_skip_multi[icollection][jcollection]) {
-        nstencil_multi[icollection][jcollection] = 0;
-        continue;
-      }
-
-      ns = 0;
-
-      sx = stencil_sx_multi[icollection][jcollection];
-      sy = stencil_sy_multi[icollection][jcollection];
-
-      mbinx = stencil_mbinx_multi[icollection][jcollection];
-      mbiny = stencil_mbiny_multi[icollection][jcollection];
-
-      bin_collection = bin_collection_multi[icollection][jcollection];
-
-      cutsq = cutcollectionsq[icollection][jcollection];
-
-      for (j = -sy; j <= sy; j++)
-        for (i = -sx; i <= sx; i++)
-          if (bin_distance_multi(i, j, 0, bin_collection) < cutsq)
-            stencil_multi[icollection][jcollection][ns++] = j * mbinx + i;
-
-      nstencil_multi[icollection][jcollection] = ns;
-    }
-  }
-}
diff --git a/src/nstencil_full_multi_2d.h b/src/nstencil_full_multi_2d.h
deleted file mode 100644
index 6b9c98bd89..0000000000
--- a/src/nstencil_full_multi_2d.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(full/multi/2d,
-              NStencilFullMulti2d, NS_FULL | NS_MULTI | NS_2D | NS_ORTHO | NS_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_FULL_MULTI_2D_H
-#define LMP_NSTENCIL_FULL_MULTI_2D_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilFullMulti2d : public NStencil {
- public:
-  NStencilFullMulti2d(class LAMMPS *);
-  void create() override;
-
- protected:
-  void set_stencil_properties() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_full_multi_3d.cpp b/src/nstencil_full_multi_3d.cpp
deleted file mode 100644
index d2d5faec62..0000000000
--- a/src/nstencil_full_multi_3d.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_full_multi_3d.h"
-
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilFullMulti3d::NStencilFullMulti3d(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ---------------------------------------------------------------------- */
-
-void NStencilFullMulti3d::set_stencil_properties()
-{
-  int n = ncollections;
-  int i, j;
-
-  // Always look up neighbor using full stencil and neighbor's bin
-  // Stencil cutoff set by i-j cutoff
-
-  for (i = 0; i < n; i++) {
-    for (j = 0; j < n; j++) {
-      flag_half_multi[i][j] = true;
-      flag_skip_multi[i][j] = false;
-      bin_collection_multi[i][j] = j;
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   create stencils based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilFullMulti3d::create()
-{
-  int icollection, jcollection, bin_collection, i, j, k, ns;
-  int n = ncollections;
-  double cutsq;
-
-  for (icollection = 0; icollection < n; icollection++) {
-    for (jcollection = 0; jcollection < n; jcollection++) {
-      if (flag_skip_multi[icollection][jcollection]) {
-        nstencil_multi[icollection][jcollection] = 0;
-        continue;
-      }
-
-      ns = 0;
-
-      sx = stencil_sx_multi[icollection][jcollection];
-      sy = stencil_sy_multi[icollection][jcollection];
-      sz = stencil_sz_multi[icollection][jcollection];
-
-      mbinx = stencil_mbinx_multi[icollection][jcollection];
-      mbiny = stencil_mbiny_multi[icollection][jcollection];
-      mbinz = stencil_mbinz_multi[icollection][jcollection];
-
-      bin_collection = bin_collection_multi[icollection][jcollection];
-
-      cutsq = cutcollectionsq[icollection][jcollection];
-
-      for (k = -sz; k <= sz; k++)
-        for (j = -sy; j <= sy; j++)
-          for (i = -sx; i <= sx; i++)
-            if (bin_distance_multi(i, j, k, bin_collection) < cutsq)
-              stencil_multi[icollection][jcollection][ns++] = k * mbiny * mbinx + j * mbinx + i;
-
-      nstencil_multi[icollection][jcollection] = ns;
-    }
-  }
-}
diff --git a/src/nstencil_full_multi_3d.h b/src/nstencil_full_multi_3d.h
deleted file mode 100644
index e4d4691139..0000000000
--- a/src/nstencil_full_multi_3d.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(full/multi/3d,
-              NStencilFullMulti3d, NS_FULL | NS_MULTI | NS_3D | NS_ORTHO | NS_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_FULL_MULTI_3D_H
-#define LMP_NSTENCIL_FULL_MULTI_3D_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilFullMulti3d : public NStencil {
- public:
-  NStencilFullMulti3d(class LAMMPS *);
-  void create() override;
-
- protected:
-  void set_stencil_properties() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_full_multi_old_2d.cpp b/src/nstencil_full_multi_old_2d.cpp
deleted file mode 100644
index 80d7275279..0000000000
--- a/src/nstencil_full_multi_old_2d.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_full_multi_old_2d.h"
-#include "atom.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilFullMultiOld2d::NStencilFullMultiOld2d(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ----------------------------------------------------------------------
-   create stencil based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilFullMultiOld2d::create()
-{
-  int i, j, n;
-  double rsq, typesq;
-  int *s;
-  double *distsq;
-
-  int ntypes = atom->ntypes;
-  for (int itype = 1; itype <= ntypes; itype++) {
-    typesq = cuttypesq[itype];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    n = 0;
-    for (j = -sy; j <= sy; j++)
-      for (i = -sx; i <= sx; i++) {
-        rsq = bin_distance(i, j, 0);
-        if (rsq < typesq) {
-          distsq[n] = rsq;
-          s[n++] = j * mbinx + i;
-        }
-      }
-    nstencil_multi_old[itype] = n;
-  }
-}
diff --git a/src/nstencil_full_multi_old_2d.h b/src/nstencil_full_multi_old_2d.h
deleted file mode 100644
index 713a88d549..0000000000
--- a/src/nstencil_full_multi_old_2d.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(full/multi/old/2d,
-              NStencilFullMultiOld2d,
-              NS_FULL | NS_MULTI_OLD | NS_2D | NS_ORTHO | NS_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_FULL_MULTI_OLD_2D_H
-#define LMP_NSTENCIL_FULL_MULTI_OLD_2D_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilFullMultiOld2d : public NStencil {
- public:
-  NStencilFullMultiOld2d(class LAMMPS *);
-  void create() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_full_multi_old_3d.cpp b/src/nstencil_full_multi_old_3d.cpp
deleted file mode 100644
index c733bc8e88..0000000000
--- a/src/nstencil_full_multi_old_3d.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_full_multi_old_3d.h"
-#include "atom.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilFullMultiOld3d::NStencilFullMultiOld3d(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ----------------------------------------------------------------------
-   create stencil based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilFullMultiOld3d::create()
-{
-  int i, j, k, n;
-  double rsq, typesq;
-  int *s;
-  double *distsq;
-
-  int ntypes = atom->ntypes;
-  for (int itype = 1; itype <= ntypes; itype++) {
-    typesq = cuttypesq[itype];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    n = 0;
-    for (k = -sz; k <= sz; k++)
-      for (j = -sy; j <= sy; j++)
-        for (i = -sx; i <= sx; i++) {
-          rsq = bin_distance(i, j, k);
-          if (rsq < typesq) {
-            distsq[n] = rsq;
-            s[n++] = k * mbiny * mbinx + j * mbinx + i;
-          }
-        }
-    nstencil_multi_old[itype] = n;
-  }
-}
diff --git a/src/nstencil_full_multi_old_3d.h b/src/nstencil_full_multi_old_3d.h
deleted file mode 100644
index 9ebaed6154..0000000000
--- a/src/nstencil_full_multi_old_3d.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(full/multi/old/3d,
-              NStencilFullMultiOld3d,
-              NS_FULL | NS_MULTI_OLD | NS_3D | NS_ORTHO | NS_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_FULL_MULTI_OLD_3D_H
-#define LMP_NSTENCIL_FULL_MULTI_OLD_3D_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilFullMultiOld3d : public NStencil {
- public:
-  NStencilFullMultiOld3d(class LAMMPS *);
-  void create() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_full_ghost_bin_3d.cpp b/src/nstencil_ghost_bin.cpp
similarity index 75%
rename from src/nstencil_full_ghost_bin_3d.cpp
rename to src/nstencil_ghost_bin.cpp
index 866f391b90..81372bedaf 100644
--- a/src/nstencil_full_ghost_bin_3d.cpp
+++ b/src/nstencil_ghost_bin.cpp
@@ -11,13 +11,14 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
-#include "nstencil_full_ghost_bin_3d.h"
+#include "nstencil_ghost_bin.h"
 
 using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-NStencilFullGhostBin3d::NStencilFullGhostBin3d(LAMMPS *lmp) : NStencil(lmp)
+template<int DIM_3D>
+NStencilGhostBin<DIM_3D>::NStencilGhostBin(LAMMPS *lmp) : NStencil(lmp)
 {
   xyzflag = 1;
 }
@@ -26,19 +27,28 @@ NStencilFullGhostBin3d::NStencilFullGhostBin3d(LAMMPS *lmp) : NStencil(lmp)
    create stencil based on bin geometry and cutoff
 ------------------------------------------------------------------------- */
 
-void NStencilFullGhostBin3d::create()
+template<int DIM_3D>
+void NStencilGhostBin<DIM_3D>::create()
 {
   int i, j, k;
 
   nstencil = 0;
 
-  for (k = -sz; k <= sz; k++)
-    for (j = -sy; j <= sy; j++)
-      for (i = -sx; i <= sx; i++)
+  for (k = -sz; k <= sz; k++) {
+    for (j = -sy; j <= sy; j++) {
+      for (i = -sx; i <= sx; i++) {
         if (bin_distance(i, j, k) < cutneighmaxsq) {
           stencilxyz[nstencil][0] = i;
           stencilxyz[nstencil][1] = j;
           stencilxyz[nstencil][2] = k;
           stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
         }
+      }
+    }
+  }
+}
+
+namespace LAMMPS_NS {
+template class NStencilGhostBin<0>;
+template class NStencilGhostBin<1>;
 }
diff --git a/src/nstencil_full_ghost_bin_2d.h b/src/nstencil_ghost_bin.h
similarity index 70%
rename from src/nstencil_full_ghost_bin_2d.h
rename to src/nstencil_ghost_bin.h
index 2907880c92..ed4ae21be9 100644
--- a/src/nstencil_full_ghost_bin_2d.h
+++ b/src/nstencil_ghost_bin.h
@@ -13,22 +13,29 @@
 
 #ifdef NSTENCIL_CLASS
 // clang-format off
+typedef NStencilGhostBin<0> NStencilFullGhostBin2d;
 NStencilStyle(full/ghost/bin/2d,
               NStencilFullGhostBin2d,
               NS_FULL | NS_GHOST | NS_BIN | NS_2D | NS_ORTHO | NS_TRI);
+
+typedef NStencilGhostBin<1> NStencilFullGhostBin3d;
+NStencilStyle(full/ghost/bin/3d,
+              NStencilFullGhostBin3d,
+              NS_FULL | NS_GHOST | NS_BIN | NS_3D | NS_ORTHO | NS_TRI);
 // clang-format on
 #else
 
-#ifndef LMP_NSTENCIL_FULL_GHOST_BIN_2D_H
-#define LMP_NSTENCIL_FULL_GHOST_BIN_2D_H
+#ifndef LMP_NSTENCIL_GHOST_BIN_H
+#define LMP_NSTENCIL_GHOST_BIN_H
 
 #include "nstencil.h"
 
 namespace LAMMPS_NS {
 
-class NStencilFullGhostBin2d : public NStencil {
+template<int DIM_3D>
+class NStencilGhostBin : public NStencil {
  public:
-  NStencilFullGhostBin2d(class LAMMPS *);
+  NStencilGhostBin(class LAMMPS *);
   void create() override;
 };
 
diff --git a/src/nstencil_half_bin_2d.cpp b/src/nstencil_half_bin_2d.cpp
deleted file mode 100644
index c65095a3b0..0000000000
--- a/src/nstencil_half_bin_2d.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_half_bin_2d.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilHalfBin2d::NStencilHalfBin2d(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ----------------------------------------------------------------------
-   create stencil based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilHalfBin2d::create()
-{
-  int i, j;
-
-  nstencil = 0;
-
-  for (j = 0; j <= sy; j++)
-    for (i = -sx; i <= sx; i++)
-      if (j > 0 || (j == 0 && i > 0))
-        if (bin_distance(i, j, 0) < cutneighmaxsq) stencil[nstencil++] = j * mbinx + i;
-}
diff --git a/src/nstencil_half_bin_2d.h b/src/nstencil_half_bin_2d.h
deleted file mode 100644
index 506136b41d..0000000000
--- a/src/nstencil_half_bin_2d.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(half/bin/2d,
-              NStencilHalfBin2d,
-              NS_HALF | NS_BIN | NS_2D | NS_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_HALF_BIN_2D_H
-#define LMP_NSTENCIL_HALF_BIN_2D_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilHalfBin2d : public NStencil {
- public:
-  NStencilHalfBin2d(class LAMMPS *);
-  void create() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_half_bin_2d_tri.cpp b/src/nstencil_half_bin_2d_tri.cpp
deleted file mode 100644
index 920918fe09..0000000000
--- a/src/nstencil_half_bin_2d_tri.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_half_bin_2d_tri.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilHalfBin2dTri::NStencilHalfBin2dTri(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ----------------------------------------------------------------------
-   create stencil based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilHalfBin2dTri::create()
-{
-  int i, j;
-
-  // for triclinic, need to use full stencil in all dims
-  //   not a half stencil in y
-  // b/c transforming orthog -> lambda -> orthog for ghost atoms
-  //   with an added PBC offset can shift both coords by epsilon
-  // thus for an I/J owned/ghost pair, the xy coords
-  //   and bin assignments can be different on I proc vs J proc
-
-  nstencil = 0;
-
-  for (j = -sy; j <= sy; j++)
-    for (i = -sx; i <= sx; i++)
-      if (bin_distance(i, j, 0) < cutneighmaxsq)
-	stencil[nstencil++] = j * mbinx + i;
-}
diff --git a/src/nstencil_half_bin_2d_tri.h b/src/nstencil_half_bin_2d_tri.h
deleted file mode 100644
index 2873b7d92f..0000000000
--- a/src/nstencil_half_bin_2d_tri.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(half/bin/2d/tri,
-              NStencilHalfBin2dTri,
-              NS_HALF | NS_BIN | NS_2D | NS_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_HALF_BIN_2D_TRI_H
-#define LMP_NSTENCIL_HALF_BIN_2D_TRI_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilHalfBin2dTri : public NStencil {
- public:
-  NStencilHalfBin2dTri(class LAMMPS *);
-  void create() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_half_bin_3d.cpp b/src/nstencil_half_bin_3d.cpp
deleted file mode 100644
index 90d8e45053..0000000000
--- a/src/nstencil_half_bin_3d.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_half_bin_3d.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilHalfBin3d::NStencilHalfBin3d(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ----------------------------------------------------------------------
-   create stencil based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilHalfBin3d::create()
-{
-  int i, j, k;
-
-  nstencil = 0;
-
-  for (k = 0; k <= sz; k++)
-    for (j = -sy; j <= sy; j++)
-      for (i = -sx; i <= sx; i++)
-        if (k > 0 || j > 0 || (j == 0 && i > 0))
-          if (bin_distance(i, j, k) < cutneighmaxsq)
-            stencil[nstencil++] = k * mbiny * mbinx + j * mbinx + i;
-}
diff --git a/src/nstencil_half_bin_3d.h b/src/nstencil_half_bin_3d.h
deleted file mode 100644
index 2b612a4a0a..0000000000
--- a/src/nstencil_half_bin_3d.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(half/bin/3d,
-              NStencilHalfBin3d,
-              NS_HALF | NS_BIN | NS_3D | NS_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_HALF_BIN_3D_H
-#define LMP_NSTENCIL_HALF_BIN_3D_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilHalfBin3d : public NStencil {
- public:
-  NStencilHalfBin3d(class LAMMPS *);
-  void create() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_half_bin_3d_tri.h b/src/nstencil_half_bin_3d_tri.h
deleted file mode 100644
index ad24ab0a06..0000000000
--- a/src/nstencil_half_bin_3d_tri.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(half/bin/3d/tri,
-              NStencilHalfBin3dTri,
-              NS_HALF | NS_BIN | NS_3D | NS_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_HALF_BIN_3D_TRI_H
-#define LMP_NSTENCIL_HALF_BIN_3D_TRI_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilHalfBin3dTri : public NStencil {
- public:
-  NStencilHalfBin3dTri(class LAMMPS *);
-  void create() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_half_multi_2d.cpp b/src/nstencil_half_multi_2d.cpp
deleted file mode 100644
index 5932ccae64..0000000000
--- a/src/nstencil_half_multi_2d.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_half_multi_2d.h"
-
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilHalfMulti2d::NStencilHalfMulti2d(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ---------------------------------------------------------------------- */
-
-void NStencilHalfMulti2d::set_stencil_properties()
-{
-  int n = ncollections;
-  int i, j;
-
-  // Cross collections: use full stencil, looking one way through hierarchy
-  // smaller -> larger => use full stencil in larger bin
-  // larger -> smaller => no nstencil required
-  // If cut offs are same, use half stencil
-
-  for (i = 0; i < n; i++) {
-    for (j = 0; j < n; j++) {
-      if (cutcollectionsq[i][i] > cutcollectionsq[j][j]) continue;
-
-      flag_skip_multi[i][j] = false;
-
-      if (cutcollectionsq[i][i] == cutcollectionsq[j][j]) {
-        flag_half_multi[i][j] = true;
-        bin_collection_multi[i][j] = i;
-      } else {
-        flag_half_multi[i][j] = false;
-        bin_collection_multi[i][j] = j;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   create stencils based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilHalfMulti2d::create()
-{
-  int icollection, jcollection, bin_collection, i, j, ns;
-  int n = ncollections;
-  double cutsq;
-
-  for (icollection = 0; icollection < n; icollection++) {
-    for (jcollection = 0; jcollection < n; jcollection++) {
-      if (flag_skip_multi[icollection][jcollection]) {
-        nstencil_multi[icollection][jcollection] = 0;
-        continue;
-      }
-
-      ns = 0;
-
-      sx = stencil_sx_multi[icollection][jcollection];
-      sy = stencil_sy_multi[icollection][jcollection];
-
-      mbinx = stencil_mbinx_multi[icollection][jcollection];
-      mbiny = stencil_mbiny_multi[icollection][jcollection];
-
-      bin_collection = bin_collection_multi[icollection][jcollection];
-
-      cutsq = cutcollectionsq[icollection][jcollection];
-
-      if (flag_half_multi[icollection][jcollection]) {
-        for (j = 0; j <= sy; j++)
-          for (i = -sx; i <= sx; i++)
-            if (j > 0 || (j == 0 && i > 0)) {
-              if (bin_distance_multi(i, j, 0, bin_collection) < cutsq)
-                stencil_multi[icollection][jcollection][ns++] = j * mbinx + i;
-            }
-      } else {
-        for (j = -sy; j <= sy; j++)
-          for (i = -sx; i <= sx; i++)
-            if (bin_distance_multi(i, j, 0, bin_collection) < cutsq)
-              stencil_multi[icollection][jcollection][ns++] = j * mbinx + i;
-      }
-      nstencil_multi[icollection][jcollection] = ns;
-    }
-  }
-}
diff --git a/src/nstencil_half_multi_2d.h b/src/nstencil_half_multi_2d.h
deleted file mode 100644
index a87f517b5b..0000000000
--- a/src/nstencil_half_multi_2d.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(half/multi/2d,
-              NStencilHalfMulti2d, NS_HALF | NS_MULTI | NS_2D | NS_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_HALF_MULTI_2D_H
-#define LMP_NSTENCIL_HALF_MULTI_2D_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilHalfMulti2d : public NStencil {
- public:
-  NStencilHalfMulti2d(class LAMMPS *);
-  void create() override;
-
- protected:
-  void set_stencil_properties() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_half_multi_2d_tri.cpp b/src/nstencil_half_multi_2d_tri.cpp
deleted file mode 100644
index 85bbe94c86..0000000000
--- a/src/nstencil_half_multi_2d_tri.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_half_multi_2d_tri.h"
-
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilHalfMulti2dTri::NStencilHalfMulti2dTri(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ---------------------------------------------------------------------- */
-
-void NStencilHalfMulti2dTri::set_stencil_properties()
-{
-  int n = ncollections;
-  int i, j;
-
-  // Cross collections: use full stencil, looking one way through hierarchy
-  // smaller -> larger => use full stencil in larger bin
-  // larger -> smaller => no nstencil required
-  // If cut offs are same, use half stencil
-
-  for (i = 0; i < n; i++) {
-    for (j = 0; j < n; j++) {
-      if (cutcollectionsq[i][i] > cutcollectionsq[j][j]) continue;
-
-      flag_skip_multi[i][j] = false;
-
-      if (cutcollectionsq[i][i] == cutcollectionsq[j][j]) {
-        flag_half_multi[i][j] = true;
-        bin_collection_multi[i][j] = i;
-      } else {
-        flag_half_multi[i][j] = false;
-        bin_collection_multi[i][j] = j;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   create stencils based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilHalfMulti2dTri::create()
-{
-  int icollection, jcollection, bin_collection, i, j, ns;
-  int n = ncollections;
-  double cutsq;
-
-  for (icollection = 0; icollection < n; icollection++) {
-    for (jcollection = 0; jcollection < n; jcollection++) {
-      if (flag_skip_multi[icollection][jcollection]) {
-        nstencil_multi[icollection][jcollection] = 0;
-        continue;
-      }
-
-      ns = 0;
-
-      sx = stencil_sx_multi[icollection][jcollection];
-      sy = stencil_sy_multi[icollection][jcollection];
-
-      mbinx = stencil_mbinx_multi[icollection][jcollection];
-      mbiny = stencil_mbiny_multi[icollection][jcollection];
-
-      bin_collection = bin_collection_multi[icollection][jcollection];
-
-      cutsq = cutcollectionsq[icollection][jcollection];
-
-      if (flag_half_multi[icollection][jcollection]) {
-        for (j = -sy; j <= sy; j++)
-          for (i = -sx; i <= sx; i++)
-            if (bin_distance_multi(i, j, 0, bin_collection) < cutsq)
-              stencil_multi[icollection][jcollection][ns++] = j * mbinx + i;
-      } else {
-        for (j = -sy; j <= sy; j++)
-          for (i = -sx; i <= sx; i++)
-            if (bin_distance_multi(i, j, 0, bin_collection) < cutsq)
-              stencil_multi[icollection][jcollection][ns++] = j * mbinx + i;
-      }
-      nstencil_multi[icollection][jcollection] = ns;
-    }
-  }
-}
diff --git a/src/nstencil_half_multi_2d_tri.h b/src/nstencil_half_multi_2d_tri.h
deleted file mode 100644
index 6067afbb50..0000000000
--- a/src/nstencil_half_multi_2d_tri.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(half/multi/2d/tri,
-              NStencilHalfMulti2dTri, NS_HALF | NS_MULTI | NS_2D | NS_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_HALF_MULTI_2D_TRI_H
-#define LMP_NSTENCIL_HALF_MULTI_2D_TRI_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilHalfMulti2dTri : public NStencil {
- public:
-  NStencilHalfMulti2dTri(class LAMMPS *);
-  void create() override;
-
- protected:
-  void set_stencil_properties() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_half_multi_3d.cpp b/src/nstencil_half_multi_3d.cpp
deleted file mode 100644
index 8b1a1d85c5..0000000000
--- a/src/nstencil_half_multi_3d.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_half_multi_3d.h"
-
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilHalfMulti3d::NStencilHalfMulti3d(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ---------------------------------------------------------------------- */
-
-void NStencilHalfMulti3d::set_stencil_properties()
-{
-  int n = ncollections;
-  int i, j;
-
-  // Cross collections: use full stencil, looking one way through hierarchy
-  // smaller -> larger => use full stencil in larger bin
-  // larger -> smaller => no nstencil required
-  // If cut offs are same, use half stencil
-
-  for (i = 0; i < n; i++) {
-    for (j = 0; j < n; j++) {
-      if (cutcollectionsq[i][i] > cutcollectionsq[j][j]) continue;
-
-      flag_skip_multi[i][j] = false;
-
-      if (cutcollectionsq[i][i] == cutcollectionsq[j][j]) {
-        flag_half_multi[i][j] = true;
-        bin_collection_multi[i][j] = i;
-      } else {
-        flag_half_multi[i][j] = false;
-        bin_collection_multi[i][j] = j;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   create stencils based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilHalfMulti3d::create()
-{
-  int icollection, jcollection, bin_collection, i, j, k, ns;
-  int n = ncollections;
-  double cutsq;
-
-  for (icollection = 0; icollection < n; icollection++) {
-    for (jcollection = 0; jcollection < n; jcollection++) {
-      if (flag_skip_multi[icollection][jcollection]) {
-        nstencil_multi[icollection][jcollection] = 0;
-        continue;
-      }
-
-      ns = 0;
-
-      sx = stencil_sx_multi[icollection][jcollection];
-      sy = stencil_sy_multi[icollection][jcollection];
-      sz = stencil_sz_multi[icollection][jcollection];
-
-      mbinx = stencil_mbinx_multi[icollection][jcollection];
-      mbiny = stencil_mbiny_multi[icollection][jcollection];
-      mbinz = stencil_mbinz_multi[icollection][jcollection];
-
-      bin_collection = bin_collection_multi[icollection][jcollection];
-
-      cutsq = cutcollectionsq[icollection][jcollection];
-
-      if (flag_half_multi[icollection][jcollection]) {
-        for (k = 0; k <= sz; k++)
-          for (j = -sy; j <= sy; j++)
-            for (i = -sx; i <= sx; i++)
-              if (k > 0 || j > 0 || (j == 0 && i > 0)) {
-                if (bin_distance_multi(i, j, k, bin_collection) < cutsq)
-                  stencil_multi[icollection][jcollection][ns++] = k * mbiny * mbinx + j * mbinx + i;
-              }
-      } else {
-        for (k = -sz; k <= sz; k++)
-          for (j = -sy; j <= sy; j++)
-            for (i = -sx; i <= sx; i++)
-              if (bin_distance_multi(i, j, k, bin_collection) < cutsq)
-                stencil_multi[icollection][jcollection][ns++] = k * mbiny * mbinx + j * mbinx + i;
-      }
-      nstencil_multi[icollection][jcollection] = ns;
-    }
-  }
-}
diff --git a/src/nstencil_half_multi_3d.h b/src/nstencil_half_multi_3d.h
deleted file mode 100644
index c9281cab19..0000000000
--- a/src/nstencil_half_multi_3d.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(half/multi/3d,
-              NStencilHalfMulti3d, NS_HALF | NS_MULTI | NS_3D | NS_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_HALF_MULTI_3D_H
-#define LMP_NSTENCIL_HALF_MULTI_3D_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilHalfMulti3d : public NStencil {
- public:
-  NStencilHalfMulti3d(class LAMMPS *);
-  void create() override;
-
- protected:
-  void set_stencil_properties() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_half_multi_3d_tri.cpp b/src/nstencil_half_multi_3d_tri.cpp
deleted file mode 100644
index 9761e15854..0000000000
--- a/src/nstencil_half_multi_3d_tri.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_half_multi_3d_tri.h"
-
-#include "neigh_list.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilHalfMulti3dTri::NStencilHalfMulti3dTri(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ---------------------------------------------------------------------- */
-
-void NStencilHalfMulti3dTri::set_stencil_properties()
-{
-  int n = ncollections;
-  int i, j;
-
-  // Cross collections: use full stencil, looking one way through hierarchy
-  // smaller -> larger => use full stencil in larger bin
-  // larger -> smaller => no nstencil required
-  // If cut offs are same, use half stencil
-
-  for (i = 0; i < n; i++) {
-    for (j = 0; j < n; j++) {
-      if (cutcollectionsq[i][i] > cutcollectionsq[j][j]) continue;
-
-      flag_skip_multi[i][j] = false;
-
-      if (cutcollectionsq[i][i] == cutcollectionsq[j][j]) {
-        flag_half_multi[i][j] = true;
-        bin_collection_multi[i][j] = i;
-      } else {
-        flag_half_multi[i][j] = false;
-        bin_collection_multi[i][j] = j;
-      }
-    }
-  }
-}
-
-/* ----------------------------------------------------------------------
-   create stencils based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilHalfMulti3dTri::create()
-{
-  int icollection, jcollection, bin_collection, i, j, k, ns;
-  int n = ncollections;
-  double cutsq;
-
-  for (icollection = 0; icollection < n; icollection++) {
-    for (jcollection = 0; jcollection < n; jcollection++) {
-      if (flag_skip_multi[icollection][jcollection]) {
-        nstencil_multi[icollection][jcollection] = 0;
-        continue;
-      }
-
-      ns = 0;
-
-      sx = stencil_sx_multi[icollection][jcollection];
-      sy = stencil_sy_multi[icollection][jcollection];
-      sz = stencil_sz_multi[icollection][jcollection];
-
-      mbinx = stencil_mbinx_multi[icollection][jcollection];
-      mbiny = stencil_mbiny_multi[icollection][jcollection];
-      mbinz = stencil_mbinz_multi[icollection][jcollection];
-
-      bin_collection = bin_collection_multi[icollection][jcollection];
-      cutsq = cutcollectionsq[icollection][jcollection];
-
-      if (flag_half_multi[icollection][jcollection]) {
-        for (k = -sz; k <= sz; k++)
-          for (j = -sy; j <= sy; j++)
-            for (i = -sx; i <= sx; i++)
-              if (bin_distance_multi(i, j, k, bin_collection) < cutsq)
-                stencil_multi[icollection][jcollection][ns++] = k * mbiny * mbinx + j * mbinx + i;
-      } else {
-        for (k = -sz; k <= sz; k++)
-          for (j = -sy; j <= sy; j++)
-            for (i = -sx; i <= sx; i++)
-              if (bin_distance_multi(i, j, k, bin_collection) < cutsq)
-                stencil_multi[icollection][jcollection][ns++] = k * mbiny * mbinx + j * mbinx + i;
-      }
-      nstencil_multi[icollection][jcollection] = ns;
-    }
-  }
-}
diff --git a/src/nstencil_half_multi_3d_tri.h b/src/nstencil_half_multi_3d_tri.h
deleted file mode 100644
index 9a55c10f1d..0000000000
--- a/src/nstencil_half_multi_3d_tri.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(half/multi/3d/tri,
-              NStencilHalfMulti3dTri, NS_HALF | NS_MULTI | NS_3D | NS_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_HALF_MULTI_3D_TRI_H
-#define LMP_NSTENCIL_HALF_MULTI_3D_TRI_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilHalfMulti3dTri : public NStencil {
- public:
-  NStencilHalfMulti3dTri(class LAMMPS *);
-  void create() override;
-
- protected:
-  void set_stencil_properties() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_half_multi_old_2d.cpp b/src/nstencil_half_multi_old_2d.cpp
deleted file mode 100644
index 6bf9e5c5b1..0000000000
--- a/src/nstencil_half_multi_old_2d.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_half_multi_old_2d.h"
-#include "atom.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilHalfMultiOld2d::NStencilHalfMultiOld2d(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ----------------------------------------------------------------------
-   create stencil based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilHalfMultiOld2d::create()
-{
-  int i, j, n;
-  double rsq, typesq;
-  int *s;
-  double *distsq;
-
-  int ntypes = atom->ntypes;
-  for (int itype = 1; itype <= ntypes; itype++) {
-    typesq = cuttypesq[itype];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    n = 0;
-    for (j = 0; j <= sy; j++)
-      for (i = -sx; i <= sx; i++)
-        if (j > 0 || (j == 0 && i > 0)) {
-          rsq = bin_distance(i, j, 0);
-          if (rsq < typesq) {
-            distsq[n] = rsq;
-            s[n++] = j * mbinx + i;
-          }
-        }
-    nstencil_multi_old[itype] = n;
-  }
-}
diff --git a/src/nstencil_half_multi_old_2d.h b/src/nstencil_half_multi_old_2d.h
deleted file mode 100644
index a3825bcb71..0000000000
--- a/src/nstencil_half_multi_old_2d.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(half/multi/old/2d,
-              NStencilHalfMultiOld2d, NS_HALF | NS_MULTI_OLD | NS_2D | NS_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_HALF_MULTI_OLD_2D_H
-#define LMP_NSTENCIL_HALF_MULTI_OLD_2D_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilHalfMultiOld2d : public NStencil {
- public:
-  NStencilHalfMultiOld2d(class LAMMPS *);
-  void create() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_half_multi_old_2d_tri.cpp b/src/nstencil_half_multi_old_2d_tri.cpp
deleted file mode 100644
index 0aeb65bebd..0000000000
--- a/src/nstencil_half_multi_old_2d_tri.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_half_multi_old_2d_tri.h"
-#include "atom.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilHalfMultiOld2dTri::NStencilHalfMultiOld2dTri(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ----------------------------------------------------------------------
-   create stencil based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilHalfMultiOld2dTri::create()
-{
-  int i, j, n;
-  double rsq, typesq;
-  int *s;
-  double *distsq;
-
-  int ntypes = atom->ntypes;
-  for (int itype = 1; itype <= ntypes; itype++) {
-    typesq = cuttypesq[itype];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    n = 0;
-    for (j = -sy; j <= sy; j++)
-      for (i = -sx; i <= sx; i++) {
-        rsq = bin_distance(i, j, 0);
-        if (rsq < typesq) {
-          distsq[n] = rsq;
-          s[n++] = j * mbinx + i;
-        }
-      }
-    nstencil_multi_old[itype] = n;
-  }
-}
diff --git a/src/nstencil_half_multi_old_2d_tri.h b/src/nstencil_half_multi_old_2d_tri.h
deleted file mode 100644
index a65eb21ff7..0000000000
--- a/src/nstencil_half_multi_old_2d_tri.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(half/multi/old/2d/tri,
-              NStencilHalfMultiOld2dTri, NS_HALF | NS_MULTI_OLD | NS_2D | NS_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_HALF_MULTI_OLD_2D_TRI_H
-#define LMP_NSTENCIL_HALF_MULTI_OLD_2D_TRI_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilHalfMultiOld2dTri : public NStencil {
- public:
-  NStencilHalfMultiOld2dTri(class LAMMPS *);
-  void create() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_half_multi_old_3d.cpp b/src/nstencil_half_multi_old_3d.cpp
deleted file mode 100644
index 99e72610f0..0000000000
--- a/src/nstencil_half_multi_old_3d.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_half_multi_old_3d.h"
-#include "atom.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilHalfMultiOld3d::NStencilHalfMultiOld3d(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ----------------------------------------------------------------------
-   create stencil based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilHalfMultiOld3d::create()
-{
-  int i, j, k, n;
-  double rsq, typesq;
-  int *s;
-  double *distsq;
-
-  int ntypes = atom->ntypes;
-  for (int itype = 1; itype <= ntypes; itype++) {
-    typesq = cuttypesq[itype];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    n = 0;
-    for (k = 0; k <= sz; k++)
-      for (j = -sy; j <= sy; j++)
-        for (i = -sx; i <= sx; i++)
-          if (k > 0 || j > 0 || (j == 0 && i > 0)) {
-            rsq = bin_distance(i, j, k);
-            if (rsq < typesq) {
-              distsq[n] = rsq;
-              s[n++] = k * mbiny * mbinx + j * mbinx + i;
-            }
-          }
-    nstencil_multi_old[itype] = n;
-  }
-}
diff --git a/src/nstencil_half_multi_old_3d.h b/src/nstencil_half_multi_old_3d.h
deleted file mode 100644
index 3b93d3cd6c..0000000000
--- a/src/nstencil_half_multi_old_3d.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(half/multi_old/3d,
-              NStencilHalfMultiOld3d, NS_HALF | NS_MULTI_OLD | NS_3D | NS_ORTHO);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_HALF_MULTI_OLD_3D_H
-#define LMP_NSTENCIL_HALF_MULTI_OLD_3D_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilHalfMultiOld3d : public NStencil {
- public:
-  NStencilHalfMultiOld3d(class LAMMPS *);
-  void create() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_half_multi_old_3d_tri.cpp b/src/nstencil_half_multi_old_3d_tri.cpp
deleted file mode 100644
index 3717b7836b..0000000000
--- a/src/nstencil_half_multi_old_3d_tri.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-/* ----------------------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#include "nstencil_half_multi_old_3d_tri.h"
-#include "atom.h"
-
-using namespace LAMMPS_NS;
-
-/* ---------------------------------------------------------------------- */
-
-NStencilHalfMultiOld3dTri::NStencilHalfMultiOld3dTri(LAMMPS *lmp) : NStencil(lmp) {}
-
-/* ----------------------------------------------------------------------
-   create stencil based on bin geometry and cutoff
-------------------------------------------------------------------------- */
-
-void NStencilHalfMultiOld3dTri::create()
-{
-  int i, j, k, n;
-  double rsq, typesq;
-  int *s;
-  double *distsq;
-
-  int ntypes = atom->ntypes;
-  for (int itype = 1; itype <= ntypes; itype++) {
-    typesq = cuttypesq[itype];
-    s = stencil_multi_old[itype];
-    distsq = distsq_multi_old[itype];
-    n = 0;
-    for (k = -sz; k <= sz; k++)
-      for (j = -sy; j <= sy; j++)
-        for (i = -sx; i <= sx; i++) {
-          rsq = bin_distance(i, j, k);
-          if (rsq < typesq) {
-            distsq[n] = rsq;
-            s[n++] = k * mbiny * mbinx + j * mbinx + i;
-          }
-        }
-    nstencil_multi_old[itype] = n;
-  }
-}
diff --git a/src/nstencil_half_multi_old_3d_tri.h b/src/nstencil_half_multi_old_3d_tri.h
deleted file mode 100644
index 89fc167959..0000000000
--- a/src/nstencil_half_multi_old_3d_tri.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   https://www.lammps.org/, Sandia National Laboratories
-   LAMMPS development team: developers@lammps.org
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifdef NSTENCIL_CLASS
-// clang-format off
-NStencilStyle(half/multi/old/3d/tri,
-              NStencilHalfMultiOld3dTri, NS_HALF | NS_MULTI_OLD | NS_3D | NS_TRI);
-// clang-format on
-#else
-
-#ifndef LMP_NSTENCIL_HALF_MULTI_OLD_3D_TRI_H
-#define LMP_NSTENCIL_HALF_MULTI_OLD_3D_TRI_H
-
-#include "nstencil.h"
-
-namespace LAMMPS_NS {
-
-class NStencilHalfMultiOld3dTri : public NStencil {
- public:
-  NStencilHalfMultiOld3dTri(class LAMMPS *);
-  void create() override;
-};
-
-}    // namespace LAMMPS_NS
-
-#endif
-#endif
diff --git a/src/nstencil_multi.cpp b/src/nstencil_multi.cpp
new file mode 100644
index 0000000000..693c415876
--- /dev/null
+++ b/src/nstencil_multi.cpp
@@ -0,0 +1,142 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "nstencil_multi.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int DIM_3D, int TRI>
+NStencilMulti<HALF, DIM_3D, TRI>::NStencilMulti(LAMMPS *lmp) : NStencil(lmp) {}
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int DIM_3D, int TRI>
+void NStencilMulti<HALF, DIM_3D, TRI>::set_stencil_properties()
+{
+  int n = ncollections;
+  int i, j;
+
+  // FULL
+  // Always look up neighbor using full stencil and neighbor's bin
+  // Stencil cutoff set by i-j cutoff
+
+  // HALF
+  // Cross collections: use full stencil, looking one way through hierarchy
+  // smaller -> larger => use full stencil in larger bin
+  // larger -> smaller => no nstencil required
+  // If cut offs are same, use half stencil
+  // If triclinic, need full stencil
+
+  for (i = 0; i < n; i++) {
+    for (j = 0; j < n; j++) {
+      if (HALF)
+        if (cutcollectionsq[i][i] > cutcollectionsq[j][j]) continue;
+
+      flag_skip_multi[i][j] = false;
+      flag_half_multi[i][j] = false;
+      flag_same_multi[i][j] = false;
+      bin_collection_multi[i][j] = j;
+
+      if (HALF) {
+        if (cutcollectionsq[i][i] == cutcollectionsq[j][j]) {
+          if (!TRI) flag_half_multi[i][j] = true;
+          flag_same_multi[i][j] = true;
+          bin_collection_multi[i][j] = i;
+        }
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   create stencil based on bin geometry and cutoff
+------------------------------------------------------------------------- */
+
+template<int HALF, int DIM_3D, int TRI>
+void NStencilMulti<HALF, DIM_3D, TRI>::create()
+{
+  int icollection, jcollection, bin_collection, i, j, k, ns, half_flag;
+  int n = ncollections;
+  double cutsq;
+
+  for (icollection = 0; icollection < n; icollection++) {
+    for (jcollection = 0; jcollection < n; jcollection++) {
+      if (flag_skip_multi[icollection][jcollection]) {
+        nstencil_multi[icollection][jcollection] = 0;
+        continue;
+      }
+
+      ns = 0;
+
+      sx = stencil_sx_multi[icollection][jcollection];
+      sy = stencil_sy_multi[icollection][jcollection];
+      sz = stencil_sz_multi[icollection][jcollection];
+
+      mbinx = stencil_mbinx_multi[icollection][jcollection];
+      mbiny = stencil_mbiny_multi[icollection][jcollection];
+      mbinz = stencil_mbinz_multi[icollection][jcollection];
+
+      bin_collection = bin_collection_multi[icollection][jcollection];
+      cutsq = cutcollectionsq[icollection][jcollection];
+      half_flag = flag_half_multi[icollection][jcollection];
+
+      // Half and ortho stencils include central bin first
+      // This preserves the historical order of the neighbor list
+      // as the old npair classes used to separately parse the central bin first
+      // This !TRI condition (and the one below) are now unnecessary
+      // since triclinic only uses full stencils - kept the flags for clarity
+      if (HALF && (!TRI))
+        if (half_flag) stencil_multi[icollection][jcollection][ns++] = 0;
+
+      // For half stencils, only the upper plane is needed
+      int sy_min = sy;
+      int sz_min = sz;
+      if (HALF) {
+        if (half_flag && (!DIM_3D)) sy_min = 0;
+        if (half_flag && DIM_3D) sz_min = 0;
+      }
+
+      for (k = -sz_min; k <= sz; k++) {
+        for (j = -sy_min; j <= sy; j++) {
+          for (i = -sx; i <= sx; i++) {
+            // Now only include "upper right" bins for half and ortho stencils
+            if (HALF && (!TRI)) {
+              if (half_flag) {
+                if (DIM_3D) {
+                  if (k <= 0 && j <= 0 && (j != 0 || i <= 0)) continue;
+                } else {
+                  if (j <= 0 && (j != 0 || i <= 0)) continue;
+                }
+              }
+            }
+            if (bin_distance_multi(i, j, k, bin_collection) < cutsq)
+              stencil_multi[icollection][jcollection][ns++] = k * mbiny * mbinx + j * mbinx + i;
+          }
+        }
+      }
+
+      nstencil_multi[icollection][jcollection] = ns;
+    }
+  }
+}
+
+namespace LAMMPS_NS {
+template class NStencilMulti<0,0,0>;
+template class NStencilMulti<0,1,0>;
+template class NStencilMulti<1,0,0>;
+template class NStencilMulti<1,0,1>;
+template class NStencilMulti<1,1,0>;
+template class NStencilMulti<1,1,1>;
+}
diff --git a/src/nstencil_multi.h b/src/nstencil_multi.h
new file mode 100644
index 0000000000..33219891c2
--- /dev/null
+++ b/src/nstencil_multi.h
@@ -0,0 +1,68 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NSTENCIL_CLASS
+// clang-format off
+typedef NStencilMulti<0, 0, 0> NStencilFullMulti2d;
+NStencilStyle(full/multi/2d,
+              NStencilFullMulti2d,
+              NS_FULL | NS_MULTI | NS_2D | NS_ORTHO | NS_TRI);
+
+typedef NStencilMulti<0, 1, 0> NStencilFullMulti3d;
+NStencilStyle(full/multi/3d,
+              NStencilFullMulti3d,
+              NS_FULL | NS_MULTI | NS_3D | NS_ORTHO | NS_TRI);
+
+typedef NStencilMulti<1, 0, 0> NStencilHalfMulti2d;
+NStencilStyle(half/multi/2d,
+              NStencilHalfMulti2d,
+              NS_HALF | NS_MULTI | NS_2D | NS_ORTHO);
+
+typedef NStencilMulti<1, 0, 1> NStencilHalfMulti2dTri;
+NStencilStyle(half/multi/2d/tri,
+              NStencilHalfMulti2dTri,
+              NS_HALF | NS_MULTI | NS_2D | NS_TRI);
+
+typedef NStencilMulti<1, 1, 0> NStencilHalfMulti3d;
+NStencilStyle(half/multi/3d,
+              NStencilHalfMulti3d,
+              NS_HALF | NS_MULTI | NS_3D | NS_ORTHO);
+
+typedef NStencilMulti<1, 1, 1> NStencilHalfMulti3dTri;
+NStencilStyle(half/multi/3d/tri,
+              NStencilHalfMulti3dTri,
+              NS_HALF | NS_MULTI | NS_3D | NS_TRI);
+// clang-format on
+#else
+
+#ifndef LMP_NSTENCIL_MULTI_H
+#define LMP_NSTENCIL_MULTI_H
+
+#include "nstencil.h"
+
+namespace LAMMPS_NS {
+
+template<int HALF, int DIM_3D, int TRI>
+class NStencilMulti : public NStencil {
+ public:
+  NStencilMulti(class LAMMPS *);
+  void create() override;
+
+ protected:
+  void set_stencil_properties() override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/nstencil_multi_old.cpp b/src/nstencil_multi_old.cpp
new file mode 100644
index 0000000000..8648e6f73c
--- /dev/null
+++ b/src/nstencil_multi_old.cpp
@@ -0,0 +1,91 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "nstencil_multi_old.h"
+#include "atom.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<int HALF, int DIM_3D, int TRI>
+NStencilMultiOld<HALF, DIM_3D, TRI>::NStencilMultiOld(LAMMPS *lmp) : NStencil(lmp) {}
+
+/* ----------------------------------------------------------------------
+   create stencil based on bin geometry and cutoff
+------------------------------------------------------------------------- */
+
+template<int HALF, int DIM_3D, int TRI>
+void NStencilMultiOld<HALF, DIM_3D, TRI>::create()
+{
+  int i, j, k, n, itype;
+  double rsq, typesq;
+  int *s;
+  double *distsq;
+  int ntypes = atom->ntypes;
+
+  // For half stencils, only the upper plane is needed
+  // for triclinic, need to use full stencil in all dims
+  //   not a half stencil in y
+  // b/c transforming orthog -> lambda -> orthog for ghost atoms
+  //   with an added PBC offset can shift both coords by epsilon
+  // thus for an I/J owned/ghost pair, the xy coords
+  //   and bin assignments can be different on I proc vs J proc
+
+  int sy_min = sy;
+  int sz_min = sz;
+  if ((!TRI) && HALF && (!DIM_3D)) sy_min = 0;
+  if ((!TRI) && HALF && DIM_3D) sz_min = 0;
+
+  for (itype = 1; itype <= ntypes; itype++) {
+
+    typesq = cuttypesq[itype];
+    s = stencil_multi_old[itype];
+    distsq = distsq_multi_old[itype];
+    n = 0;
+
+    // Half and ortho stencils include central bin first
+    // This preserves the historical order of the neighbor list
+    // as the old npair classes used to separately parse the central bin first
+    if (HALF && (!TRI)) s[n++] = 0;
+
+    for (k = -sz_min; k <= sz; k++) {
+      for (j = -sy_min; j <= sy; j++) {
+        for (i = -sx; i <= sx; i++) {
+
+          // Now only include "upper right" bins for half and ortho stencils
+          if (HALF && (!DIM_3D) && (!TRI))
+            if (j <= 0 && (j != 0 || i <= 0)) continue;
+          if (HALF && DIM_3D && (!TRI))
+            if (k <= 0 && j <= 0 && (j != 0 || i <= 0)) continue;
+
+          rsq = bin_distance(i, j, k);
+          if (rsq < typesq) {
+            distsq[n] = rsq;
+            s[n++] = k * mbiny * mbinx + j * mbinx + i;
+          }
+        }
+      }
+    }
+    nstencil_multi_old[itype] = n;
+  }
+}
+
+namespace LAMMPS_NS {
+template class NStencilMultiOld<0,0,0>;
+template class NStencilMultiOld<0,1,0>;
+template class NStencilMultiOld<1,0,0>;
+template class NStencilMultiOld<1,0,1>;
+template class NStencilMultiOld<1,1,0>;
+template class NStencilMultiOld<1,1,1>;
+}
diff --git a/src/nstencil_multi_old.h b/src/nstencil_multi_old.h
new file mode 100644
index 0000000000..5d9dfce644
--- /dev/null
+++ b/src/nstencil_multi_old.h
@@ -0,0 +1,65 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef NSTENCIL_CLASS
+// clang-format off
+typedef NStencilMultiOld<0, 0, 0> NStencilFullMultiOld2d;
+NStencilStyle(full/multi/old/2d,
+              NStencilFullMultiOld2d,
+              NS_FULL | NS_MULTI_OLD | NS_2D | NS_ORTHO | NS_TRI);
+
+typedef NStencilMultiOld<0, 1, 0> NStencilFullMultiOld3d;
+NStencilStyle(full/multi/old/3d,
+              NStencilFullMultiOld3d,
+              NS_FULL | NS_MULTI_OLD | NS_3D | NS_ORTHO | NS_TRI);
+
+typedef NStencilMultiOld<1, 0, 0> NStencilHalfMultiOld2d;
+NStencilStyle(half/multi/old/2d,
+              NStencilHalfMultiOld2d,
+              NS_HALF | NS_MULTI_OLD | NS_2D | NS_ORTHO);
+
+typedef NStencilMultiOld<1, 0, 1> NStencilHalfMultiOld2dTri;
+NStencilStyle(half/multi/old/2d/tri,
+              NStencilHalfMultiOld2dTri,
+              NS_HALF | NS_MULTI_OLD | NS_2D | NS_TRI);
+
+typedef NStencilMultiOld<1, 1, 0> NStencilHalfMultiOld3d;
+NStencilStyle(half/multi/old/3d,
+              NStencilHalfMultiOld3d,
+              NS_HALF | NS_MULTI_OLD | NS_3D | NS_ORTHO);
+
+typedef NStencilMultiOld<1, 1, 1> NStencilHalfMultiOld3dTri;
+NStencilStyle(half/multi/old/3d/tri,
+              NStencilHalfMultiOld3dTri,
+              NS_HALF | NS_MULTI_OLD | NS_3D | NS_TRI);
+// clang-format on
+#else
+
+#ifndef LMP_NSTENCIL_MULTI_OLD_H
+#define LMP_NSTENCIL_MULTI_OLD_H
+
+#include "nstencil.h"
+
+namespace LAMMPS_NS {
+
+template<int HALF, int DIM_3D, int TRI>
+class NStencilMultiOld : public NStencil {
+ public:
+  NStencilMultiOld(class LAMMPS *);
+  void create() override;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/ntopo.cpp b/src/ntopo.cpp
index 752a8f95a6..a6c7a199e2 100644
--- a/src/ntopo.cpp
+++ b/src/ntopo.cpp
@@ -22,8 +22,8 @@
 
 using namespace LAMMPS_NS;
 
-#define LB_FACTOR 1.5
-#define DELTA 10000
+static constexpr double LB_FACTOR = 1.5;
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ntopo_angle_all.cpp b/src/ntopo_angle_all.cpp
index f6ed1cc277..593750c7b7 100644
--- a/src/ntopo_angle_all.cpp
+++ b/src/ntopo_angle_all.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ntopo_angle_partial.cpp b/src/ntopo_angle_partial.cpp
index 4ab56465b7..277621f0bd 100644
--- a/src/ntopo_angle_partial.cpp
+++ b/src/ntopo_angle_partial.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ntopo_angle_template.cpp b/src/ntopo_angle_template.cpp
index 51f4b0f726..982ef90a9d 100644
--- a/src/ntopo_angle_template.cpp
+++ b/src/ntopo_angle_template.cpp
@@ -26,7 +26,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ntopo_bond_all.cpp b/src/ntopo_bond_all.cpp
index 8e2fd1adfd..7e051744e2 100644
--- a/src/ntopo_bond_all.cpp
+++ b/src/ntopo_bond_all.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ntopo_bond_partial.cpp b/src/ntopo_bond_partial.cpp
index e2d4944b9c..4df72f9ab8 100644
--- a/src/ntopo_bond_partial.cpp
+++ b/src/ntopo_bond_partial.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ntopo_bond_template.cpp b/src/ntopo_bond_template.cpp
index 865fddc12d..c8f4bc51d8 100644
--- a/src/ntopo_bond_template.cpp
+++ b/src/ntopo_bond_template.cpp
@@ -26,7 +26,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ntopo_dihedral_all.cpp b/src/ntopo_dihedral_all.cpp
index 216952d9c6..9f62cda07d 100644
--- a/src/ntopo_dihedral_all.cpp
+++ b/src/ntopo_dihedral_all.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ntopo_dihedral_partial.cpp b/src/ntopo_dihedral_partial.cpp
index 3e7a4526d5..98246b7b4f 100644
--- a/src/ntopo_dihedral_partial.cpp
+++ b/src/ntopo_dihedral_partial.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ntopo_dihedral_template.cpp b/src/ntopo_dihedral_template.cpp
index e2ffe7be4c..2cc41f4a8f 100644
--- a/src/ntopo_dihedral_template.cpp
+++ b/src/ntopo_dihedral_template.cpp
@@ -26,7 +26,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ntopo_improper_all.cpp b/src/ntopo_improper_all.cpp
index ee08ddb834..2786fdbfa8 100644
--- a/src/ntopo_improper_all.cpp
+++ b/src/ntopo_improper_all.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ntopo_improper_partial.cpp b/src/ntopo_improper_partial.cpp
index 039da2c915..559a7b3d7d 100644
--- a/src/ntopo_improper_partial.cpp
+++ b/src/ntopo_improper_partial.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/ntopo_improper_template.cpp b/src/ntopo_improper_template.cpp
index 401fb986d9..79712f4ed1 100644
--- a/src/ntopo_improper_template.cpp
+++ b/src/ntopo_improper_template.cpp
@@ -26,7 +26,7 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 10000
+static constexpr int DELTA = 10000;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/output.cpp b/src/output.cpp
index cff2e68107..f6da696293 100644
--- a/src/output.cpp
+++ b/src/output.cpp
@@ -38,8 +38,8 @@
 
 using namespace LAMMPS_NS;
 
-#define DELTA 1
-#define EPSDT 1.0e-6
+static constexpr int DELTA = 1;
+static constexpr double EPSDT = 1.0e-6;
 
 enum {SETUP, WRITE, RESET_DT};
 
diff --git a/src/pair_table.cpp b/src/pair_table.cpp
index 2b481c8334..730107c856 100644
--- a/src/pair_table.cpp
+++ b/src/pair_table.cpp
@@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 
 enum { NONE, RLINEAR, RSQ, BMP };
 
-#define EPSILONR 1.0e-6
+static constexpr double EPSILONR = 1.0e-6;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/platform.cpp b/src/platform.cpp
index 064f142425..b324bd0b5c 100644
--- a/src/platform.cpp
+++ b/src/platform.cpp
@@ -61,6 +61,13 @@
 #include <fcntl.h>
 #include <sys/syslimits.h>
 #endif
+
+// for disk_free()
+#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__DragonFly__) || \
+    defined(__OpenBSD__) || defined(__NetBSD__)
+#include <sys/statvfs.h>
+#endif
+
 ////////////////////////////////////////////////////////////////////////
 
 #include <chrono>
@@ -1047,6 +1054,36 @@ bool platform::file_is_readable(const std::string &path)
   }
   return false;
 }
+/* ----------------------------------------------------------------------
+   determine available disk space, if supported. Return -1 if not.
+------------------------------------------------------------------------- */
+
+double platform::disk_free(const std::string &path)
+{
+  double bytes_free = -1.0;
+
+#if defined(__linux__) || defined(__APPLE__) || defined(__FreeBSD__) || defined(__DragonFly__) || \
+    defined(__OpenBSD__) || defined(__NetBSD__)
+  struct statvfs fs;
+
+  if (path.size()) {
+    int rv = statvfs(path.c_str(), &fs);
+    if (rv == 0) {
+#if defined(__linux__)
+      bytes_free = fs.f_bavail * fs.f_bsize;
+#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__DragonFly__) || \
+    defined(__OpenBSD__) || defined(__NetBSD__)
+      bytes_free = fs.f_bavail * fs.f_frsize;
+#endif
+    }
+  }
+#elif defined(_WIN32)
+  uint64_t is_free = 0;
+  if (GetDiskFreeSpaceEx(path.c_str(), (PULARGE_INTEGER) &is_free, nullptr, nullptr))
+    bytes_free = is_free;
+#endif
+  return bytes_free;
+}
 
 /* ----------------------------------------------------------------------
    check if filename has a known compression extension
diff --git a/src/platform.h b/src/platform.h
index 036074c900..4328f873dd 100644
--- a/src/platform.h
+++ b/src/platform.h
@@ -377,6 +377,15 @@ namespace platform {
 
   bool file_is_readable(const std::string &path);
 
+  /*! Return free disk space in bytes of file system pointed to by path
+   *
+   * Returns -1.0 if the path is invalid or free space reporting not supported.
+   *
+   * \param path file or folder path in file system
+   * \return  */
+
+  double disk_free(const std::string &path);
+
   /*! Check if a file name ends in a known extension for a compressed file format
    *
    * Currently supported file extensions are: .gz, .bz2, .zst, .xz, .lzma, lz4
diff --git a/src/potential_file_reader.cpp b/src/potential_file_reader.cpp
index 2c0b9a6a55..613225a797 100644
--- a/src/potential_file_reader.cpp
+++ b/src/potential_file_reader.cpp
@@ -144,6 +144,8 @@ void PotentialFileReader::next_dvector(double *list, int n)
 {
   try {
     return reader->next_dvector(list, n);
+  } catch (EOFException &) {
+    throw EOFException("EOF reached");
   } catch (FileReaderException &e) {
     error->one(FLERR, e.what());
   }
diff --git a/src/procmap.cpp b/src/procmap.cpp
index 6fcea50315..71e1cf5a6b 100644
--- a/src/procmap.cpp
+++ b/src/procmap.cpp
@@ -33,7 +33,7 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 128
+static constexpr int MAXLINE = 128;
 
 /* ---------------------------------------------------------------------- */
 
@@ -282,7 +282,7 @@ void ProcMap::custom_grid(char *cfile, int nprocs,
   int me;
   MPI_Comm_rank(world,&me);
 
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
   FILE *fp = nullptr;
 
   if (me == 0) {
diff --git a/src/rcb.cpp b/src/rcb.cpp
index a42be9b02a..da7134a0f1 100644
--- a/src/rcb.cpp
+++ b/src/rcb.cpp
@@ -21,9 +21,9 @@
 
 using namespace LAMMPS_NS;
 
-#define MYHUGE 1.0e30
-#define TINY 1.0e-6
-#define DELTA 16384
+static constexpr double MYHUGE = 1.0e30;
+static constexpr double TINY = 1.0e-6;
+static constexpr int DELTA = 16384;
 
 // prototypes for non-class functions
 
diff --git a/src/read_dump.cpp b/src/read_dump.cpp
index 7f8d4d6559..5972b369c8 100644
--- a/src/read_dump.cpp
+++ b/src/read_dump.cpp
@@ -33,7 +33,7 @@
 
 using namespace LAMMPS_NS;
 
-#define CHUNK 16384
+static constexpr int CHUNK = 16384;
 
 enum { NOADD, YESADD, KEEPADD };
 
diff --git a/src/reader_native.cpp b/src/reader_native.cpp
index 92409389ef..ae59ca6805 100644
--- a/src/reader_native.cpp
+++ b/src/reader_native.cpp
@@ -24,7 +24,7 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024        // max line length in dump file
+static constexpr int MAXLINE = 1024;        // max line length in dump file
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/reader_xyz.cpp b/src/reader_xyz.cpp
index c3e7d0ed99..22b6df0cb9 100644
--- a/src/reader_xyz.cpp
+++ b/src/reader_xyz.cpp
@@ -23,7 +23,7 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 1024        // max line length in dump file
+static constexpr int MAXLINE = 1024;        // max line length in dump file
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/replicate.cpp b/src/replicate.cpp
index f5444b9fa8..b27304a2a0 100644
--- a/src/replicate.cpp
+++ b/src/replicate.cpp
@@ -28,8 +28,8 @@
 
 using namespace LAMMPS_NS;
 
-#define LB_FACTOR 1.1
-#define EPSILON   1.0e-6
+static constexpr double LB_FACTOR = 1.1;
+static constexpr double EPSILON =   1.0e-6;
 
 /* ---------------------------------------------------------------------- */
 
@@ -57,7 +57,12 @@ void Replicate::command(int narg, char **arg)
     error->all(FLERR, "Illegal replication grid {}x{}x{}. All replications must be > 0",
                nx, ny, nz);
 
-  int nrep = nx*ny*nz;
+  bigint nrepbig = (bigint) nx * ny * nz;
+  if (nrepbig > MAXSMALLINT)
+    error->all(FLERR, "Total # of replica is too large: {}x{}x{} = {}. "
+               "Please use replicate multiple times", nx, ny, nz, nrepbig);
+
+  int nrep = (int) nrepbig;
   if (me == 0)
     utils::logmesg(lmp, "Replication is creating a {}x{}x{} = {} times larger system...\n",
                    nx, ny, nz, nrep);
diff --git a/src/rerun.cpp b/src/rerun.cpp
index 5f92556092..74a8956994 100644
--- a/src/rerun.cpp
+++ b/src/rerun.cpp
@@ -30,7 +30,7 @@
 
 using namespace LAMMPS_NS;
 
-#define EPSDT 1.0e-6
+static constexpr double EPSDT = 1.0e-6;
 /* ---------------------------------------------------------------------- */
 
 Rerun::Rerun(LAMMPS *lmp) : Command(lmp) {}
diff --git a/src/reset_atoms_id.cpp b/src/reset_atoms_id.cpp
index a6382e6aee..9992a49b28 100644
--- a/src/reset_atoms_id.cpp
+++ b/src/reset_atoms_id.cpp
@@ -38,8 +38,8 @@ static int compare_coords(const void *, const void *);
 static int compare_coords(const int, const int, void *);
 #endif
 
-#define PERBIN 10
-#define BIG 1.0e20
+static constexpr int PERBIN = 10;
+static constexpr double BIG = 1.0e20;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/set.cpp b/src/set.cpp
index 6b21fe92bb..cee769a573 100644
--- a/src/set.cpp
+++ b/src/set.cpp
@@ -52,8 +52,6 @@ enum{TYPE,TYPE_FRACTION,TYPE_RATIO,TYPE_SUBSET,
      SPH_E,SPH_CV,SPH_RHO,EDPD_TEMP,EDPD_CV,CC,SMD_MASS_DENSITY,
      SMD_CONTACT_RADIUS,DPDTHETA,EPSILON,IVEC,DVEC,IARRAY,DARRAY};
 
-#define BIG INT_MAX
-
 /* ---------------------------------------------------------------------- */
 
 void Set::command(int narg, char **arg)
diff --git a/src/special.cpp b/src/special.cpp
index 5d9b2bb09e..232ea35170 100644
--- a/src/special.cpp
+++ b/src/special.cpp
@@ -26,7 +26,7 @@
 
 using namespace LAMMPS_NS;
 
-#define RVOUS 1   // 0 for irregular, 1 for all2all
+static constexpr int RVOUS = 1;   // 0 for irregular, 1 for all2all
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/text_file_reader.cpp b/src/text_file_reader.cpp
index 46a5fd33a9..0b8d717687 100644
--- a/src/text_file_reader.cpp
+++ b/src/text_file_reader.cpp
@@ -189,8 +189,9 @@ void TextFileReader::next_dvector(double *list, int n)
     char *ptr = next_line();
 
     if (ptr == nullptr) {
-      // EOF
-      if (i < n) {
+      if (i == 0) { // EOF without any records
+        throw EOFException("EOF reached");
+      } else if (i < n) { // EOF with incomplete data
         throw FileReaderException(
             fmt::format("Incorrect format in {} file! {}/{} values", filetype, i, n));
       }
diff --git a/src/thermo.cpp b/src/thermo.cpp
index 1c4e25dcb0..3d8b419991 100644
--- a/src/thermo.cpp
+++ b/src/thermo.cpp
@@ -93,7 +93,7 @@ static constexpr char id_press[] = "thermo_press";
 static constexpr char id_pe[] = "thermo_pe";
 
 static char fmtbuf[512];
-#define DELTA 8
+static constexpr int DELTA = 8;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/universe.cpp b/src/universe.cpp
index e743816d92..edd5b01031 100644
--- a/src/universe.cpp
+++ b/src/universe.cpp
@@ -21,7 +21,7 @@
 
 using namespace LAMMPS_NS;
 
-#define MAXLINE 256
+static constexpr int MAXLINE = 256;
 
 /* ----------------------------------------------------------------------
    create & initialize the universe of processors in communicator
@@ -69,7 +69,7 @@ Universe::~Universe()
 
 void Universe::reorder(char *style, char *arg)
 {
-  char line[MAXLINE];
+  char line[MAXLINE] = {'\0'};
 
   if (uworld != uorig) MPI_Comm_free(&uworld);
 
diff --git a/src/utils.cpp b/src/utils.cpp
index 992feb34e8..bde6dffca5 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -395,7 +395,24 @@ double utils::numeric(const char *file, int line, const std::string &str, bool d
       lmp->error->all(file, line, msg);
   }
 
-  return atof(buf.c_str());
+  double rv = 0;
+  try {
+    rv = stod(buf);
+  } catch (std::invalid_argument const &) {
+    auto msg = fmt::format("Floating point number {} in input script or data file is invalid", buf);
+    if (do_abort)
+      lmp->error->one(file, line, msg);
+    else
+      lmp->error->all(file, line, msg);
+  } catch (std::out_of_range const &) {
+    auto msg =
+        fmt::format("Floating point number {} in input script or data file is out of range", buf);
+    if (do_abort)
+      lmp->error->one(file, line, msg);
+    else
+      lmp->error->all(file, line, msg);
+  }
+  return rv;
 }
 
 /* ----------------------------------------------------------------------
@@ -439,7 +456,17 @@ int utils::inumeric(const char *file, int line, const std::string &str, bool do_
       lmp->error->all(file, line, msg);
   }
 
-  return atoi(buf.c_str());
+  int rv = 0;
+  try {
+    rv = stoi(buf);
+  } catch (std::out_of_range const &) {
+    auto msg = fmt::format("Integer {} in input script or data file is out of range", buf);
+    if (do_abort)
+      lmp->error->one(file, line, msg);
+    else
+      lmp->error->all(file, line, msg);
+  }
+  return rv;
 }
 
 /* ----------------------------------------------------------------------
@@ -484,7 +511,18 @@ bigint utils::bnumeric(const char *file, int line, const std::string &str, bool
       lmp->error->all(file, line, msg);
   }
 
-  return ATOBIGINT(buf.c_str());
+  long long rv = 0;
+  try {
+    rv = stoll(buf);
+    if (rv > MAXBIGINT) throw std::out_of_range("64-bit");
+  } catch (std::out_of_range const &) {
+    auto msg = fmt::format("Integer {} in input script or data file is out of range", buf);
+    if (do_abort)
+      lmp->error->one(file, line, msg);
+    else
+      lmp->error->all(file, line, msg);
+  }
+  return static_cast<bigint>(rv);
 }
 
 /* ----------------------------------------------------------------------
@@ -529,7 +567,18 @@ tagint utils::tnumeric(const char *file, int line, const std::string &str, bool
       lmp->error->all(file, line, msg);
   }
 
-  return ATOTAGINT(buf.c_str());
+  long long rv = 0;
+  try {
+    rv = stoll(buf);
+    if (rv > MAXTAGINT) throw std::out_of_range("64-bit");
+  } catch (std::out_of_range const &) {
+    auto msg = fmt::format("Integer {} in input script or data file is out of range", buf);
+    if (do_abort)
+      lmp->error->one(file, line, msg);
+    else
+      lmp->error->all(file, line, msg);
+  }
+  return static_cast<tagint>(rv);
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/variable.cpp b/src/variable.cpp
index fe6eba39d9..426dbd8b06 100644
--- a/src/variable.cpp
+++ b/src/variable.cpp
@@ -48,13 +48,13 @@
 using namespace LAMMPS_NS;
 using namespace MathConst;
 
-#define VARDELTA 4
-#define MAXLEVEL 4
-#define MAXLINE 256
-#define CHUNK 1024
-#define MAXFUNCARG 6
+static constexpr int VARDELTA = 4;
+static constexpr int MAXLEVEL = 4;
+static constexpr int MAXLINE = 256;
+static constexpr int CHUNK = 1024;
+static constexpr int MAXFUNCARG = 6;
 
-#define MYROUND(a) (( (a)-floor(a) ) >= .5) ? ceil(a) : floor(a)
+static inline double MYROUND(double a) { return ((a - floor(a)) >= 0.5) ? ceil(a) : floor(a); }
 
 enum{ARG,OP};
 
diff --git a/src/velocity.cpp b/src/velocity.cpp
index 421c494553..fd4fd71f68 100644
--- a/src/velocity.cpp
+++ b/src/velocity.cpp
@@ -38,8 +38,8 @@ enum{ALL,LOCAL,GEOM};
 enum{UNIFORM,GAUSSIAN};
 enum{NONE,CONSTANT,EQUAL,ATOM};
 
-#define WARMUP 100
-#define SMALL  0.001
+static constexpr int WARMUP = 100;
+static constexpr double SMALL =  0.001;
 
 /* ---------------------------------------------------------------------- */
 
diff --git a/src/version.h b/src/version.h
index e2f596d1aa..d1d8472ca6 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1,2 @@
 #define LAMMPS_VERSION "21 Nov 2023"
+#define LAMMPS_UPDATE "Development"
diff --git a/src/write_restart.cpp b/src/write_restart.cpp
index a996532687..ad279c14f6 100644
--- a/src/write_restart.cpp
+++ b/src/write_restart.cpp
@@ -545,7 +545,7 @@ void WriteRestart::force_fields()
    all procs call this method, only proc 0 writes to file
 ------------------------------------------------------------------------- */
 
-void WriteRestart::file_layout(int send_size)
+void WriteRestart::file_layout(int /*send_size*/)
 {
   if (me == 0) write_int(MULTIPROC,multiproc);
 
diff --git a/tools/lammps-gui/CMakeLists.txt b/tools/lammps-gui/CMakeLists.txt
index caae722865..b1469794bb 100644
--- a/tools/lammps-gui/CMakeLists.txt
+++ b/tools/lammps-gui/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.16)
 
-project(lammps-gui VERSION 1.5.10 LANGUAGES CXX)
+project(lammps-gui VERSION 1.5.11 LANGUAGES CXX)
 
 set(CMAKE_AUTOUIC ON)
 set(CMAKE_AUTOMOC ON)
diff --git a/tools/lammps-gui/help_index.table b/tools/lammps-gui/help_index.table
index 5ce4ae6203..5aa9e13dda 100644
--- a/tools/lammps-gui/help_index.table
+++ b/tools/lammps-gui/help_index.table
@@ -254,6 +254,7 @@ compute_nbond_atom.html compute nbond/atom
 compute_omega_chunk.html compute omega/chunk
 compute_orientorder_atom.html compute orientorder/atom
 compute_orientorder_atom.html compute orientorder/atom/kk
+compute_pace.html compute pace
 compute_pair_local.html compute pair/local
 compute_pair.html compute pair
 compute_pe_atom.html compute pe/atom
@@ -267,13 +268,17 @@ compute_property_chunk.html compute property/chunk
 compute_property_grid.html compute property/grid
 compute_property_local.html compute property/local
 compute_ptm_atom.html compute ptm/atom
+compute_rattlers_atom.html compute rattlers/atom
 compute_rdf.html compute rdf
+compute_reaxff_atom.html compute reaxff/atom
+compute_reaxff_atom.html compute reaxff/atom/kk
 compute_reduce_chunk.html compute reduce/chunk
 compute_reduce.html compute reduce
 compute_reduce.html compute reduce/region
 compute_rigid_local.html compute rigid/local
 compute.html compute
 compute_saed.html compute saed
+compute_slcsa_atom.html compute slcsa/atom
 compute_slice.html compute slice
 compute_smd_contact_radius.html compute smd/contact/radius
 compute_smd_damage.html compute smd/damage
@@ -484,6 +489,7 @@ fix_drude_transform.html fix drude/transform/inverse
 fix_dt_reset.html fix dt/reset
 fix_dt_reset.html fix dt/reset/kk
 fix_efield.html fix efield
+fix_efield.html fix efield/kk
 fix_efield.html fix efield/tip4p
 fix_ehex.html fix ehex
 fix_electrode.html fix electrode/conp
@@ -565,6 +571,7 @@ fix_nh.html fix nvt/kk
 fix_nh.html fix nvt/omp
 fix_nh_uef.html fix npt/uef
 fix_nh_uef.html fix nvt/uef
+fix_nonaffine_displacement.html fix nonaffine/displacement
 fix_nph_asphere.html fix nph/asphere
 fix_nph_asphere.html fix nph/asphere/omp
 fix_nph_body.html fix nph/body
@@ -634,6 +641,7 @@ fix_polarize.html fix polarize/functional
 fix_pour.html fix pour
 fix_precession_spin.html fix precession/spin
 fix_press_berendsen.html fix press/berendsen
+fix_press_langevin.html fix press/langevin
 fix_print.html fix print
 fix_propel_self.html fix propel/self
 fix_property_atom.html fix property/atom
@@ -703,14 +711,17 @@ fix_spring_chunk.html fix spring/chunk
 fix_spring_rg.html fix spring/rg
 fix_spring.html fix spring
 fix_spring_self.html fix spring/self
+fix_spring_self.html fix spring/self/kk
 fix_srd.html fix srd
 fix_store_force.html fix store/force
 fix_store_state.html fix store/state
 fix_temp_berendsen.html fix temp/berendsen
+fix_temp_berendsen.html fix temp/berendsen/kk
 fix_temp_csvr.html fix temp/csld
 fix_temp_csvr.html fix temp/csvr
 fix_temp_rescale_eff.html fix temp/rescale/eff
 fix_temp_rescale.html fix temp/rescale
+fix_temp_rescale.html fix temp/rescale/kk
 fix_tfmc.html fix tfmc
 fix_tgnh_drude.html fix tgnpt/drude
 fix_tgnh_drude.html fix tgnvt/drude
@@ -980,6 +991,7 @@ pair_coul_shield.html pair_style coul/shield
 pair_coul_slater.html pair_style coul/slater
 pair_coul_slater.html pair_style coul/slater/cut
 pair_coul_slater.html pair_style coul/slater/long
+pair_coul_slater.html pair_style coul/slater/long/gpu
 pair_coul_tt.html pair_style coul/tt
 pair_cs.html pair_style born/coul/dsf/cs
 pair_cs.html pair_style born/coul/long/cs
@@ -1073,8 +1085,10 @@ pair_fep_soft.html pair_style lj/class2/coul/cut/soft
 pair_fep_soft.html pair_style lj/class2/coul/long/soft
 pair_fep_soft.html pair_style lj/class2/soft
 pair_fep_soft.html pair_style lj/cut/coul/cut/soft
+pair_fep_soft.html pair_style lj/cut/coul/cut/soft/gpu
 pair_fep_soft.html pair_style lj/cut/coul/cut/soft/omp
 pair_fep_soft.html pair_style lj/cut/coul/long/soft
+pair_fep_soft.html pair_style lj/cut/coul/long/soft/gpu
 pair_fep_soft.html pair_style lj/cut/coul/long/soft/omp
 pair_fep_soft.html pair_style lj/cut/soft
 pair_fep_soft.html pair_style lj/cut/soft/omp
@@ -1225,7 +1239,9 @@ pair_meam_sw_spline.html pair_style meam/sw/spline
 pair_mesocnt.html pair_style mesocnt
 pair_mesocnt.html pair_style mesocnt/viscous
 pair_mesodpd.html pair_style edpd
+pair_mesodpd.html pair_style edpd/gpu
 pair_mesodpd.html pair_style mdpd
+pair_mesodpd.html pair_style mdpd/gpu
 pair_mesodpd.html pair_style mdpd/rhosum
 pair_mesodpd.html pair_style tdpd
 pair_mgpt.html pair_style mgpt
@@ -1245,7 +1261,8 @@ pair_morse.html pair_style morse/smooth/linear/omp
 pair_multi_lucy.html pair_style multi/lucy
 pair_multi_lucy_rx.html pair_style multi/lucy/rx
 pair_multi_lucy_rx.html pair_style multi/lucy/rx/kk
-pair_nb3b_harmonic.html pair_style nb3b/harmonic
+pair_nb3b.html pair_style nb3b/harmonic
+pair_nb3b.html pair_style nb3b/screened
 pair_nm.html pair_style nm/cut
 pair_nm.html pair_style nm/cut/coul/cut
 pair_nm.html pair_style nm/cut/coul/cut/omp
@@ -1303,16 +1320,20 @@ pair_smd_triangulated_surface.html pair_style smd/tri_surface
 pair_smd_ulsph.html pair_style smd/ulsph
 pair_smtbq.html pair_style smtbq
 pair_snap.html pair_style snap
+pair_snap.html pair_style snap/intel
 pair_snap.html pair_style snap/kk
 pair_soft.html pair_style soft
 pair_soft.html pair_style soft/gpu
 pair_soft.html pair_style soft/omp
 pair_sph_heatconduction.html pair_style sph/heatconduction
+pair_sph_heatconduction.html pair_style sph/heatconduction/gpu
 pair_sph_idealgas.html pair_style sph/idealgas
 pair_sph_lj.html pair_style sph/lj
+pair_sph_lj.html pair_style sph/lj/gpu
 pair_sph_rhosum.html pair_style sph/rhosum
 pair_sph_taitwater_morris.html pair_style sph/taitwater/morris
 pair_sph_taitwater.html pair_style sph/taitwater
+pair_sph_taitwater.html pair_style sph/taitwater/gpu
 pair_spica.html pair_style lj/spica
 pair_spica.html pair_style lj/spica/coul/long
 pair_spica.html pair_style lj/spica/coul/long/gpu
@@ -1384,6 +1405,7 @@ pair_write.html pair_write
 pair_ylz.html pair_style ylz
 pair_yukawa_colloid.html pair_style yukawa/colloid
 pair_yukawa_colloid.html pair_style yukawa/colloid/gpu
+pair_yukawa_colloid.html pair_style yukawa/colloid/kk
 pair_yukawa_colloid.html pair_style yukawa/colloid/omp
 pair_yukawa.html pair_style yukawa
 pair_yukawa.html pair_style yukawa/gpu
diff --git a/tools/lammps-gui/main.cpp b/tools/lammps-gui/main.cpp
index cf09fbb892..d70e9d3e46 100644
--- a/tools/lammps-gui/main.cpp
+++ b/tools/lammps-gui/main.cpp
@@ -36,7 +36,7 @@ int main(int argc, char *argv[])
 
     LammpsGui w(nullptr, infile);
     w.show();
-    return a.exec();
+    return QApplication::exec();
 }
 
 // Local Variables:
diff --git a/tools/lammps-gui/preferences.cpp b/tools/lammps-gui/preferences.cpp
index fd01bb5046..27cc106008 100644
--- a/tools/lammps-gui/preferences.cpp
+++ b/tools/lammps-gui/preferences.cpp
@@ -177,7 +177,7 @@ void Preferences::accept()
         msg.exec();
         const char *path = mystrdup(QCoreApplication::applicationFilePath());
         const char *arg0 = mystrdup(QCoreApplication::arguments().at(0));
-        execl(path, arg0, (char *)NULL);
+        execl(path, arg0, (char *)nullptr);
     }
 
     // reformatting settings
diff --git a/tools/lammps-gui/stdcapture.cpp b/tools/lammps-gui/stdcapture.cpp
index b09aebf053..8be543e70e 100644
--- a/tools/lammps-gui/stdcapture.cpp
+++ b/tools/lammps-gui/stdcapture.cpp
@@ -38,7 +38,7 @@
 StdCapture::StdCapture() : m_oldStdOut(0), m_capturing(false)
 {
     // make stdout unbuffered so that we don't need to flush the stream
-    setvbuf(stdout, NULL, _IONBF, 0);
+    setvbuf(stdout, nullptr, _IONBF, 0);
 
     m_pipe[READ]  = 0;
     m_pipe[WRITE] = 0;
@@ -106,7 +106,7 @@ bool StdCapture::EndCapture()
 
 std::string StdCapture::GetChunk()
 {
-    if (!m_capturing) return std::string();
+    if (!m_capturing) return {};
     int bytesRead = 0;
     buf[0]        = '\0';
 
@@ -120,7 +120,7 @@ std::string StdCapture::GetChunk()
     if (bytesRead > 0) {
         buf[bytesRead] = '\0';
     }
-    return std::string(buf);
+    return {buf};
 }
 
 std::string StdCapture::GetCapture()
diff --git a/tools/msi2lmp/src/SearchAndFill.c b/tools/msi2lmp/src/SearchAndFill.c
index 7a7a1032ea..35de0c81fe 100644
--- a/tools/msi2lmp/src/SearchAndFill.c
+++ b/tools/msi2lmp/src/SearchAndFill.c
@@ -93,7 +93,7 @@ void SearchAndFill(struct FrcFieldItem *item)
 
   file_pos = ftell(FrcF);
   if (file_pos < 0) {
-    fprintf(stderr, "Could not obtain file stream position: ", strerror(errno));
+    fprintf(stderr, "Could not obtain file stream position: %s\n", strerror(errno));
     exit(2);
   }
 
@@ -116,7 +116,7 @@ void SearchAndFill(struct FrcFieldItem *item)
   /* Read lines until keyword is found */
 
   if (fseek(FrcF,file_pos,SEEK_SET) < 0) {
-    fprintf(stderr, "Resetting file stream failed: ", strerror(errno));
+    fprintf(stderr, "Resetting file stream failed: %s\n", strerror(errno));
     exit(2);
   }
   strcpy(line,"empty");
diff --git a/tools/swig/lammps.i b/tools/swig/lammps.i
index 91a6866107..b0670b40a4 100644
--- a/tools/swig/lammps.i
+++ b/tools/swig/lammps.i
@@ -129,11 +129,13 @@ extern void  *lammps_extract_global(void *handle, const char *name);
 extern int    lammps_extract_atom_datatype(void *handle, const char *name);
 extern void  *lammps_extract_atom(void *handle, const char *name);
 
-extern void  *lammps_extract_compute(void *handle, char *id, int, int);
-extern void  *lammps_extract_fix(void *handle, char *, int, int, int, int);
-extern void  *lammps_extract_variable(void *handle, char *, char *);
+extern void  *lammps_extract_compute(void *handle, const char *id, int, int);
+extern void  *lammps_extract_fix(void *handle, const char *, int, int, int, int);
+extern void  *lammps_extract_variable(void *handle, const char *, const char *);
 extern int    lammps_extract_variable_datatype(void *handle, const char *name);
-extern int    lammps_set_variable(void *, char *, char *);
+extern int    lammps_set_variable(void *, const char *, const char *);
+extern int    lammps_set_string_variable(void *, const char *, const char *);
+extern int    lammps_set_internal_variable(void *, const char *, double);
 
 extern void   lammps_gather_atoms(void *, char *, int, int, void *);
 extern void   lammps_gather_atoms_concat(void *, char *, int, int, void *);
@@ -312,11 +314,13 @@ extern void  *lammps_extract_global(void *handle, const char *name);
 extern int    lammps_extract_atom_datatype(void *handle, const char *name);
 extern void  *lammps_extract_atom(void *handle, const char *name);
 
-extern void  *lammps_extract_compute(void *handle, char *id, int, int);
-extern void  *lammps_extract_fix(void *handle, char *, int, int, int, int);
-extern void  *lammps_extract_variable(void *handle, char *, char *);
+extern void  *lammps_extract_compute(void *handle, const char *id, int, int);
+extern void  *lammps_extract_fix(void *handle, const char *, int, int, int, int);
+extern void  *lammps_extract_variable(void *handle, const char *, const char *);
 extern int    lammps_extract_variable_datatype(void *handle, const char *name);
-extern int    lammps_set_variable(void *, char *, char *);
+extern int    lammps_set_variable(void *, const char *, const char *);
+extern int    lammps_set_string_variable(void *, const char *, const char *);
+extern int    lammps_set_internal_variable(void *, const char *, double);
 
 extern void   lammps_gather_atoms(void *, char *, int, int, void *);
 extern void   lammps_gather_atoms_concat(void *, char *, int, int, void *);
diff --git a/tools/valgrind/MPICH.supp b/tools/valgrind/MPICH.supp
new file mode 100644
index 0000000000..6934cf8fbd
--- /dev/null
+++ b/tools/valgrind/MPICH.supp
@@ -0,0 +1,41 @@
+{
+   MPICH_MPI_init1
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:malloc
+   obj:*
+   ...
+   fun:psm3_init
+   ...
+   fun:MPIDI_OFI_init_local
+   ...
+   fun:PMPI_Init
+   fun:main
+}
+{
+   MPICH_MPI_init2
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:malloc
+   fun:strdup
+   obj:*
+   ...
+   fun:fi_ini
+   ...
+   fun:MPIDI_OFI_init_local
+   ...
+   fun:PMPI_Init
+   fun:main
+}
+{
+   MPICH_MPI_init3
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:calloc
+   obj:*
+   ...
+   fun:MPIDI_OFI_init_local
+   ...
+   fun:PMPI_Init
+   fun:main
+}
diff --git a/tools/valgrind/README b/tools/valgrind/README
index 63c440f7b2..e62031da9b 100644
--- a/tools/valgrind/README
+++ b/tools/valgrind/README
@@ -8,6 +8,7 @@ on running LAMMPS, use a command line like following:
 valgrind --show-leak-kinds=all --track-origins=yes \
    --suppressions=/path/to/lammps/tools/valgrind/OpenMP.supp \
    --suppressions=/path/to/lammps/tools/valgrind/OpenMPI.supp \
+   --suppressions=/path/to/lammps/tools/valgrind/MPICH.supp \
    --suppressions=/path/to/lammps/tools/valgrind/Python3.supp \
    --suppressions=/path/to/lammps/tools/valgrind/GTest.supp \
    --suppressions=/path/to/lammps/tools/valgrind/FlexiBLAS.supp \
diff --git a/unittest/c-library/test_library_properties.cpp b/unittest/c-library/test_library_properties.cpp
index 40d0d08575..4f5cd97d1f 100644
--- a/unittest/c-library/test_library_properties.cpp
+++ b/unittest/c-library/test_library_properties.cpp
@@ -335,7 +335,7 @@ TEST_F(LibraryProperties, setting)
         EXPECT_EQ(lammps_extract_setting(lmp, "mu_flag"), 0);
         EXPECT_EQ(lammps_extract_setting(lmp, "rmass_flag"), 0);
         EXPECT_EQ(lammps_extract_setting(lmp, "radius_flag"), 0);
-        EXPECT_EQ(lammps_extract_setting(lmp, "sphere_flag"), 0);
+        EXPECT_EQ(lammps_extract_setting(lmp, "sphere_flag"), -1);
         EXPECT_EQ(lammps_extract_setting(lmp, "ellipsoid_flag"), 0);
         EXPECT_EQ(lammps_extract_setting(lmp, "omega_flag"), 0);
         EXPECT_EQ(lammps_extract_setting(lmp, "torque_flag"), 0);
diff --git a/unittest/commands/test_groups.cpp b/unittest/commands/test_groups.cpp
index b91a6108d9..7f0a054c40 100644
--- a/unittest/commands/test_groups.cpp
+++ b/unittest/commands/test_groups.cpp
@@ -314,7 +314,7 @@ TEST_F(GroupTest, Dynamic)
                  command("group ramp variable grow"););
 }
 
-constexpr double EPSILON = 1.0e-14;
+constexpr double EPSILON = 1.0e-13;
 
 TEST_F(GroupTest, VariableFunctions)
 {
diff --git a/unittest/commands/test_variables.cpp b/unittest/commands/test_variables.cpp
index 7a9b87fbe2..c631b69528 100644
--- a/unittest/commands/test_variables.cpp
+++ b/unittest/commands/test_variables.cpp
@@ -780,6 +780,25 @@ TEST_F(VariableTest, Format)
     //    TEST_FAILURE(".*ERROR: Incorrect conversion in format string.*",
     //                 command("print \"${f1idx}\""););
 }
+
+TEST_F(VariableTest, Set)
+{
+    BEGIN_HIDE_OUTPUT();
+    command("variable three  string    three");
+    command("variable ten    internal  10.0");
+    END_HIDE_OUTPUT();
+    ASSERT_EQ(variable->nvar, 3);
+    ASSERT_THAT(variable->retrieve("three"), StrEq("three"));
+    ASSERT_THAT(variable->retrieve("ten"), StrEq("10"));
+
+    ASSERT_EQ(variable->internalstyle(variable->find("three")), 0);
+    ASSERT_EQ(variable->internalstyle(variable->find("ten")), 1);
+
+    variable->set_string("three", "new");
+    ASSERT_THAT(variable->retrieve("three"), StrEq("new"));
+    variable->internal_set(variable->find("ten"), -2.5);
+    ASSERT_THAT(variable->retrieve("ten"), StrEq("-2.5"));
+}
 } // namespace LAMMPS_NS
 
 int main(int argc, char **argv)
diff --git a/unittest/force-styles/tests/angle-cosine_periodic.yaml b/unittest/force-styles/tests/angle-cosine_periodic.yaml
index 84d8ff1194..5c8227fcbd 100644
--- a/unittest/force-styles/tests/angle-cosine_periodic.yaml
+++ b/unittest/force-styles/tests/angle-cosine_periodic.yaml
@@ -1,6 +1,6 @@
 ---
-lammps_version: 8 Apr 2021
-date_generated: Thu Apr  8 09:28:11 2021
+lammps_version: 21 Nov 2023
+date_generated: Fri Jan 12 18:39:55 2024
 epsilon: 2.5e-13
 prerequisites: ! |
   atom full
@@ -10,77 +10,77 @@ post_commands: ! ""
 input_file: in.fourmol
 angle_style: cosine/periodic
 angle_coeff: ! |
-  1  75.0  1 2
-  2  45.0 -1 2
+  1  75.0  1 1
+  2  45.0  1 2
   3  50.0 -1 3
   4 100.0 -1 4
-equilibrium: 4 3.141592653589793 1.5707963267948966 2.0943951023931957 2.356194490192345
+equilibrium: 4 3.141592653589793 3.141592653589793 2.0943951023931957 2.356194490192345
 extract: ! ""
 natoms: 29
-init_energy: 605.3643061001458
-init_stress: ! |-
-  -1.7082420754402889e+01 -7.3281097507808681e+00  2.4410530505183818e+01  8.5827033671406951e+01  1.4260977966148616e+02  4.1579557432232576e+01
+init_energy: 1178.5476942873006
+init_stress: ! |2-
+   2.7790958427902001e+02 -2.3729473006795436e+02 -4.0614854211065634e+01  2.9034222204142930e+02  1.4123449070173780e+02  2.0504975338277421e+02
 init_forces: ! |2
     1  7.9609486050127529e+00 -3.9274211736421961e+01 -3.8917410871887981e+01
     2  4.6997439470662350e+00  3.8052682089524090e+01  3.0599010994189470e+01
-    3 -7.1532072701475698e+01  9.6873528247272844e+01  7.3410935137796983e+01
-    4  3.1784763224659116e+01 -4.4133218046130608e+01 -6.2234613362865147e+01
-    5  5.8817481848549889e+01 -2.5112568523390145e+01  3.9611729278121981e+00
-    6 -8.7258065964885336e+00 -4.2663580774228997e+01 -1.6819642012415606e+01
+    3 -4.4330179925982058e+01 -1.6514501437366098e+00  1.9894582317318523e+01
+    4  1.1465928779203908e+01 -7.1462736556935234e+00 -1.8983545733370338e+01
+    5  2.7634466780141157e+01  1.5504150132065057e+01  1.0078115065618357e+01
+    6  2.2512674572611367e+01 -5.4260358088923418e+01 -6.0646506351853276e+01
     7 -1.5578858996464229e+01  1.3895348629116569e+01 -3.3939856789628062e+00
-    8 -1.6678237064738614e+01 -2.6557373913973738e+01  8.7708427797183326e+00
-    9 -9.4419020144376677e+00  1.3812152922900303e+01 -1.2280697239365450e+00
-   10  1.0844630504236606e+02  1.9274264686364820e+01  1.2594098114786526e+01
-   11 -1.1888648487599809e+01  1.7288532453781471e+00  1.8714004234488471e+00
-   12  9.7432958614920665e+01  1.1284647087939499e+02 -1.3445218835244805e+02
-   13 -2.2887258478933525e+01 -5.9815335453575649e+01  4.1237962971772127e+01
-   14 -4.6498844054867675e+01 -3.0251289808967520e+01  1.5556535565006259e+01
-   15 -5.3477741242848616e+01 -1.7885978453267143e+01  4.6284681424489207e+01
-   16 -7.3215663693592745e+01  1.7514552522777997e+01  7.4857846653898914e+00
+    8 -6.7011983808365656e+01 -2.4458090084467077e+01  1.7084632474743671e+02
+    9  9.4419020144376677e+00 -1.3812152922900303e+01  1.2280697239365450e+00
+   10  1.3360859023844577e+02  1.1499274633292617e+02 -1.0838863098947982e+02
+   11  1.1888648487599809e+01 -1.7288532453781471e+00 -1.8714004234488471e+00
+   12  2.9260930345940537e+01 -9.2146025429432186e+00 -8.5323421000107373e+01
+   13 -4.6656310032990458e+00 -1.2502935413462930e+01  1.4918864440944628e+01
+   14 -2.1383527724886850e+01 -9.3422692044635554e+00  7.5125645645164223e+00
+   15 -8.0644375221897171e+00 -2.6783296801963008e+00  6.9267625241565547e+00
+   16 -1.1822204683814408e+02  2.1928896047729104e+01  4.0247121672886962e+01
    17  2.0782832048872386e+01 -2.8304296512773977e+01  1.5273484998106287e+01
-   18  1.6481336531704756e+00  1.7222946144801426e+01 -6.9896289164966490e+01
-   19 -2.0180190840279820e+01 -2.5140421523544326e+01  2.9933594625645306e+01
-   20  1.8532057187109345e+01  7.9174753787429015e+00  3.9962694539321184e+01
-   21  1.6243294930835876e+01  2.0333921382774719e+01 -6.0768622624445221e+01
-   22 -2.8924589352090472e+01 -1.9720769613680826e+01  2.1482552755004811e+01
-   23  1.2681294421254595e+01 -6.1315176909389102e-01  3.9286069869440411e+01
-   24 -1.5837796600466618e+01  6.1562453937228881e+01 -3.6651923703785549e+01
-   25 -1.2704181131223443e+01 -4.2563815285902912e+01  6.9610494863238124e+00
-   26  2.8541977731690061e+01 -1.8998638651325965e+01  2.9690874217461737e+01
-   27 -8.7971258084923178e+00  7.2217511410368814e+01 -2.4599681382405976e+01
-   28 -1.9235439225569891e+01 -4.3179911322776611e+01  1.0030656861974458e+00
-   29  2.8032565034062209e+01 -2.9037600087592210e+01  2.3596615696208531e+01
-run_energy: 603.8182365368202
-run_stress: ! |-
-  -1.6098625319219664e+01 -7.7961962067566510e+00  2.3894821525976329e+01  8.7036156470651477e+01  1.4262918929621054e+02  4.2523803236880880e+01
+   18  5.2071052608093424e+00  5.4414090328604708e+01 -2.2082998810309599e+02
+   19 -6.3757194500832497e+01 -7.9428522633699004e+01  9.4572049876109048e+01
+   20  5.8550089240023155e+01  2.5014432305094296e+01  1.2625793822698694e+02
+   21  5.6300281919954635e+01  7.0478650499360143e+01 -2.1062786831190908e+02
+   22 -1.0025444602684506e+02 -6.8353427900946826e+01  7.4459879083463136e+01
+   23  4.3954164106890424e+01 -2.1252225984133197e+00  1.3616798922844595e+02
+   24 -4.9480288140032329e+01  1.9233281221276744e+02 -1.1450757902121047e+02
+   25 -3.9690277556511717e+01 -1.3297745247110566e+02  2.1747642240220362e+01
+   26  8.9170565696544045e+01 -5.9355359741661772e+01  9.2759936780990117e+01
+   27 -2.6339504856062320e+01  2.1622670107205670e+02 -7.3653991239272059e+01
+   28 -5.7592895215991106e+01 -1.2928512206483205e+02  3.0032824456190355e+00
+   29  8.3932400072053426e+01 -8.6941579007224647e+01  7.0650708793653024e+01
+run_energy: 1174.6225600630123
+run_stress: ! |2-
+   2.7658169122411005e+02 -2.3743377487623573e+02 -3.9147916347874407e+01  2.9007767114801470e+02  1.4053974438881829e+02  2.0434258995590761e+02
 run_forces: ! |2
-    1  8.1036664069391833e+00 -3.9279459516104339e+01 -3.8959949625007155e+01
-    2  4.6488532958171156e+00  3.7987813821226069e+01  3.0712083303318757e+01
-    3 -7.1419656269516480e+01  9.7015207052323333e+01  7.3123837986656483e+01
-    4  3.1774739774255771e+01 -4.4324760214341296e+01 -6.1918121921961003e+01
-    5  5.8630133295649813e+01 -2.5003101567718115e+01  3.8957656941403842e+00
-    6 -8.6686835699933500e+00 -4.2717543793109854e+01 -1.6944132920021204e+01
-    7 -1.5605967450730276e+01  1.3924972058096937e+01 -3.4081311693274161e+00
-    8 -1.6735469954990947e+01 -2.6654949908594496e+01  8.9412902423392993e+00
-    9 -9.4705763934675620e+00  1.3861186924074314e+01 -1.2218212802251793e+00
-   10  1.0864309846473817e+02  1.9311615651482960e+01  1.2534898619395602e+01
-   11 -1.1889594908454491e+01  1.6849924892427488e+00  1.9039966312260486e+00
-   12  9.6643785665770423e+01  1.1329932305772147e+02 -1.3435213826206018e+02
-   13 -2.2815824864999897e+01 -5.9701629573330088e+01  4.1148977584672039e+01
-   14 -4.6226658006998740e+01 -3.0469540424436548e+01  1.5534272011399247e+01
-   15 -5.3141801628038777e+01 -1.8156497866651446e+01  4.6272398149175629e+01
-   16 -7.3254211788300807e+01  1.7569251761827239e+01  7.4522974142679850e+00
-   17  2.0784167932320894e+01 -2.8346879951708846e+01  1.5284477542010659e+01
-   18  1.7456021018344252e+00  1.7528557172698406e+01 -7.0852460721917453e+01
-   19 -2.0389936120749365e+01 -2.5462340563923114e+01  3.0421727677614534e+01
-   20  1.8644334018914940e+01  7.9337833912247095e+00  4.0430733044302912e+01
-   21  1.6517268317097550e+01  2.0531536618559141e+01 -6.1717967915716365e+01
-   22 -2.9293957935776255e+01 -1.9905577364456363e+01  2.1870035659045151e+01
-   23  1.2776689618678706e+01 -6.2595925410277875e-01  3.9847932256671214e+01
-   24 -1.6067082221526842e+01  6.2373469754139357e+01 -3.7096821397423525e+01
-   25 -1.2753486814048248e+01 -4.3101082367336026e+01  7.0662489242667057e+00
-   26  2.8820569035575090e+01 -1.9272387386803331e+01  3.0030572473156820e+01
-   27 -8.9233162938210242e+00  7.2669056612963558e+01 -2.4610439704365813e+01
-   28 -1.9256705992379011e+01 -4.3442840232212284e+01  9.5666525994413210e-01
-   29  2.8180022286200035e+01 -2.9226216380751275e+01  2.3653774444421682e+01
+    1  8.0595702750384035e+00 -3.9275884134753326e+01 -3.8921834417294036e+01
+    2  4.6450877605699539e+00  3.7989319483282912e+01  3.0709930248716290e+01
+    3 -4.4176357886610745e+01 -1.3121510542286003e+00  1.9849684676752698e+01
+    4  1.1432955202502885e+01 -7.3978491141098957e+00 -1.8963452056001909e+01
+    5  2.7565769767176914e+01  1.5533965780817836e+01  1.0064393045239932e+01
+    6  2.2440837721485856e+01 -5.4307979505823312e+01 -6.0734450726614625e+01
+    7 -1.5580688823052480e+01  1.3904189059068386e+01 -3.4017896378595758e+00
+    8 -6.6989876135866879e+01 -2.4455457095150752e+01  1.7071695622632274e+02
+    9  9.4762227087055635e+00 -1.3904425552883753e+01  1.2252549039361496e+00
+   10  1.3329492642527092e+02  1.1514887273699682e+02 -1.0807688660290995e+02
+   11  1.1927511834955308e+01 -1.7182396158290132e+00 -1.8914765821083073e+00
+   12  2.9230443011207992e+01 -9.0747074093425084e+00 -8.5406656692466896e+01
+   13 -4.6010476121847610e+00 -1.2371262892106342e+01  1.4758380429325644e+01
+   14 -2.1309655373546295e+01 -9.6560166053345498e+00  7.4826455796077642e+00
+   15 -8.0586553706859778e+00 -2.8089895416921884e+00  7.1963114045665719e+00
+   16 -1.1814487049351524e+02  2.2070805476502699e+01  4.0103979455896329e+01
+   17  2.0787826988548556e+01 -2.8364190015414366e+01  1.5289010744891176e+01
+   18  5.4411962659043454e+00  5.4597888596162299e+01 -2.2067472725627243e+02
+   19 -6.3374090856904559e+01 -7.9190934240040519e+01  9.4782037192716302e+01
+   20  5.7932894591000213e+01  2.4593045643878220e+01  1.2589269006355613e+02
+   21  5.6478944470524624e+01  7.0203094061683373e+01 -2.1102883364979709e+02
+   22 -9.9996788696603545e+01 -6.7985401318866863e+01  7.4849357252797518e+01
+   23  4.3517844226078921e+01 -2.2176927428165065e+00  1.3617947639699958e+02
+   24 -4.9663522759553963e+01  1.9280842870258854e+02 -1.1467096111871800e+02
+   25 -3.9280982289108742e+01 -1.3314956089589265e+02  2.1920816048609726e+01
+   26  8.8944505048662705e+01 -5.9658867806695888e+01  9.2750145070108275e+01
+   27 -2.6592186096443989e+01  2.1652356998534560e+02 -7.3330722990707770e+01
+   28 -5.7218787679563256e+01 -1.2939713888992102e+02  2.9266537226802889e+00
+   29  8.3810973776007245e+01 -8.7126431095424564e+01  7.0404069268027484e+01
 ...
diff --git a/unittest/force-styles/tests/angle-lepton.yaml b/unittest/force-styles/tests/angle-lepton.yaml
index ea108cfdb1..b4d6c0516f 100644
--- a/unittest/force-styles/tests/angle-lepton.yaml
+++ b/unittest/force-styles/tests/angle-lepton.yaml
@@ -9,7 +9,7 @@ prerequisites: ! |
 pre_commands: ! ""
 post_commands: ! ""
 input_file: in.fourmol
-angle_style: lepton
+angle_style: lepton auto_offset
 angle_coeff: ! |
   1 110.1 "k*theta^2; k=75.0"
   2 111.0 "k*theta^2; k=45.0"
diff --git a/unittest/force-styles/tests/angle-lepton_nooffset.yaml b/unittest/force-styles/tests/angle-lepton_nooffset.yaml
new file mode 100644
index 0000000000..711f0cbdd5
--- /dev/null
+++ b/unittest/force-styles/tests/angle-lepton_nooffset.yaml
@@ -0,0 +1,88 @@
+---
+lammps_version: 22 Dec 2022
+date_generated: Fri Dec 23 15:10:29 2022
+epsilon: 7.5e-13
+skip_tests:
+prerequisites: ! |
+  atom full
+  angle lepton
+pre_commands: ! ""
+post_commands: ! ""
+input_file: in.fourmol
+angle_style: lepton no_offset
+angle_coeff: ! |
+  1 110.1 "k*theta^2; k=75.0"
+  2 111.0 "k*theta^2; k=45.0"
+  3 120.0 "k*theta^2; k=50.0"
+  4 108.5 "k*theta^2; k=100.0"
+equilibrium: 4 1.9216075064457567 1.9373154697137058 2.0943951023931953 1.8936822384138476
+extract: ! |
+  theta0 1
+natoms: 29
+init_energy: 41.53081789649104
+init_stress: ! |2-
+   8.9723357320869297e+01 -8.7188643750026529e+01 -2.5347135708427655e+00  9.2043419883119782e+01 -2.8187238090404904e+01 -1.5291148024926793e+00
+init_forces: ! |2
+    1  4.7865489310693540e+01  7.8760925902181516e+00 -3.2694525514709866e+01
+    2 -1.1124882516177341e+00 -9.0075464203887403e+00 -7.2431691227364459e+00
+    3 -5.9057050592859328e+00  5.3263619873546261e+01  5.2353380124691469e+01
+    4 -1.6032230038990633e+01 -2.4560529343731403e+01  1.2891625920422307e+01
+    5 -4.4802331573497639e+01 -4.8300919461089379e+01 -2.3310767889219324e+01
+    6  4.7083124388174824e+01 -9.5212933434476312e+00 -3.2526392870546800e+01
+    7 -1.6208182775476303e+01  1.4458587960739102e+01 -3.5314745459502710e+00
+    8 -6.5664612141881040e+00 -2.5126850154274202e+01  8.2187944731423329e+01
+    9 -1.5504395262358301e+01  1.6121044185227817e+01 -4.2007069622477866e-01
+   10  9.9863759179365275e+00  4.1873540105704549e+01 -6.6085640966037403e+01
+   11 -2.0441876158908627e+01 -6.5186824168985984e+00  9.0023620309811072e+00
+   12 -1.0772126658369565e+01 -1.0807367300158219e+01 -9.6049647456797871e+00
+   13  2.8847886813946291e+00  7.2973241014859198e+00 -1.0414233993842981e-01
+   14  1.5267407478336393e+01 -9.4754911480231776e+00 -6.6307012925544200e+00
+   15  1.2402914209534773e+01 -6.2644630791613967e+00  1.8484576795819933e+01
+   16  3.8927757686508357e-01  1.0690061587911176e+01  6.1542759189377696e+00
+   17  1.4664194297570785e+00 -1.9971277376602425e+00  1.0776844613215999e+00
+   18  1.5785371874873322e-01  1.6495665212200166e+00 -6.6944747776990434e+00
+   19 -1.9328033033421670e+00 -2.4078805870919706e+00  2.8669575541313534e+00
+   20  1.7749495845934338e+00  7.5831406587195394e-01  3.8275172235676900e+00
+   21  3.4186149299343742e+00  4.2795410364249484e+00 -1.2789555411020650e+01
+   22 -6.0875600315279677e+00 -4.1504951869796605e+00  4.5212856070195766e+00
+   23  2.6689451015935934e+00 -1.2904584944528752e-01  8.2682698040010738e+00
+   24 -1.3053945393770587e+00  5.0741459325183271e+00 -3.0209518576073018e+00
+   25 -1.0471133765834284e+00 -3.5082261409793856e+00  5.7374874908501228e-01
+   26  2.3525079159604871e+00 -1.5659197915389413e+00  2.4472031085222894e+00
+   27 -2.8720725187343754e-01  2.3577465459557132e+00 -8.0312673032168869e-01
+   28 -6.2799575211500369e-01 -1.4097313073755862e+00  3.2747938980616453e-02
+   29  9.1520300398844123e-01 -9.4801523858012704e-01  7.7037879134107223e-01
+run_energy: 41.28323739029462
+run_stress: ! |2-
+   8.8236221596506681e+01 -8.6492260623309562e+01 -1.7439609731970940e+00  9.0601855980531312e+01 -2.8735005690484968e+01 -2.6097632235197477e+00
+run_forces: ! |2
+    1  4.7316793853445830e+01  8.2815577813110188e+00 -3.2021703111755464e+01
+    2 -1.1508196824491330e+00 -9.3814982172707460e+00 -7.5761211707510139e+00
+    3 -5.1083163691832576e+00  5.2667553294971619e+01  5.1784852458007592e+01
+    4 -1.6078177452605999e+01 -2.4156048365236213e+01  1.3140924677013103e+01
+    5 -4.4915734474022280e+01 -4.8095168640411821e+01 -2.3331149037574161e+01
+    6  4.7077916942842350e+01 -9.5906213020090156e+00 -3.2570331503075487e+01
+    7 -1.6228599672412471e+01  1.4485102617342370e+01 -3.5441153194985300e+00
+    8 -6.5097893981550730e+00 -2.5117582302614530e+01  8.2131369512416001e+01
+    9 -1.5527440970965937e+01  1.6147270375910470e+01 -4.0812004993325646e-01
+   10  1.0070812216240984e+01  4.1571532807578805e+01 -6.5968810328796337e+01
+   11 -2.0431584971707451e+01 -6.4817395192247664e+00  8.9879981618991636e+00
+   12 -1.0884695976714678e+01 -1.1067390190389006e+01 -9.1551242768940568e+00
+   13  2.8052913970098801e+00  7.1296301666594912e+00  1.3173039168682621e-02
+   14  1.5254877537873529e+01 -8.9700095533297350e+00 -6.5719846903613162e+00
+   15  1.2392009100170984e+01 -6.0827695435257292e+00  1.7929674392339596e+01
+   16  4.7158712437377481e-01  1.0631038523396533e+01  6.0960085687560355e+00
+   17  1.4458707962589659e+00 -1.9708579331587350e+00  1.0634586790394520e+00
+   18  1.4201882413835909e-01  1.4265339757773337e+00 -5.7663956896747992e+00
+   19 -1.6609130686729365e+00 -2.0735307593211125e+00  2.4755525101127143e+00
+   20  1.5188942445345774e+00  6.4699678354377899e-01  3.2908431795620849e+00
+   21  3.2242729509516406e+00  4.0079233768386153e+00 -1.2047892238650988e+01
+   22 -5.7215184687399772e+00 -3.8871624402883409e+00  4.2679223469272234e+00
+   23  2.4972455177883366e+00 -1.2076093655027398e-01  7.7799698917237645e+00
+   24 -1.1661978296905471e+00  4.5271404898674854e+00 -2.6925565853370195e+00
+   25 -9.2712094527152167e-01 -3.1291890525017125e+00  5.1208215565053827e-01
+   26  2.0933187749620688e+00 -1.3979514373657731e+00  2.1804744296864813e+00
+   27 -2.6804542538020537e-01  2.1830651328698103e+00 -7.3931790038945400e-01
+   28 -5.7927072943128310e-01 -1.3052929090347909e+00  2.8365455885795865e-02
+   29  8.4731615481148848e-01 -8.7777222383501941e-01  7.1095244450365813e-01
+...
diff --git a/unittest/force-styles/tests/bond-lepton_nooffset.yaml b/unittest/force-styles/tests/bond-lepton_nooffset.yaml
new file mode 100644
index 0000000000..b39288640e
--- /dev/null
+++ b/unittest/force-styles/tests/bond-lepton_nooffset.yaml
@@ -0,0 +1,89 @@
+---
+lammps_version: 21 Nov 2023
+date_generated: Thu Jan 18 10:15:41 2024
+epsilon: 2.5e-13
+skip_tests:
+prerequisites: ! |
+  atom full
+  bond lepton
+pre_commands: ! ""
+post_commands: ! ""
+input_file: in.fourmol
+bond_style: lepton no_offset
+bond_coeff: ! |
+  1 1.5 "k*r^2; k=250.0"
+  2 1.1 "k2*r^2 + k3*r^3 + k4*r^4; k2=300.0; k3=-100.0; k4=50.0"
+  3 1.3 "k*r^2; k=350.0"
+  4 1.2 "k*(r-0.2)^2; k=500.0"
+  5 1.0 "k*r^2; k=450.0"
+equilibrium: 5 1.5 1.1 1.3 1.2 1
+extract: ! |
+  r0 1
+natoms: 29
+init_energy: 38.295825321689215
+init_stress: ! |-
+  -4.7778964706834920e+01 -9.3066674567350432e+01  3.4789470658440035e+02 -3.0023920169312170e+01 -8.0421418879842847e+01  5.8592449335969732e+01
+init_forces: ! |2
+    1 -5.9149914305071416e+00 -3.7728809612345245e+01 -2.7769433362963369e+01
+    2 -9.4281609567839944e+00 -7.7586487054273015e+00  1.1096676787527940e+01
+    3  3.2211742366572125e+01  2.7682361264425523e+01 -7.0109911672970497e+00
+    4  4.9260777576375503e+00 -1.3809750102765932e+00  3.4951785613141868e+00
+    5 -1.2606902198593501e+00 -1.9373397933007170e+00  6.4372463095041841e+00
+    6 -3.8858476307965482e+01  6.8567296300319640e+01  1.9889888806614337e+02
+    7  7.5297927100028144e+00 -3.8622600737556944e+01 -1.9268793182212875e+02
+    8  1.3018665172824681e+01 -1.2902789438539877e+01  3.2406676637830003e+00
+    9  7.4343536239661590e-01  8.0072549738604493e-01  3.2899591078538779e+00
+   10  6.1558871886113291e+00 -2.2419470219698296e+00  1.0080175092279852e+01
+   11 -3.7020922615305768e-01 -9.1704102274126453e-01 -1.5046795827370363e+00
+   12  5.2437190958790678e+00  3.4225915524442998e+00 -2.5523597276998897e+00
+   13 -1.1277007635800260e+01  4.4610677459696646e+00  2.1195215396108269e-01
+   14  2.9813926585641828e+00 -6.0667387499775116e-01  7.7317115100728788e+00
+   15  2.5872825164662799e-01 -9.9415365173790704e+00 -3.5428115826174169e+00
+   16  5.2775953236493464e+01 -3.1855535724919463e+01 -1.6524229620195118e+02
+   17 -5.8735858023559175e+01  4.0959855098908882e+01  1.5582804819495431e+02
+   18 -9.0963607969319646e+00 -4.3343406270234155e+00 -1.7623055551859267e+01
+   19  1.2597490501067170e+01  8.0591915019111742e+00  1.5261489294231819e+01
+   20 -3.5011297041352050e+00 -3.7248508748877587e+00  2.3615662576274494e+00
+   21 -1.5332952658285048e+00  5.9630208068632040e-01 -7.4967230017303281e+00
+   22  4.2380253233105529e+00  1.0270453290850614e+00  6.6489894421385651e+00
+   23 -2.7047300574820481e+00 -1.6233474097713818e+00  8.4773355959176278e-01
+   24 -6.6588083188726532e+00  3.5110922792825918e+00 -6.5625174267043489e+00
+   25  7.9844426562464141e+00 -1.2853795683286129e+00  6.7123710742192300e+00
+   26 -1.3256343373737607e+00 -2.2257127109539789e+00 -1.4985364751488087e-01
+   27  6.6999960289138851e+00  6.3808952243186141e+00  2.0100808779497248e+00
+   28 -8.8466157439236681e-01  3.8018717064230995e-01 -5.9857060538593476e-01
+   29 -5.8153344545215182e+00 -6.7610823949609244e+00 -1.4115102725637900e+00
+run_energy: 37.78424389351509
+run_stress: ! |-
+  -4.6127506998693484e+01 -9.2129732247211749e+01  3.4548310342284810e+02 -2.9841348469661163e+01 -7.8434962689387717e+01  5.9253167412123155e+01
+run_forces: ! |2
+    1 -5.8451208652159004e+00 -3.7483084455000643e+01 -2.7706576989352534e+01
+    2 -9.4646964278974774e+00 -7.8058897724822449e+00  1.1098831256058579e+01
+    3  3.1827086102630346e+01  2.7573911030624821e+01 -6.9576662575837211e+00
+    4  5.1502169659901655e+00 -1.4367546726785101e+00  3.6631301025186187e+00
+    5 -1.2208420775139264e+00 -1.8781699435112362e+00  6.2332639085051911e+00
+    6 -3.8491523409043303e+01  6.8063273218541468e+01  1.9723141045830272e+02
+    7  7.4838209349394775e+00 -3.8394258853636330e+01 -1.9092625515909930e+02
+    8  1.2676329319901857e+01 -1.2475162287097550e+01  3.3659783337736577e+00
+    9  6.8845241565874460e-01  7.3814593866184031e-01  3.0434095400342533e+00
+   10  6.2545583994797553e+00 -2.9600470917047201e+00  9.4247125735981765e+00
+   11 -1.9554747834212524e-01 -4.8434314068172696e-01 -7.9452259566032057e-01
+   12  5.2092795750960841e+00  3.1431929551776721e+00 -3.1346654851373348e+00
+   13 -1.1496483840617872e+01  4.5245217971580018e+00  2.1348220240918236e-01
+   14  3.1913399826660909e+00 -6.3760720126489068e-01  8.2740980433927742e+00
+   15  2.7338564489784484e-01 -9.7206665011069671e+00 -3.4841809697094543e+00
+   16  5.2461611410918316e+01 -3.1639255494702798e+01 -1.6483607587596811e+02
+   17 -5.8501866653548078e+01  4.0872194473703807e+01  1.5529162691391761e+02
+   18 -7.0990354207248405e+00 -2.4743922643289666e+00 -1.7824398936159682e+01
+   19  1.2019842510974870e+01  7.7105128268768715e+00  1.4523712108141252e+01
+   20 -4.9208070902500296e+00 -5.2361205625479048e+00  3.3006868280184283e+00
+   21 -1.8548628650934149e+00  2.7467524264262122e-01 -6.7601469408617412e+00
+   22  3.9136757840663186e+00  9.5561415744904055e-01  6.1181929861632272e+00
+   23 -2.0588129189729036e+00 -1.2302894000916618e+00  6.4195395469851357e-01
+   24 -5.7681973234153086e+00  2.0209144998436366e+00 -5.2864044021513967e+00
+   25  6.3696975292216704e+00 -1.0109756418053095e+00  5.3564043759405795e+00
+   26 -6.0150020580636188e-01 -1.0099388580383271e+00 -6.9999973789182365e-02
+   27  6.8467535469188450e+00  5.7500299184200578e+00  2.2775780974490298e+00
+   28 -1.3929430925479587e+00  5.9772788540443345e-01 -9.4056106886485980e-01
+   29 -5.4538104543708865e+00 -6.3477578038244911e+00 -1.3370170285841700e+00
+...
diff --git a/unittest/force-styles/tests/mol-pair-coul_slater_long.yaml b/unittest/force-styles/tests/mol-pair-coul_slater_long.yaml
index ba11503a2c..51b04f301c 100644
--- a/unittest/force-styles/tests/mol-pair-coul_slater_long.yaml
+++ b/unittest/force-styles/tests/mol-pair-coul_slater_long.yaml
@@ -1,7 +1,7 @@
 ---
 lammps_version: 23 Jun 2022
 date_generated: Thu Jul  7 09:00:39 2022
-epsilon: 2e-13
+epsilon: 1e-12
 skip_tests:
 prerequisites: ! |
   atom full
diff --git a/unittest/force-styles/tests/mol-pair-lepton.yaml b/unittest/force-styles/tests/mol-pair-lepton.yaml
index 03117a9aa5..33576e81c2 100644
--- a/unittest/force-styles/tests/mol-pair-lepton.yaml
+++ b/unittest/force-styles/tests/mol-pair-lepton.yaml
@@ -1,6 +1,6 @@
 ---
-lammps_version: 22 Dec 2022
-date_generated: Thu Dec 22 09:57:30 2022
+lammps_version: 21 Nov 2023
+date_generated: Thu Jan 18 11:01:50 2024
 epsilon: 5e-14
 skip_tests: intel
 prerequisites: ! |
@@ -23,23 +23,24 @@ pair_coeff: ! |
   2 4    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.005;sig=0.5"
   2 5    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.00866025;sig=2.05"
   3 3    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.02;sig=3.2"
-  3 4    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.0173205;sig=3.15"
+  3 4    "-eps*r;eps=0.0173205;sig=3.15"
   3 5    "4.0*eps*((sig/r)^12-(sig/r)^6);eps=0.0173205;sig=3.15"
+  4 4    "10.0"
 extract: ! ""
 natoms: 29
-init_vdwl: 749.2468149791969
+init_vdwl: 746.1575578155301
 init_coul: 0
 init_stress: ! |2-
-   2.1793853434038242e+03  2.1988955172192768e+03  4.6653977523326257e+03 -7.5956547636050584e+02  2.4751536734032861e+01  6.6652028436400667e+02
+   2.1723526811665593e+03  2.1959162890293533e+03  4.6328064825512138e+03 -7.5509180369489252e+02  9.4506578600439983e+00  6.7585028859193505e+02
 init_forces: ! |2
-    1 -2.3333390280895912e+01  2.6994567613322641e+02  3.3272827850356805e+02
+    1 -2.3359983837422618e+01  2.6996030011590727e+02  3.3274783233743295e+02
     2  1.5828554630414899e+02  1.3025008843535872e+02 -1.8629682358935722e+02
     3 -1.3528903738169066e+02 -3.8704313358319990e+02 -1.4568978437133106e+02
     4 -7.8711096705893366e+00  2.1350518625373538e+00 -5.5954532185548134e+00
     5 -2.5176757268228540e+00 -4.0521510681020239e+00  1.2152704057877019e+01
     6 -8.3190662465252137e+02  9.6394149462625603e+02  1.1509093566509248e+03
-    7  5.8203388932513583e+01 -3.3608997951626793e+02 -1.7179617996573040e+03
-    8  1.4451392284291535e+02 -1.0927475861088995e+02  3.9990593492420442e+02
+    7  6.6340523101244187e+01 -3.4078810185436379e+02 -1.7003039516942540e+03
+    8  1.3674478037618434e+02 -1.0517874373121482e+02  3.8291074246191346e+02
     9  7.9156945283097443e+01  8.5273009783986538e+01  3.5032175698445189e+02
    10  5.3118875219105360e+02 -6.1040990859419412e+02 -1.8355872642619292e+02
    11 -2.3530157267965532e+00 -5.9077640073819717e+00 -9.6590723955414290e+00
@@ -48,8 +49,8 @@ init_forces: ! |2
    14 -3.3852721292265153e+00  6.8636181241903649e-01 -8.7507190862499868e+00
    15 -2.0454999188605300e-01  8.4846165523049883e+00  3.0131615419406712e+00
    16  4.6326310311812108e+02 -3.3087715736498188e+02 -1.1893024561782554e+03
-   17 -4.5334300923766727e+02  3.1554283255882569e+02  1.2058417793481203e+03
-   18 -1.8862623280672661e-02 -3.3402010907951661e-02  3.1000479299095260e-02
+   17 -4.5371128972368928e+02  3.1609940794953951e+02  1.2052011419527653e+03
+   18  8.0197172683943874e-03 -2.4939258820032362e-03 -1.0571459969936936e-02
    19  3.1843079640570047e-04 -2.3918627818763426e-04  1.7427252638513439e-03
    20 -9.9760831209706009e-04 -1.0209184826753090e-03  3.6910972636601454e-04
    21 -7.1566125273265186e+01 -8.1615678329920655e+01  2.2589561408339890e+02
@@ -61,38 +62,38 @@ init_forces: ! |2
    27  5.1810388677546001e+01 -2.2705458321213797e+02  9.0849111082069669e+01
    28 -1.8041307121444069e+02  7.7534042932772905e+01 -1.2206956760706598e+02
    29  1.2861057254925012e+02  1.4952711274394568e+02  3.1216025556267880e+01
-run_vdwl: 719.4530651193046
+run_vdwl: 716.5213000416621
 run_coul: 0
 run_stress: ! |2-
-   2.1330153957371017e+03  2.1547728168285516e+03  4.3976497417710125e+03 -7.3873328448298525e+02  4.1743821105370067e+01  6.2788012209191027e+02
+   2.1263870112744726e+03  2.1520080341389726e+03  4.3663519512361027e+03 -7.3456213833770062e+02  2.6927285459244832e+01  6.3691834104928068e+02
 run_forces: ! |2
-    1 -2.0299419751359164e+01  2.6686193378823020e+02  3.2358785870694015e+02
-    2  1.5298617928491225e+02  1.2596516341409203e+02 -1.7961292655338619e+02
-    3 -1.3353630652439830e+02 -3.7923748696131315e+02 -1.4291839793625817e+02
-    4 -7.8374717836161762e+00  2.1276610789823409e+00 -5.5845014473820616e+00
-    5 -2.5014258630866735e+00 -4.0250131424704412e+00  1.2103512372025639e+01
-    6 -8.0681462887292457e+02  9.2165637136761688e+02  1.0270795806932783e+03
-    7  5.5780279349903523e+01 -3.1117530951561656e+02 -1.5746991292869018e+03
-    8  1.3452983055535049e+02 -1.0064659350255911e+02  3.8851791558207651e+02
-    9  7.6746213883425980e+01  8.2501469877402130e+01  3.3944351200617882e+02
-   10  5.2128033527695595e+02 -5.9920098848285863e+02 -1.8126029815043339e+02
-   11 -2.3573118090915246e+00 -5.8616944550888359e+00 -9.6049808811326205e+00
-   12  1.7503975847822900e+01  1.0626930310560814e+01 -8.0603160272054968e+00
-   13  8.0530313322973104e+00 -3.1756495170399117e+00 -1.4618315664740528e-01
-   14 -3.3416065168069773e+00  6.6492606336082150e-01 -8.6345131440469700e+00
-   15 -2.2253843262374914e-01  8.5025661635348779e+00  3.0369735873081622e+00
-   16  4.3476311264989465e+02 -3.1171086735551415e+02 -1.1135217194927448e+03
-   17 -4.2469846140777133e+02  2.9615411776780593e+02  1.1302573488400669e+03
-   18 -1.8849981672825908e-02 -3.3371636477421307e-02  3.0986293443778727e-02
-   19  3.0940277774414027e-04 -2.4634536455373044e-04  1.7433360008861016e-03
-   20 -9.8648131277150790e-04 -1.0112587134526946e-03  3.6932948773965417e-04
-   21 -7.0490745283106378e+01 -7.9749153581142139e+01  2.2171003384646431e+02
-   22 -1.0638717908920071e+02 -2.5949502163177968e+01 -1.6645589526812276e+02
-   23  1.7686797710735027e+02  1.0571018898885514e+02 -5.5243337084099387e+01
-   24  3.8206017656281375e+01 -2.1022820141992960e+02  1.1260711266189014e+02
-   25 -1.4918881473530880e+02  2.3762151395876508e+01 -1.2549188139143085e+02
-   26  1.1097059498808308e+02  1.8645503634228518e+02  1.2861559677865248e+01
-   27  5.0800844984832125e+01 -2.2296588090685469e+02  8.8607367716323253e+01
-   28 -1.7694190504288886e+02  7.6029945485182026e+01 -1.1950518150242071e+02
-   29  1.2614894925528141e+02  1.4694250820033548e+02  3.0893386672863034e+01
+    1 -2.0326040164905073e+01  2.6687684422507328e+02  3.2360752654223910e+02
+    2  1.5298608857690186e+02  1.2596506573447739e+02 -1.7961281277841888e+02
+    3 -1.3353631293077220e+02 -3.7923732277833739e+02 -1.4291833260989750e+02
+    4 -7.8374717116975035e+00  2.1276610267113969e+00 -5.5845014524498486e+00
+    5 -2.5014258756924157e+00 -4.0250131713717776e+00  1.2103512280982228e+01
+    6 -8.0714971444536457e+02  9.2203068890526424e+02  1.0274502514782534e+03
+    7  6.3722543724608350e+01 -3.1586173092061807e+02 -1.5580743968587681e+03
+    8  1.2737293861904031e+02 -9.6945064279519002e+01  3.7231518354375891e+02
+    9  7.6709940036396304e+01  8.2451980339096536e+01  3.3926849385746954e+02
+   10  5.2123408713149831e+02 -5.9914309504622599e+02 -1.8121478407355445e+02
+   11 -2.3573086824741427e+00 -5.8616969504300931e+00 -9.6049799947287671e+00
+   12  1.7504108236707797e+01  1.0626901299509713e+01 -8.0602444903747301e+00
+   13  8.0530313558451159e+00 -3.1756495145404533e+00 -1.4618321144421534e-01
+   14 -3.3416062225209915e+00  6.6492609500227240e-01 -8.6345136470911594e+00
+   15 -2.2253820242887132e-01  8.5025660110994483e+00  3.0369741645942137e+00
+   16  4.3476708820318731e+02 -3.1171425443331651e+02 -1.1135289618967258e+03
+   17 -4.2507048343681140e+02  2.9671384825884064e+02  1.1296230654445915e+03
+   18  8.0130752607770750e-03 -2.4895867517657545e-03 -1.0574351684568857e-02
+   19  3.0939970262803125e-04 -2.4635874092791046e-04  1.7433490521479268e-03
+   20 -9.8648319666298735e-04 -1.0112621691758337e-03  3.6933139856766442e-04
+   21 -7.0490745298133859e+01 -7.9749153568373742e+01  2.2171003384665224e+02
+   22 -1.0638717908973166e+02 -2.5949502162671845e+01 -1.6645589526807785e+02
+   23  1.7686797710711278e+02  1.0571018898899243e+02 -5.5243337084327727e+01
+   24  3.8206017659583978e+01 -2.1022820135505594e+02  1.1260711269986750e+02
+   25 -1.4918881473631544e+02  2.3762151403215309e+01 -1.2549188138812220e+02
+   26  1.1097059498835199e+02  1.8645503634383900e+02  1.2861559678659969e+01
+   27  5.0800844960383969e+01 -2.2296588092255456e+02  8.8607367714616288e+01
+   28 -1.7694190504410764e+02  7.6029945484553380e+01 -1.1950518150262033e+02
+   29  1.2614894924957088e+02  1.4694250819500266e+02  3.0893386676150566e+01
 ...
diff --git a/unittest/force-styles/tests/mol-pair-lj_cut_coul_cut_soft.yaml b/unittest/force-styles/tests/mol-pair-lj_cut_coul_cut_soft.yaml
index e242a56029..485730531f 100644
--- a/unittest/force-styles/tests/mol-pair-lj_cut_coul_cut_soft.yaml
+++ b/unittest/force-styles/tests/mol-pair-lj_cut_coul_cut_soft.yaml
@@ -1,7 +1,7 @@
 ---
 lammps_version: 17 Feb 2022
 date_generated: Fri Mar 18 22:17:31 2022
-epsilon: 2e-13
+epsilon: 5e-13
 skip_tests:
 prerequisites: ! |
   atom full
diff --git a/unittest/force-styles/tests/mol-pair-lj_cut_coul_long_soft.yaml b/unittest/force-styles/tests/mol-pair-lj_cut_coul_long_soft.yaml
index 8eca065092..a1e89e54c0 100644
--- a/unittest/force-styles/tests/mol-pair-lj_cut_coul_long_soft.yaml
+++ b/unittest/force-styles/tests/mol-pair-lj_cut_coul_long_soft.yaml
@@ -1,7 +1,7 @@
 ---
 lammps_version: 17 Feb 2022
 date_generated: Fri Mar 18 22:17:31 2022
-epsilon: 5e-12
+epsilon: 7.5e-12
 skip_tests:
 prerequisites: ! |
   atom full
diff --git a/unittest/formats/test_atom_styles.cpp b/unittest/formats/test_atom_styles.cpp
index 4ad571f9fa..0a36ad8c0d 100644
--- a/unittest/formats/test_atom_styles.cpp
+++ b/unittest/formats/test_atom_styles.cpp
@@ -156,7 +156,6 @@ struct AtomState {
     int extra_dihedral_per_atom = 0;
     int extra_improper_per_atom = 0;
 
-    int sphere_flag                  = 0;
     int ellipsoid_flag               = 0;
     int line_flag                    = 0;
     int tri_flag                     = 0;
@@ -293,7 +292,6 @@ void ASSERT_ATOM_STATE_EQ(Atom *atom, const AtomState &expected)
     ASSERT_EQ(atom->extra_dihedral_per_atom, expected.extra_dihedral_per_atom);
     ASSERT_EQ(atom->extra_improper_per_atom, expected.extra_improper_per_atom);
 
-    ASSERT_EQ(atom->sphere_flag, expected.sphere_flag);
     ASSERT_EQ(atom->ellipsoid_flag, expected.ellipsoid_flag);
     ASSERT_EQ(atom->line_flag, expected.line_flag);
     ASSERT_EQ(atom->tri_flag, expected.tri_flag);
@@ -882,7 +880,6 @@ TEST_F(AtomStyleTest, sphere)
     expected.atom_style  = "sphere";
     expected.molecular   = Atom::ATOMIC;
     expected.tag_enable  = 1;
-    expected.sphere_flag = 1;
     expected.rmass_flag  = 1;
     expected.radius_flag = 1;
     expected.omega_flag  = 1;
@@ -1391,7 +1388,6 @@ TEST_F(AtomStyleTest, line)
     expected.atom_style    = "line";
     expected.molecular     = Atom::ATOMIC;
     expected.tag_enable    = 1;
-    expected.sphere_flag   = 1;
     expected.molecule_flag = 1;
     expected.line_flag     = 1;
     expected.rmass_flag    = 1;
@@ -1661,7 +1657,6 @@ TEST_F(AtomStyleTest, tri)
     expected.atom_style    = "tri";
     expected.molecular     = Atom::ATOMIC;
     expected.tag_enable    = 1;
-    expected.sphere_flag   = 1;
     expected.molecule_flag = 1;
     expected.tri_flag      = 1;
     expected.rmass_flag    = 1;
diff --git a/unittest/formats/test_input_convert.cpp b/unittest/formats/test_input_convert.cpp
index 858275a76f..78a78d08df 100644
--- a/unittest/formats/test_input_convert.cpp
+++ b/unittest/formats/test_input_convert.cpp
@@ -117,6 +117,17 @@ TEST_F(InputConvertTest, numeric)
     TEST_FAILURE(".*ERROR: Expected floating point.*", utils::numeric(FLERR, nullptr, false, lmp););
     TEST_FAILURE(".*ERROR: Expected floating point.*",
                  utils::numeric(FLERR, "2.56D+3", false, lmp););
+    TEST_FAILURE(".*ERROR: Floating point number.*out of range.*",
+                 utils::numeric(FLERR, "1.0e2000", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected floating .*", utils::numeric(FLERR, "--546700-", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected floating.*", utils::numeric(FLERR, "546700+", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected floating.*", utils::numeric(FLERR, "--546700", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected floating.*", utils::numeric(FLERR, "++546700", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected floating.*", utils::numeric(FLERR, "+-546700", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected floating .*", utils::numeric(FLERR, "5.467e--1", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected floating.*", utils::numeric(FLERR, "4.4e++1", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected floating.*", utils::numeric(FLERR, "--5.0460", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected floating.*", utils::numeric(FLERR, "++5.4670", false, lmp););
 }
 
 TEST_F(InputConvertTest, inumeric)
@@ -142,6 +153,13 @@ TEST_F(InputConvertTest, inumeric)
     TEST_FAILURE(".*ERROR: Expected integer.*", utils::inumeric(FLERR, "0x05", false, lmp););
     TEST_FAILURE(".*ERROR: Expected integer.*", utils::inumeric(FLERR, "", false, lmp););
     TEST_FAILURE(".*ERROR: Expected integer.*", utils::inumeric(FLERR, nullptr, false, lmp););
+    TEST_FAILURE(".*ERROR: Integer.*out of range.*",
+                 utils::inumeric(FLERR, "1263012546700", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::inumeric(FLERR, "--546700-", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::inumeric(FLERR, "546700+", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::inumeric(FLERR, "--546700", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::inumeric(FLERR, "++546700", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::inumeric(FLERR, "+-546700", false, lmp););
 }
 
 TEST_F(InputConvertTest, bnumeric)
@@ -167,6 +185,13 @@ TEST_F(InputConvertTest, bnumeric)
     TEST_FAILURE(".*ERROR: Expected integer.*", utils::bnumeric(FLERR, "0x05", false, lmp););
     TEST_FAILURE(".*ERROR: Expected integer.*", utils::bnumeric(FLERR, "", false, lmp););
     TEST_FAILURE(".*ERROR: Expected integer.*", utils::bnumeric(FLERR, nullptr, false, lmp););
+    TEST_FAILURE(".*ERROR: Integer.*out of range.*",
+                 utils::bnumeric(FLERR, "18446744073709551616123", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::bnumeric(FLERR, "--546700-", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::bnumeric(FLERR, "546700+", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::bnumeric(FLERR, "--546700", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::bnumeric(FLERR, "++546700", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::bnumeric(FLERR, "+-546700", false, lmp););
 }
 
 TEST_F(InputConvertTest, tnumeric)
@@ -192,6 +217,17 @@ TEST_F(InputConvertTest, tnumeric)
     TEST_FAILURE(".*ERROR: Expected integer.*", utils::tnumeric(FLERR, "0x05", false, lmp););
     TEST_FAILURE(".*ERROR: Expected integer.*", utils::tnumeric(FLERR, "", false, lmp););
     TEST_FAILURE(".*ERROR: Expected integer.*", utils::tnumeric(FLERR, nullptr, false, lmp););
+#if defined(LAMMPS_SMALLBIG)
+    TEST_FAILURE(".*ERROR: Integer.*out of range.*",
+                 utils::tnumeric(FLERR, "4294967296", false, lmp););
+#endif
+    TEST_FAILURE(".*ERROR: Integer.*out of range.*",
+                 utils::tnumeric(FLERR, "18446744073709551616123", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::tnumeric(FLERR, "--546700-", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::tnumeric(FLERR, "546700+", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::tnumeric(FLERR, "--546700", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::tnumeric(FLERR, "++546700", false, lmp););
+    TEST_FAILURE(".*ERROR: Expected integer.*", utils::tnumeric(FLERR, "+-546700", false, lmp););
 }
 
 } // namespace LAMMPS_NS
diff --git a/unittest/formats/test_molecule_file.cpp b/unittest/formats/test_molecule_file.cpp
index 8fe1fc1eab..c798d2f4c2 100644
--- a/unittest/formats/test_molecule_file.cpp
+++ b/unittest/formats/test_molecule_file.cpp
@@ -32,6 +32,8 @@ using testing::StrEq;
 
 using utils::split_words;
 
+const double EPSILON = 5.0e-14;
+
 #define test_name test_info_->name()
 
 static void create_molecule_files(const std::string &h2o_filename, const std::string &co2_filename)
@@ -145,7 +147,7 @@ protected:
         fclose(fp);
 
         command(fmt::format("molecule {} {} {}", name, file, args));
-        remove(file.c_str());
+        platform::unlink(file.c_str());
     }
 };
 
@@ -184,7 +186,7 @@ TEST_F(MoleculeFileTest, badargs)
     TEST_FAILURE(
         ".*Illegal molecule command.*",
         run_mol_cmd(test_name, "scale", "Comment\n1 atoms\n\n Coords\n\n 1 0.0 0.0 0.0\n"););
-    remove("badargs.mol");
+    platform::unlink("moltest_badargs.mol");
 }
 
 TEST_F(MoleculeFileTest, noatom)
@@ -193,14 +195,14 @@ TEST_F(MoleculeFileTest, noatom)
                  run_mol_cmd(test_name, "",
                              "Comment\n0 atoms\n1 bonds\n\n"
                              " Coords\n\nBonds\n\n 1 1 2\n"););
-    remove("noatom.mol");
+    platform::unlink("moltest_noatom.mol");
 }
 
 TEST_F(MoleculeFileTest, empty)
 {
     TEST_FAILURE(".*ERROR: Unexpected end of molecule file.*",
                  run_mol_cmd(test_name, "", "Comment\n\n"););
-    remove("empty.mol");
+    platform::unlink("moltest_empty.mol");
 }
 
 TEST_F(MoleculeFileTest, nospecial)
@@ -210,7 +212,7 @@ TEST_F(MoleculeFileTest, nospecial)
                              "Comment\n3 atoms\n\n2 bonds\n\n"
                              " Coords\n\n 1 1.0 1.0 1.0\n 2 1.0 1.0 0.0\n 3 1.0 0.0 1.0\n"
                              " Bonds\n\n 1 1 1 2\n 2 1 1 3\n"););
-    remove("nospecial.mol");
+    platform::unlink("moltest_nospecial.mol");
 }
 
 TEST_F(MoleculeFileTest, minimal)
@@ -218,7 +220,7 @@ TEST_F(MoleculeFileTest, minimal)
     BEGIN_CAPTURE_OUTPUT();
     run_mol_cmd(test_name, "", "Comment\n1 atoms\n\n Coords\n\n 1 0.0 0.0 0.0\n");
     auto output = END_CAPTURE_OUTPUT();
-    ASSERT_THAT(output, ContainsRegex(".*Read molecule template.*\n.*1 molecules.*\n"
+    ASSERT_THAT(output, ContainsRegex(".*Read molecule template.*\n.*Comment.*\n.*1 molecules.*\n"
                                       ".*0 fragments.*\n.*1 atoms.*\n.*0 bonds.*"));
 }
 
@@ -230,7 +232,7 @@ TEST_F(MoleculeFileTest, notype)
     command("create_box 1 box");
     run_mol_cmd(test_name, "", "Comment\n1 atoms\n\n Coords\n\n 1 0.0 0.0 0.0\n");
     auto output = END_CAPTURE_OUTPUT();
-    ASSERT_THAT(output, ContainsRegex(".*Read molecule template.*\n.*1 molecules.*\n"
+    ASSERT_THAT(output, ContainsRegex(".*Read molecule template.*\n.*Comment.*\n.*1 molecules.*\n"
                                       ".*0 fragments.*\n.*1 atoms.*\n.*0 bonds.*"));
     TEST_FAILURE(".*ERROR: Create_atoms molecule must have atom types.*",
                  command("create_atoms 0 single 0.0 0.0 0.0 mol notype 542465"););
@@ -259,7 +261,7 @@ TEST_F(MoleculeFileTest, twomols)
                 " Coords\n\n 1 0.0 0.0 0.0\n 2 0.0 0.0 1.0\n"
                 " Molecules\n\n 1 1\n 2 2\n\n Types\n\n 1 1\n 2 2\n\n");
     auto output = END_CAPTURE_OUTPUT();
-    ASSERT_THAT(output, ContainsRegex(".*Read molecule template.*\n.*2 molecules.*\n"
+    ASSERT_THAT(output, ContainsRegex(".*Read molecule template.*\n.*Comment.*\n.*2 molecules.*\n"
                                       ".*0 fragments.*\n.*2 atoms with max type 2.*\n.*0 bonds.*"));
     ASSERT_EQ(lmp->atom->nmolecule, 1);
     auto mols = lmp->atom->get_molecule_by_id(test_name);
@@ -273,10 +275,10 @@ TEST_F(MoleculeFileTest, twofiles)
     auto output = END_CAPTURE_OUTPUT();
     ASSERT_THAT(
         output,
-        ContainsRegex(".*Read molecule template twomols:.*\n.*1 molecules.*\n"
+        ContainsRegex(".*Read molecule template twomols:.*\n.*Water.*\n.*1 molecules.*\n"
                       ".*0 fragments.*\n.*3 atoms with max type 2.*\n.*2 bonds with max type 1.*\n"
                       ".*1 angles with max type 1.*\n.*0 dihedrals.*\n.*0 impropers.*\n"
-                      ".*Read molecule template twomols:.*\n.*1 molecules.*\n"
+                      ".*Read molecule template twomols:.*\n.*CO2.*\n.*1 molecules.*\n"
                       ".*0 fragments.*\n.*3 atoms with max type 4.*\n.*2 bonds with max type 2.*\n"
                       ".*1 angles with max type 2.*\n.*0 dihedrals.*"));
     BEGIN_CAPTURE_OUTPUT();
@@ -306,7 +308,7 @@ TEST_F(MoleculeFileTest, labelmap)
     auto output = END_CAPTURE_OUTPUT();
     ASSERT_THAT(
         output,
-        ContainsRegex(".*Read molecule template h2olabel:.*\n.*1 molecules.*\n"
+        ContainsRegex(".*Read molecule template h2olabel:.*\n.*Water.*\n.*1 molecules.*\n"
                       ".*0 fragments.*\n.*3 atoms with max type 2.*\n.*2 bonds with max type 1.*\n"
                       ".*1 angles with max type 1.*\n.*0 dihedrals.*\n.*0 impropers.*"));
     BEGIN_CAPTURE_OUTPUT();
@@ -314,7 +316,7 @@ TEST_F(MoleculeFileTest, labelmap)
     output = END_CAPTURE_OUTPUT();
     ASSERT_THAT(
         output,
-        ContainsRegex(".*Read molecule template co2label:.*\n.*1 molecules.*\n"
+        ContainsRegex(".*Read molecule template co2label:.*\n.*CO2.*\n.*1 molecules.*\n"
                       ".*0 fragments.*\n.*3 atoms with max type 4.*\n.*2 bonds with max type 2.*\n"
                       ".*1 angles with max type 2.*\n.*0 dihedrals.*"));
     BEGIN_CAPTURE_OUTPUT();
@@ -328,12 +330,12 @@ TEST_F(MoleculeFileTest, labelmap)
     auto second = output.substr(mark);
     ASSERT_THAT(
         first,
-        ContainsRegex(".*Read molecule template h2onum:.*\n.*1 molecules.*\n"
+        ContainsRegex(".*Read molecule template h2onum:.*\n.*Water.*\n.*1 molecules.*\n"
                       ".*0 fragments.*\n.*3 atoms with max type 2.*\n.*2 bonds with max type 1.*\n"
                       ".*1 angles with max type 1.*\n.*0 dihedrals.*\n.*0 impropers.*\n"));
     ASSERT_THAT(
         second,
-        ContainsRegex(".*Read molecule template co2num:.*\n.*1 molecules.*\n"
+        ContainsRegex(".*Read molecule template co2num:.*\n.*CO2.*\n.*1 molecules.*\n"
                       ".*0 fragments.*\n.*3 atoms with max type 4.*\n.*2 bonds with max type 2.*\n"
                       ".*1 angles with max type 2.*\n.*0 dihedrals.*"));
     ASSERT_EQ(lmp->atom->nmolecule, 4);
@@ -379,7 +381,7 @@ TEST_F(MoleculeFileTest, bonds)
                 " 1 1 1 2\n"
                 " 2 2 1 3\n\n");
     auto output = END_CAPTURE_OUTPUT();
-    ASSERT_THAT(output, ContainsRegex(".*Read molecule template.*\n.*1 molecules.*\n"
+    ASSERT_THAT(output, ContainsRegex(".*Read molecule template.*\n.*Comment.*\n.*1 molecules.*\n"
                                       ".*0 fragments.*\n.*4 atoms.*type.*2.*\n"
                                       ".*2 bonds.*type.*2.*\n.*0 angles.*"));
 
@@ -404,6 +406,60 @@ TEST_F(MoleculeFileTest, bonds)
     END_HIDE_OUTPUT();
 }
 
+TEST_F(MoleculeFileTest, dipoles)
+{
+    if (!LAMMPS::is_installed_pkg("DIPOLE")) GTEST_SKIP();
+    BEGIN_CAPTURE_OUTPUT();
+    command("atom_style dipole");
+    command("region box block 0 1 0 1 0 1");
+    command("create_box 2 box");
+    run_mol_cmd(test_name, "",
+                "# Dumbbell with dipole molecule file.\n\n"
+                "2 atoms\n\n"
+                "Coords\n\n1 -1.0 0.0 0.0\n2  1.0 0.0 0.0\n\n"
+                "Types\n\n1 1\n2 2\n\n"
+                "Dipoles\n\n1 1.0 0.0 0.0\n2 1.0 1.0 0.0\n\n");
+    auto output = END_CAPTURE_OUTPUT();
+    ASSERT_THAT(output, ContainsRegex(".*Read molecule template.*\n.*Dumbbell.*\n.*1 molecules.*\n"
+                                      ".*0 fragments.*\n.*2 atoms.*type.*2.*\n"));
+
+    BEGIN_CAPTURE_OUTPUT();
+    command("mass * 1.0");
+    command("create_atoms 0 single 0.5 0.5 0.5 mol dipoles 67235 rotate 90.0 0.0 0.0 1.0");
+    output = END_CAPTURE_OUTPUT();
+    ASSERT_THAT(output, ContainsRegex(".*Created 2 atoms.*"));
+
+    Molecule *mol = lmp->atom->molecules[0];
+    ASSERT_EQ(mol->natoms, 2);
+    ASSERT_EQ(lmp->atom->natoms, 2);
+    mol->compute_mass();
+    mol->compute_com();
+    EXPECT_NEAR(mol->masstotal, 2.0, EPSILON);
+    EXPECT_NEAR(mol->com[0], 0.0, EPSILON);
+    EXPECT_NEAR(mol->com[1], 0.0, EPSILON);
+    EXPECT_NEAR(mol->com[2], 0.0, EPSILON);
+    EXPECT_EQ(mol->comatom, 1);
+    ASSERT_NE(mol->mu, nullptr);
+    EXPECT_NEAR(mol->mu[0][0], 1.0, EPSILON);
+    EXPECT_NEAR(mol->mu[0][1], 0.0, EPSILON);
+    EXPECT_NEAR(mol->mu[0][2], 0.0, EPSILON);
+    EXPECT_NEAR(mol->mu[1][0], 1.0, EPSILON);
+    EXPECT_NEAR(mol->mu[1][1], 1.0, EPSILON);
+    EXPECT_NEAR(mol->mu[1][2], 0.0, EPSILON);
+    EXPECT_NEAR(mol->maxextent, 2.0, EPSILON);
+    // dipoles should be rotated by 90 degrees clockwise around the z axis
+    double **mu = lmp->atom->mu;
+    ASSERT_NE(mu, nullptr);
+    EXPECT_NEAR(mu[0][0], 0.0, EPSILON);
+    EXPECT_NEAR(mu[0][1], 1.0, EPSILON);
+    EXPECT_NEAR(mu[0][2], 0.0, EPSILON);
+    EXPECT_NEAR(mu[0][3], 1.0, EPSILON);
+    EXPECT_NEAR(mu[1][0], -1.0, EPSILON);
+    EXPECT_NEAR(mu[1][1], 1.0, EPSILON);
+    EXPECT_NEAR(mu[1][2], 0.0, EPSILON);
+    EXPECT_NEAR(mu[1][3], sqrt(2.0), EPSILON);
+}
+
 int main(int argc, char **argv)
 {
     MPI_Init(&argc, &argv);
diff --git a/unittest/fortran/test_fortran_extract_variable.f90 b/unittest/fortran/test_fortran_extract_variable.f90
index b4b5ab8727..619962daf9 100644
--- a/unittest/fortran/test_fortran_extract_variable.f90
+++ b/unittest/fortran/test_fortran_extract_variable.f90
@@ -361,15 +361,23 @@ FUNCTION f_lammps_extract_variable_vector(i) BIND(C)
   f_lammps_extract_variable_vector = vector(i)
 END FUNCTION f_lammps_extract_variable_vector
 
-SUBROUTINE f_lammps_set_variable_string() BIND(C)
-  USE, INTRINSIC :: ISO_C_BINDING, ONLY : c_double, c_int
+SUBROUTINE f_lammps_set_string_variable() BIND(C)
   USE LIBLAMMPS
   USE keepstuff, ONLY : lmp, f2c_string
   IMPLICIT NONE
   CHARACTER(LEN=40) :: string
 
   string = "this is the new string"
-  CALL lmp%set_variable('str', string)
-END SUBROUTINE f_lammps_set_variable_string
+  CALL lmp%set_string_variable('str', string)
+END SUBROUTINE f_lammps_set_string_variable
+
+SUBROUTINE f_lammps_set_internal_variable() BIND(C)
+  USE, INTRINSIC :: ISO_C_BINDING, ONLY : c_double
+  USE LIBLAMMPS
+  USE keepstuff, ONLY : lmp, f2c_string
+  IMPLICIT NONE
+
+  CALL lmp%set_internal_variable('int', -2.5_c_double)
+END SUBROUTINE f_lammps_set_internal_variable
 
 ! vim: sts=2 ts=2 sw=2 et
diff --git a/unittest/fortran/wrap_configuration.cpp b/unittest/fortran/wrap_configuration.cpp
index 5fb744086e..08974d8a08 100644
--- a/unittest/fortran/wrap_configuration.cpp
+++ b/unittest/fortran/wrap_configuration.cpp
@@ -234,7 +234,7 @@ TEST_F(LAMMPS_configuration, style_count)
 {
     Info info(lmp);
     for (const auto &c : style_category)
-        EXPECT_EQ(f_lammps_style_count(c.c_str()), info.get_available_styles(c.c_str()).size());
+        EXPECT_EQ(f_lammps_style_count(c.c_str()), info.get_available_styles(c).size());
 };
 
 TEST_F(LAMMPS_configuration, style_name)
diff --git a/unittest/fortran/wrap_extract_variable.cpp b/unittest/fortran/wrap_extract_variable.cpp
index 8c81897c0a..b1f79e149a 100644
--- a/unittest/fortran/wrap_extract_variable.cpp
+++ b/unittest/fortran/wrap_extract_variable.cpp
@@ -43,7 +43,9 @@ double f_lammps_extract_variable_internal();
 double f_lammps_extract_variable_equal();
 double f_lammps_extract_variable_atom(int);
 double f_lammps_extract_variable_vector(int);
-void f_lammps_set_variable_string();
+void f_lammps_set_string_variable();
+void f_lammps_set_internal_variable();
+
 char *c_path_join(const char *, const char *);
 }
 
@@ -155,7 +157,7 @@ TEST_F(LAMMPS_extract_variable, string)
     char *fstr = f_lammps_extract_variable_string();
     EXPECT_STREQ(fstr, "this is a string");
     std::free(fstr);
-    f_lammps_set_variable_string();
+    f_lammps_set_string_variable();
     fstr = f_lammps_extract_variable_string();
     EXPECT_STREQ(fstr, "this is the new string");
     std::free(fstr);
@@ -254,6 +256,8 @@ TEST_F(LAMMPS_extract_variable, internal)
 {
     f_lammps_setup_extract_variable();
     EXPECT_DOUBLE_EQ(f_lammps_extract_variable_internal(), 4.0);
+    f_lammps_set_internal_variable();
+    EXPECT_DOUBLE_EQ(f_lammps_extract_variable_internal(), -2.5);
 };
 
 TEST_F(LAMMPS_extract_variable, equal)
diff --git a/unittest/python/CMakeLists.txt b/unittest/python/CMakeLists.txt
index b4ba281d93..f3b851620c 100644
--- a/unittest/python/CMakeLists.txt
+++ b/unittest/python/CMakeLists.txt
@@ -84,20 +84,26 @@ if(Python_EXECUTABLE)
            WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
   set_tests_properties(PythonCommands PROPERTIES ENVIRONMENT "${PYTHON_TEST_ENVIRONMENT}")
 
-  add_test(NAME PythonNumpy
-           COMMAND ${PYTHON_TEST_RUNNER} ${CMAKE_CURRENT_SOURCE_DIR}/python-numpy.py -v
-           WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
-  set_tests_properties(PythonNumpy PROPERTIES ENVIRONMENT "${PYTHON_TEST_ENVIRONMENT}")
+  # randomly failing on macOS with python 3.12
+  if(NOT APPLE)
+    add_test(NAME PythonNumpy
+             COMMAND ${PYTHON_TEST_RUNNER} ${CMAKE_CURRENT_SOURCE_DIR}/python-numpy.py -v
+             WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
+    set_tests_properties(PythonNumpy PROPERTIES ENVIRONMENT "${PYTHON_TEST_ENVIRONMENT}")
+  endif()
 
   add_test(NAME PythonCapabilities
            COMMAND ${PYTHON_TEST_RUNNER} ${CMAKE_CURRENT_SOURCE_DIR}/python-capabilities.py -v
            WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
   set_tests_properties(PythonCapabilities PROPERTIES ENVIRONMENT "${PYTHON_TEST_ENVIRONMENT}")
 
-  add_test(NAME PythonPyLammps
-           COMMAND ${PYTHON_TEST_RUNNER} ${CMAKE_CURRENT_SOURCE_DIR}/python-pylammps.py -v
-           WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
-  set_tests_properties(PythonPyLammps PROPERTIES ENVIRONMENT "${PYTHON_TEST_ENVIRONMENT}")
+  # randomly failing on macOS with python 3.12
+  if(NOT APPLE)
+    add_test(NAME PythonPyLammps
+             COMMAND ${PYTHON_TEST_RUNNER} ${CMAKE_CURRENT_SOURCE_DIR}/python-pylammps.py -v
+             WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
+    set_tests_properties(PythonPyLammps PROPERTIES ENVIRONMENT "${PYTHON_TEST_ENVIRONMENT}")
+  endif()
 
   add_test(NAME PythonFormats
            COMMAND ${PYTHON_TEST_RUNNER} ${CMAKE_CURRENT_SOURCE_DIR}/python-formats.py -v
diff --git a/unittest/python/python-commands.py b/unittest/python/python-commands.py
index 1c25751191..c9f16f6a71 100644
--- a/unittest/python/python-commands.py
+++ b/unittest/python/python-commands.py
@@ -475,6 +475,26 @@ create_atoms 1 single &
         a = self.lmp.extract_variable("a")
         self.assertEqual(a, 3.14)
 
+    def test_extract_variable_stringstyle(self):
+        self.lmp.command("variable a string xxx")
+        a = self.lmp.extract_variable("a")
+        self.assertEqual(a, 'xxx')
+
+        rv = self.lmp.set_string_variable("a","20")
+        a = self.lmp.extract_variable("a")
+        self.assertEqual(a, '20')
+        self.assertEqual(rv, 0)
+
+    def test_extract_variable_internalstyle(self):
+        self.lmp.command("variable a internal 2.0")
+        a = self.lmp.extract_variable("a")
+        self.assertEqual(a, 2.0)
+
+        rv = self.lmp.set_internal_variable("a",-4.5)
+        a = self.lmp.extract_variable("a")
+        self.assertEqual(a, -4.5)
+        self.assertEqual(rv, 0)
+
     def test_extract_variable_atomstyle(self):
         self.lmp.command("units lj")
         self.lmp.command("atom_style atomic")
diff --git a/unittest/utils/test_lepton.cpp b/unittest/utils/test_lepton.cpp
index 7b2c86f05f..55d3bf8351 100644
--- a/unittest/utils/test_lepton.cpp
+++ b/unittest/utils/test_lepton.cpp
@@ -129,9 +129,9 @@ TEST(LeptonCustomFunction, zbl)
  */
 
 class ExampleFunction : public Lepton::CustomFunction {
-    int getNumArguments() const { return 2; }
-    double evaluate(const double *arguments) const { return 2.0 * arguments[0] * arguments[1]; }
-    double evaluateDerivative(const double *arguments, const int *derivOrder) const
+    int getNumArguments() const override { return 2; }
+    double evaluate(const double *arguments) const override { return 2.0 * arguments[0] * arguments[1]; }
+    double evaluateDerivative(const double *arguments, const int *derivOrder) const override
     {
         if (derivOrder[0] == 1) {
             if (derivOrder[1] == 0)
@@ -142,7 +142,7 @@ class ExampleFunction : public Lepton::CustomFunction {
         if (derivOrder[1] == 1 && derivOrder[0] == 0) return 2.0 * arguments[0];
         return 0.0;
     }
-    Lepton::CustomFunction *clone() const { return new ExampleFunction(); }
+    Lepton::CustomFunction *clone() const override { return new ExampleFunction(); }
 };
 
 /**
@@ -542,6 +542,41 @@ TEST(Lepton, Optimize)
     out.str("");
 }
 
+TEST(Lepton, Exception)
+{
+    Lepton::CompiledExpression function, derivative;
+
+    auto parsed = Lepton::Parser::parse("x*x");
+    function    = parsed.createCompiledExpression();
+    derivative  = parsed.differentiate("x").createCompiledExpression();
+
+    double x = 1.5;
+    EXPECT_NO_THROW(function.getVariableReference("x") = x;);
+    EXPECT_NO_THROW(derivative.getVariableReference("x") = x;);
+    EXPECT_DOUBLE_EQ(function.evaluate(), 2.25);
+    EXPECT_DOUBLE_EQ(derivative.evaluate(), 3.0);
+
+    parsed     = Lepton::Parser::parse("x");
+    function   = parsed.createCompiledExpression();
+    derivative = parsed.differentiate("x").createCompiledExpression();
+
+    x = 2.5;
+    EXPECT_NO_THROW(function.getVariableReference("x") = x;);
+    EXPECT_THROW(derivative.getVariableReference("x") = x;, Lepton::Exception);
+    EXPECT_DOUBLE_EQ(function.evaluate(), 2.5);
+    EXPECT_DOUBLE_EQ(derivative.evaluate(), 1.0);
+
+    parsed     = Lepton::Parser::parse("1.0");
+    function   = parsed.createCompiledExpression();
+    derivative = parsed.differentiate("x").createCompiledExpression();
+
+    x = 0.5;
+    EXPECT_THROW(function.getVariableReference("x") = x;, Lepton::Exception);
+    EXPECT_THROW(derivative.getVariableReference("x") = x;, Lepton::Exception);
+    EXPECT_DOUBLE_EQ(function.evaluate(), 1.0);
+    EXPECT_DOUBLE_EQ(derivative.evaluate(), 0.0);
+}
+
 int main(int argc, char **argv)
 {
     MPI_Init(&argc, &argv);