Merge branch 'master' into multi-config-support

# Conflicts: # cmake/Modules/Packages/MSCG.cmake # examples/plugins/CMakeLists.txt
2021-10-11 17:03:41 -04:00
parent 342ca7ff1d a6cde11896
commit 510987dc80
412 changed files with 5927 additions and 3856 deletions
--- a/.github/workflows/compile-msvc.yml
+++ b/.github/workflows/compile-msvc.yml
@ -0,0 +1,33 @@
 # GitHub action to build LAMMPS on Windows with Visual C++
 name: "Native Windows Compilation"
 on:
  push:
    branches: [master]
 jobs:
  build:
    name: Windows Compilation Test
    if: ${{ github.repository == 'lammps/lammps' }}
    runs-on: windows-latest
    steps:
    - name: Checkout repository
      uses: actions/checkout@v2
      with:
        fetch-depth: 2
    - name: Building LAMMPS via CMake
      shell: bash
      run: |
        cmake -C cmake/presets/windows.cmake \
              -S cmake -B build \
              -D BUILD_SHARED_LIBS=on \
              -D LAMMPS_EXCEPTIONS=on
        cmake --build build --config Release
    - name: Run LAMMPS executable
      shell: bash
      run: |
        ./build/Release/lmp.exe -h
        ./build/Release/lmp.exe -in bench/in.lj
--- a/.gitignore
+++ b/.gitignore
@ -37,8 +37,8 @@ vgcore.*
 .Trashes
 ehthumbs.db
 Thumbs.db
 .clang-format
 .lammps_history
 .vs
 #cmake
 /build*
@ -49,3 +49,8 @@ Thumbs.db
 /Testing
 /cmake_install.cmake
 /lmp
 out/Debug
 out/RelWithDebInfo
 out/Release
 out/x86
 out/x64
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -82,21 +82,39 @@ include(CheckIncludeFileCXX)
 # set required compiler flags and compiler/CPU arch specific optimizations
 if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") OR (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM"))
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -restrict")
+  if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
-  if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4)
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
-    set(CMAKE_TUNE_DEFAULT "-xCOMMON-AVX512")
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qrestrict")
    endif()
    if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4)
      set(CMAKE_TUNE_DEFAULT "/QxCOMMON-AVX512")
    else()
      set(CMAKE_TUNE_DEFAULT "/QxHost")
    endif()
  else()
-    set(CMAKE_TUNE_DEFAULT "-xHost")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -restrict")
    if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4)
      set(CMAKE_TUNE_DEFAULT "-xCOMMON-AVX512")
    else()
      set(CMAKE_TUNE_DEFAULT "-xHost")
    endif()
  endif()
 endif()
-# we require C++11 without extensions
+# we require C++11 without extensions. Kokkos requires at least C++14 (currently)
 set(CMAKE_CXX_STANDARD 11)
 if(PKG_KOKKOS AND (CMAKE_CXX_STANDARD LESS 14))
  set(CMAKE_CXX_STANDARD 14)
 endif()
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Use compiler extensions")
-# ugly hack for MSVC which by default always reports an old C++ standard in the __cplusplus macro
+# ugly hacks for MSVC which by default always reports an old C++ standard in the __cplusplus macro
 # and prints lots of pointless warnings about "unsafe" functions
 if(MSVC)
  add_compile_options(/Zc:__cplusplus)
  add_compile_options(/wd4244)
  add_compile_options(/wd4267)
  add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
 endif()
 # export all symbols when building a .dll file on windows
@ -281,6 +299,11 @@ else()
  target_include_directories(mpi_stubs PUBLIC $<BUILD_INTERFACE:${LAMMPS_SOURCE_DIR}/STUBS>)
  if(BUILD_SHARED_LIBS)
    target_link_libraries(lammps PRIVATE mpi_stubs)
    if(MSVC)
      target_link_libraries(lmp PRIVATE mpi_stubs)
      target_include_directories(lmp INTERFACE $<BUILD_INTERFACE:${LAMMPS_SOURCE_DIR}/STUBS>)
      target_compile_definitions(lmp INTERFACE $<INSTALL_INTERFACE:LAMMPS_LIB_NO_MPI>)
    endif(MSVC)
    target_include_directories(lammps INTERFACE $<BUILD_INTERFACE:${LAMMPS_SOURCE_DIR}/STUBS>)
    target_compile_definitions(lammps INTERFACE $<INSTALL_INTERFACE:LAMMPS_LIB_NO_MPI>)
  else()
@ -468,9 +491,12 @@ foreach(HEADER cmath)
  endif(NOT FOUND_${HEADER})
 endforeach(HEADER)
-set(MATH_LIBRARIES "m" CACHE STRING "math library")
+# make the standard math library overrideable and autodetected (for systems that don't have it)
-mark_as_advanced( MATH_LIBRARIES )
+find_library(STANDARD_MATH_LIB m DOC "Standard Math library")
-target_link_libraries(lammps PRIVATE ${MATH_LIBRARIES})
+mark_as_advanced(STANDARD_MATH_LIB)
 if(STANDARD_MATH_LIB)
  target_link_libraries(lammps PRIVATE ${STANDARD_MATH_LIB})
 endif()
 ######################################
 # Generate Basic Style files
@ -608,7 +634,7 @@ endif()
 # and after everything else that is compiled locally
 ######################################################################
 if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
-  target_link_libraries(lammps PRIVATE -lwsock32 -lpsapi)
+  target_link_libraries(lammps PRIVATE "wsock32;psapi")
 endif()
 ######################################################
--- a/cmake/CMakeSettings.json
+++ b/cmake/CMakeSettings.json
@ -0,0 +1,55 @@
 {
    "configurations": [
        {
            "name": "x64-Debug-MSVC",
            "generator": "Ninja",
            "configurationType": "Debug",
            "buildRoot": "${workspaceRoot}\\build\\${name}",
            "installRoot": "${workspaceRoot}\\install\\${name}",
            "cmakeCommandArgs": "-S ${workspaceRoot}\\cmake -C ${workspaceRoot}\\cmake\\presets\\windows.cmake",
            "buildCommandArgs": "",
            "ctestCommandArgs": "",
            "inheritEnvironments": [ "msvc_x64_x64" ],
            "variables": [
                {
                    "name": "BUILD_SHARED_LIBS",
                    "value": "True",
                    "type": "BOOL"
                },
                {
                    "name": "BUILD_TOOLS",
                    "value": "True",
                    "type": "BOOL"
                },
                {
                    "name": "LAMMPS_EXCEPTIONS",
                    "value": "True",
                    "type": "BOOL"
                }
            ]
        },
        {
            "name": "x64-Debug-Clang",
            "generator": "Ninja",
            "configurationType": "Debug",
            "buildRoot": "${workspaceRoot}\\build\\${name}",
            "installRoot": "${workspaceRoot}\\install\\${name}",
            "cmakeCommandArgs": "-S ${workspaceRoot}\\cmake -C ${workspaceRoot}\\cmake\\presets\\windows.cmake",
            "buildCommandArgs": "",
            "ctestCommandArgs": "",
            "inheritEnvironments": [ "clang_cl_x64" ],
            "variables": [
                {
                    "name": "BUILD_TOOLS",
                    "value": "True",
                    "type": "BOOL"
                },
                {
                    "name": "LAMMPS_EXCEPTIONS",
                    "value": "True",
                    "type": "BOOL"
                }
            ]
        }
    ]
 }
--- a/cmake/Modules/Packages/GPU.cmake
+++ b/cmake/Modules/Packages/GPU.cmake
@ -217,13 +217,20 @@ elseif(GPU_API STREQUAL "OPENCL")
 elseif(GPU_API STREQUAL "HIP")
  if(NOT DEFINED HIP_PATH)
      if(NOT DEFINED ENV{HIP_PATH})
-          set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
+          set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to HIP installation")
      else()
-          set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
+          set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to HIP installation")
      endif()
  endif()
-  set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
+  if(NOT DEFINED ROCM_PATH)
-  find_package(HIP REQUIRED)
+      if(NOT DEFINED ENV{ROCM_PATH})
          set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to ROCm installation")
      else()
          set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to ROCm installation")
      endif()
  endif()
  list(APPEND CMAKE_PREFIX_PATH ${HIP_PATH} ${ROCM_PATH})
  find_package(hip REQUIRED)
  option(HIP_USE_DEVICE_SORT "Use GPU sorting" ON)
  if(NOT DEFINED HIP_PLATFORM)
@ -325,10 +332,11 @@ elseif(GPU_API STREQUAL "HIP")
  set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h ${LAMMPS_LIB_BINARY_DIR}/gpu/*.cu.cpp")
-  hip_add_library(gpu STATIC ${GPU_LIB_SOURCES})
+  add_library(gpu STATIC ${GPU_LIB_SOURCES})
  target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu)
  target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT)
  target_compile_definitions(gpu PRIVATE -DUSE_HIP)
  target_link_libraries(gpu PRIVATE hip::host)
  if(HIP_USE_DEVICE_SORT)
    # add hipCUB
@ -377,8 +385,9 @@ elseif(GPU_API STREQUAL "HIP")
    endif()
  endif()
-  hip_add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
+  add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
  target_compile_definitions(hip_get_devices PRIVATE -DUCL_HIP)
  target_link_libraries(hip_get_devices hip::host)
  if(HIP_PLATFORM STREQUAL "nvcc")
    target_compile_definitions(gpu PRIVATE -D__HIP_PLATFORM_NVCC__)
--- a/cmake/Modules/Packages/KOKKOS.cmake
+++ b/cmake/Modules/Packages/KOKKOS.cmake
@ -1,6 +1,8 @@
 ########################################################################
 # As of version 3.3.0 Kokkos requires C++14
-set(CMAKE_CXX_STANDARD 14)
+if(CMAKE_CXX_STANDARD LESS 14)
  message(FATAL_ERROR "The KOKKOS package requires the C++ standard to be set to at least C++14")
 endif()
 ########################################################################
 # consistency checks and Kokkos options/settings required by LAMMPS
 if(Kokkos_ENABLE_CUDA)
--- a/cmake/Modules/Packages/LATTE.cmake
+++ b/cmake/Modules/Packages/LATTE.cmake
@ -19,6 +19,14 @@ if(DOWNLOAD_LATTE)
  set(LATTE_MD5 "820e73a457ced178c08c71389a385de7" CACHE STRING "MD5 checksum of LATTE tarball")
  mark_as_advanced(LATTE_URL)
  mark_as_advanced(LATTE_MD5)
  # CMake cannot pass BLAS or LAPACK library variable to external project if they are a list.
  # Count the entries of both variables and refuse to build the downloaded LATTE library
  # when either of them holds more than one library.
  # list(LENGTH) takes the *name* of the list variable, so it must be spelled exactly.
  list(LENGTH BLAS_LIBRARIES NUM_BLAS)
  list(LENGTH LAPACK_LIBRARIES NUM_LAPACK)
  if((NUM_BLAS GREATER 1) OR (NUM_LAPACK GREATER 1))
    message(FATAL_ERROR "Cannot compile downloaded LATTE library due to a technical limitation")
  endif()
  include(ExternalProject)
  ExternalProject_Add(latte_build
    URL     ${LATTE_URL}
--- a/cmake/Modules/Packages/MACHDYN.cmake
+++ b/cmake/Modules/Packages/MACHDYN.cmake
@ -7,8 +7,9 @@ endif()
 # User-visible switch: download and build Eigen3 instead of using an installed copy.
 # NOTE(review): the default comes from DOWNLOAD_EIGEN3_DEFAULT, presumably set by the
 # preceding (not visible) detection logic - confirm.
 option(DOWNLOAD_EIGEN3 "Download Eigen3 instead of using an already installed one" ${DOWNLOAD_EIGEN3_DEFAULT})
 if(DOWNLOAD_EIGEN3)
  message(STATUS "Eigen3 download requested - we will build our own")
-  set(EIGEN3_URL "https://gitlab.com/libeigen/eigen/-/archive/3.3.9/eigen-3.3.9.tar.gz" CACHE STRING "URL for Eigen3 tarball")
+
-  set(EIGEN3_MD5 "609286804b0f79be622ccf7f9ff2b660" CACHE STRING "MD5 checksum of Eigen3 tarball")
+  set(EIGEN3_URL "https://download.lammps.org/thirdparty/eigen-3.4.0.tar.gz" CACHE STRING "URL for Eigen3 tarball")
  set(EIGEN3_MD5 "4c527a9171d71a72a9d4186e65bea559" CACHE STRING "MD5 checksum of Eigen3 tarball")
  mark_as_advanced(EIGEN3_URL)
  mark_as_advanced(EIGEN3_MD5)
  include(ExternalProject)
--- a/cmake/Modules/Packages/ML-HDNNP.cmake
+++ b/cmake/Modules/Packages/ML-HDNNP.cmake
@ -45,12 +45,12 @@ if(DOWNLOAD_N2P2)
    # get path to MPI include directory when cross-compiling to windows
    if((CMAKE_SYSTEM_NAME STREQUAL Windows) AND CMAKE_CROSSCOMPILING)
      get_target_property(N2P2_MPI_INCLUDE MPI::MPI_CXX INTERFACE_INCLUDE_DIRECTORIES)
-      set(N2P2_PROJECT_OPTIONS "-I ${N2P2_MPI_INCLUDE} -DMPICH_SKIP_MPICXX=1")
+      set(N2P2_PROJECT_OPTIONS "-I${N2P2_MPI_INCLUDE}")
      set(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER})
    endif()
    if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
      get_target_property(N2P2_MPI_INCLUDE MPI::MPI_CXX INTERFACE_INCLUDE_DIRECTORIES)
-      set(N2P2_PROJECT_OPTIONS "-I ${N2P2_MPI_INCLUDE} -DMPICH_SKIP_MPICXX=1")
+      set(N2P2_PROJECT_OPTIONS "-I${N2P2_MPI_INCLUDE}")
      set(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER})
    endif()
  endif()
@ -69,6 +69,12 @@ if(DOWNLOAD_N2P2)
  # echo final flag for debugging
  message(STATUS "N2P2 BUILD OPTIONS: ${N2P2_BUILD_OPTIONS}")
  # must have "sed" command to compile n2p2 library (for now)
  find_program(HAVE_SED sed)
  if(NOT HAVE_SED)
    message(FATAL_ERROR "Must have 'sed' program installed to compile 'n2p2' library for ML-HDNNP package")
  endif()
  # download and compile n2p2 library. must patch MPI calls in LAMMPS interface to accommodate MPI-2 (e.g. for cross-compiling)
  include(ExternalProject)
  ExternalProject_Add(n2p2_build
--- a/cmake/Modules/Packages/ML-QUIP.cmake
+++ b/cmake/Modules/Packages/ML-QUIP.cmake
@ -50,7 +50,7 @@ if(DOWNLOAD_QUIP)
    GIT_TAG origin/public
    GIT_SHALLOW YES
    GIT_PROGRESS YES
-    PATCH_COMMAND cp ${CMAKE_BINARY_DIR}/quip.config <SOURCE_DIR>/arch/Makefile.lammps
+    PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_BINARY_DIR}/quip.config <SOURCE_DIR>/arch/Makefile.lammps
    CONFIGURE_COMMAND env QUIP_ARCH=lammps make config
    BUILD_COMMAND env QUIP_ARCH=lammps make libquip
    INSTALL_COMMAND ""
--- a/cmake/Modules/Packages/SCAFACOS.cmake
+++ b/cmake/Modules/Packages/SCAFACOS.cmake
@ -23,6 +23,11 @@ if(DOWNLOAD_SCAFACOS)
  file(DOWNLOAD ${LAMMPS_THIRDPARTY_URL}/scafacos-1.0.1-fix.diff ${CMAKE_CURRENT_BINARY_DIR}/scafacos-1.0.1.fix.diff
          EXPECTED_HASH MD5=4baa1333bb28fcce102d505e1992d032)
  find_program(HAVE_PATCH patch)
  if(NOT HAVE_PATCH)
    message(FATAL_ERROR "The 'patch' program is required to build the ScaFaCoS library")
  endif()
  include(ExternalProject)
  ExternalProject_Add(scafacos_build
    URL     ${SCAFACOS_URL}
--- a/cmake/Modules/Packages/VORONOI.cmake
+++ b/cmake/Modules/Packages/VORONOI.cmake
@ -26,6 +26,11 @@ if(DOWNLOAD_VORO)
    set(VORO_BUILD_OPTIONS CXX=${CMAKE_CXX_COMPILER} CFLAGS=${VORO_BUILD_CFLAGS})
  endif()
  find_program(HAVE_PATCH patch)
  if(NOT HAVE_PATCH)
    message(FATAL_ERROR "The 'patch' program is required to build the voro++ library")
  endif()
  ExternalProject_Add(voro_build
    URL     ${VORO_URL}
    URL_MD5 ${VORO_MD5}
--- a/cmake/Modules/Tools.cmake
+++ b/cmake/Modules/Tools.cmake
@ -25,7 +25,9 @@ if(BUILD_TOOLS)
  get_filename_component(MSI2LMP_SOURCE_DIR ${LAMMPS_TOOLS_DIR}/msi2lmp/src ABSOLUTE)
  file(GLOB MSI2LMP_SOURCES ${MSI2LMP_SOURCE_DIR}/[^.]*.c)
  add_executable(msi2lmp ${MSI2LMP_SOURCES})
-  target_link_libraries(msi2lmp PRIVATE ${MATH_LIBRARIES})
+  if(STANDARD_MATH_LIB)
    target_link_libraries(msi2lmp PRIVATE ${STANDARD_MATH_LIB})
  endif()
  install(TARGETS msi2lmp DESTINATION ${CMAKE_INSTALL_BINDIR})
  install(FILES ${LAMMPS_DOC_DIR}/msi2lmp.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
 endif()
--- a/cmake/presets/hip_amd.cmake
+++ b/cmake/presets/hip_amd.cmake
@ -0,0 +1,30 @@
 # preset that will enable hip (clang/clang++) with support for MPI and OpenMP (on Linux boxes)
 # NOTE(review): presumably pre-loaded with "cmake -C" like the other preset files - confirm.
 # All cache entries below are written with FORCE and thus replace previously cached values.
 # prefer flang over gfortran, if available
 find_program(CLANG_FORTRAN NAMES flang gfortran f95)
 # export the selected Fortran compiler through the OMPI_FC environment variable
 # NOTE(review): presumably read by the OpenMPI Fortran compiler wrapper - confirm
 set(ENV{OMPI_FC} ${CLANG_FORTRAN})
 # use hipcc for both C++ and C, and the Fortran compiler found above
 set(CMAKE_CXX_COMPILER "hipcc" CACHE STRING "" FORCE)
 set(CMAKE_C_COMPILER "hipcc" CACHE STRING "" FORCE)
 set(CMAKE_Fortran_COMPILER ${CLANG_FORTRAN} CACHE STRING "" FORCE)
 # per-configuration C++ compiler flags (Debug, RelWithDebInfo, Release)
 set(CMAKE_CXX_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE)
 set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
 set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
 # per-configuration Fortran compiler flags; all of them select the Fortran 2003 standard
 set(CMAKE_Fortran_FLAGS_DEBUG "-Wall -Wextra -g -std=f2003" CACHE STRING "" FORCE)
 set(CMAKE_Fortran_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG -std=f2003" CACHE STRING "" FORCE)
 set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -DNDEBUG -std=f2003" CACHE STRING "" FORCE)
 # per-configuration C compiler flags (same values as for C++)
 set(CMAKE_C_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE)
 set(CMAKE_C_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
 set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
 # MPI settings: hipcc as the underlying C++ compiler, mpicxx as the MPI compiler wrapper
 set(MPI_CXX "hipcc" CACHE STRING "" FORCE)
 set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
 # drop the cached HAVE_OMP_H_INCLUDE entry
 # NOTE(review): presumably so the omp.h check is repeated with the new compiler - confirm
 unset(HAVE_OMP_H_INCLUDE CACHE)
 # OpenMP settings for C and C++: compile with -fopenmp and link the "omp" runtime library
 set(OpenMP_C "hipcc" CACHE STRING "" FORCE)
 set(OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" FORCE)
 set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
 set(OpenMP_CXX "hipcc" CACHE STRING "" FORCE)
 set(OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" FORCE)
 set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
 set(OpenMP_omp_LIBRARY "libomp.so" CACHE PATH "" FORCE)
--- a/cmake/presets/windows.cmake
+++ b/cmake/presets/windows.cmake
@ -0,0 +1,64 @@
 # preset that enables the LAMMPS packages listed in WIN_PACKAGES
 # NOTE(review): presumably the set of packages known to compile natively on Windows
 # with Visual C++ - confirm against the Windows build documentation
 set(WIN_PACKAGES
  ASPHERE
  BOCS
  BODY
  BROWNIAN
  CG-DNA
  CG-SDK
  CLASS2
  COLLOID
  COLVARS
  CORESHELL
  DIELECTRIC
  DIFFRACTION
  DIPOLE
  DPD-BASIC
  DPD-MESO
  DPD-REACT
  DPD-SMOOTH
  DRUDE
  EFF
  EXTRA-COMPUTE
  EXTRA-DUMP
  EXTRA-FIX
  EXTRA-MOLECULE
  EXTRA-PAIR
  FEP
  GRANULAR
  INTERLAYER
  KSPACE
  MANIFOLD
  MANYBODY
  MC
  MEAM
  MISC
  ML-IAP
  ML-SNAP
  MOFFF
  MOLECULE
  MOLFILE
  OPENMP
  ORIENT
  PERI
  PHONON
  POEMS
  PTM
  QEQ
  QTB
  REACTION
  REAXFF
  REPLICA
  RIGID
  SHOCK
  SMTBQ
  SPH
  SPIN
  SRD
  TALLY
  UEF
  YAFF)
 # switch every listed package on: PKG_<name> is written to the cache as ON with FORCE,
 # which replaces any value previously cached for that package
 foreach(PKG ${WIN_PACKAGES})
  set(PKG_${PKG} ON CACHE BOOL "" FORCE)
 endforeach()
--- a/doc/doxygen/Doxyfile.in
+++ b/doc/doxygen/Doxyfile.in
@ -435,6 +435,8 @@ INPUT                  = @LAMMPS_SOURCE_DIR@/utils.cpp                 \
                         @LAMMPS_SOURCE_DIR@/my_pool_chunk.cpp         \
                         @LAMMPS_SOURCE_DIR@/my_pool_chunk.h           \
                         @LAMMPS_SOURCE_DIR@/math_eigen.h              \
                         @LAMMPS_SOURCE_DIR@/platform.h                \
                         @LAMMPS_SOURCE_DIR@/platform.cpp              \
 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
--- a/doc/lammps.1
+++ b/doc/lammps.1
@ -1,4 +1,4 @@
-.TH LAMMPS "20 September 2021" "2021-09-20"
+.TH LAMMPS "29 September 2021" "2021-09-29"
 .SH NAME
 .B LAMMPS
 \- Molecular Dynamics Simulator.
--- a/doc/src/Build_settings.rst
+++ b/doc/src/Build_settings.rst
@ -71,7 +71,8 @@ LAMMPS can use them if they are available on your system.
         -D FFTW3_INCLUDE_DIR=path   # path to FFTW3 include files
         -D FFTW3_LIBRARY=path       # path to FFTW3 libraries
-         -D FFT_FFTW_THREADS=on      # enable using threaded FFTW3 libraries
+         -D FFTW3_OMP_LIBRARY=path   # path to FFTW3 OpenMP wrapper libraries
         -D FFT_FFTW_THREADS=on      # enable using OpenMP threaded FFTW3 libraries
         -D MKL_INCLUDE_DIR=path     # ditto for Intel MKL library
         -D FFT_MKL_THREADS=on       # enable using threaded FFTs with MKL libraries
         -D MKL_LIBRARY=path         # path to MKL libraries
@ -353,8 +354,10 @@ Read or write compressed files
 -----------------------------------------
 If this option is enabled, large files can be read or written with
-gzip compression by several LAMMPS commands, including
+compression by ``gzip`` or similar tools by several LAMMPS commands,
-:doc:`read_data <read_data>`, :doc:`rerun <rerun>`, and :doc:`dump <dump>`.
+including :doc:`read_data <read_data>`, :doc:`rerun <rerun>`, and
 :doc:`dump <dump>`.  Currently supported compression tools are:
 ``gzip``, ``bzip2``, ``zstd``, and ``lzma``.
 .. tabs::
@ -363,8 +366,7 @@ gzip compression by several LAMMPS commands, including
      .. code-block:: bash
         -D WITH_GZIP=value       # yes or no
-                                  # default is yes if CMake can find gzip, else no
+                                  # default is yes if CMake can find the gzip program, else no
         -D GZIP_EXECUTABLE=path  # path to gzip executable if CMake cannot find it
   .. tab:: Traditional make
@ -372,14 +374,15 @@ gzip compression by several LAMMPS commands, including
         LMP_INC = -DLAMMPS_GZIP
-This option requires that your operating system fully supports the "popen()"
+This option requires that your operating system fully supports the
-function in the standard runtime library and that a ``gzip`` executable can be
+"popen()" function in the standard runtime library and that a ``gzip``
-found by LAMMPS during a run.
+or other executable can be found by LAMMPS in the standard search path
 during a run.
 .. note::
-   On some clusters with high-speed networks, using the "fork()" library
+   On clusters with high-speed networks, using the "fork()" library call
-   call (required by "popen()") can interfere with the fast communication
+   (required by "popen()") can interfere with the fast communication
   library and lead to simulations using compressed output or input to
   hang or crash. For selected operations, compressed file I/O is also
   available using a compression library instead, which is what the
--- a/doc/src/Build_windows.rst
+++ b/doc/src/Build_windows.rst
@ -4,6 +4,7 @@ Notes for building LAMMPS on Windows
 * :ref:`General remarks <generic>`
 * :ref:`Running Linux on Windows <linux>`
 * :ref:`Using GNU GCC ported to Windows <gnu>`
 * :ref:`Using Visual Studio <msvc>`
 * :ref:`Using a cross-compiler <cross>`
 ----------
@ -31,13 +32,13 @@ pre-compiled Windows binary packages are sufficient for your needs.  If
 it is necessary for you to compile LAMMPS on a Windows machine
 (e.g. because it is your main desktop), please also consider using a
 virtual machine software and compile and run LAMMPS in a Linux virtual
-machine, or - if you have a sufficiently up-to-date Windows 10
+machine, or - if you have a sufficiently up-to-date Windows 10 or
-installation - consider using the Windows subsystem for Linux.  This
+Windows 11 installation - consider using the Windows subsystem for
-optional Windows feature allows you to run the bash shell from Ubuntu
+Linux.  This optional Windows feature allows you to run the bash shell
-from within Windows and from there on, you can pretty much use that
+from Ubuntu from within Windows and from there on, you can pretty much
-shell like you are running on an Ubuntu Linux machine (e.g. installing
+use that shell like you are running on an Ubuntu Linux machine
-software via apt-get and more).  For more details on that, please see
+(e.g. installing software via apt-get and more).  For more details on
-:doc:`this tutorial <Howto_wsl>`.
+that, please see :doc:`this tutorial <Howto_wsl>`.
 .. _gnu:
@ -67,6 +68,35 @@ requiring changes to the LAMMPS source code, or figure out corrections
 yourself, please report them on the lammps-users mailing list, or file
 them as an issue or pull request on the LAMMPS GitHub project.
 .. _msvc:
 Using Microsoft Visual Studio
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 Following the integration of the :doc:`platform namespace
 <Developer_platform>` into the LAMMPS code base, the ability to compile
 LAMMPS on Windows using Visual Studio has been significantly
 improved.  This has been tested with Visual Studio 2019 (aka version
 16).  Not all features and packages in LAMMPS are currently supported
 out of the box, but a preset ``cmake/presets/windows.cmake`` is provided
 that contains the packages that have been compiled successfully.  You
 must use the CMake based build procedure, and either use the integrated
 CMake support of Visual Studio or use an external CMake installation to
 create build files for the Visual Studio build system.  Please note that
 on launching Visual Studio it will scan the directory tree and likely
 miss the correct master ``CMakeLists.txt``.  Try to open the
 ``cmake/CMakeSettings.json`` and use those CMake configurations as a
 starting point.  It is also possible to configure and compile LAMMPS
 from the command line with a CMake binary from `cmake.org <https://cmake.org>`_.
 To support running in parallel you can compile with OpenMP enabled using
 the OPENMP package or install Microsoft MPI (including the SDK) and compile
 LAMMPS with MPI enabled.
 This is work in progress and you should contact the LAMMPS developers
 via GitHub, the forum, or the mailing list, if you have questions or
 LAMMPS specific problems.
 .. _cross:
 Using a cross-compiler
--- a/doc/src/Developer.rst
+++ b/doc/src/Developer.rst
@ -11,10 +11,12 @@ of time and requests from the LAMMPS user community.
   :maxdepth: 1
   Developer_org
   Developer_parallel
   Developer_flow
   Developer_write
   Developer_notes
   Developer_plugins
   Developer_unittest
   Classes
   Developer_platform
   Developer_utils
--- a/doc/src/Developer_par_comm.rst
+++ b/doc/src/Developer_par_comm.rst
@ -0,0 +1,120 @@
 Communication
 ^^^^^^^^^^^^^
 Following the partitioning scheme in use, all per-atom data (atom IDs,
 positions, velocities, types, etc.) is distributed across the MPI
 processes, which allows LAMMPS to handle very large systems provided it
 uses a correspondingly large number of MPI processes.  To be able to
 compute the short-range interactions, MPI processes need access not
 only to data of atoms they "own" but also to
 information about atoms from neighboring sub-domains, in LAMMPS referred
 to as "ghost" atoms.  These are copies of atoms storing required
 per-atom data for up to the communication cutoff distance. The green
 dashed-line boxes in the :ref:`domain-decomposition` figure illustrate
 the extended ghost-atom sub-domain for one processor.
 This approach is also used to implement periodic boundary
 conditions: atoms that lie within the cutoff distance across a periodic
 boundary are also stored as ghost atoms and taken from the periodic
 replication of the sub-domain, which may be the same sub-domain, e.g. if
 running in serial.  As a consequence of this, force computation in
 LAMMPS is not subject to minimum image conventions and thus cutoffs may
 be larger than half the simulation domain.
 .. _ghost-atom-comm:
 .. figure:: img/ghost-comm.png
   :align: center
   ghost atom communication
   This figure shows the ghost atom communication patterns between
   sub-domains for "brick" (left) and "tiled" communication styles for
   2d simulations.  The numbers indicate MPI process ranks.  Here the
   sub-domains are drawn spatially separated for clarity.  The
   dashed-line box is the extended sub-domain of processor 0 which
   includes its ghost atoms.  The red- and blue-shaded boxes are the
   regions of communicated ghost atoms.
 Efficient communication patterns are needed to update the "ghost" atom
 data, since that needs to be done at every MD time step or minimization
 step.  The diagrams of the :ref:`ghost-atom-comm` figure illustrate how ghost
 atom communication is performed in two stages for a 2d simulation (three
 in 3d) for both a regular and irregular partitioning of the simulation
 box.  For the regular case (left) atoms are exchanged first in the
 *x*-direction, then in *y*, with four neighbors in the grid of processor
 sub-domains.
 In the *x* stage, processor ranks 1 and 2 send owned atoms in their
 red-shaded regions to rank 0 (and vice versa).  Then in the *y* stage,
 ranks 3 and 4 send atoms in their blue-shaded regions to rank 0, which
 includes ghost atoms they received in the *x* stage.  Rank 0 thus
 acquires all its ghost atoms; atoms in the solid blue corner regions
 are communicated twice before rank 0 receives them.
 For the irregular case (right) the two stages are similar, but a
 processor can have more than one neighbor in each direction.  In the
 *x* stage, MPI ranks 1,2,3 send owned atoms in their red-shaded regions to
 rank 0 (and vice versa).  These include only atoms between the lower
 and upper *y*-boundary of rank 0's sub-domain.  In the *y* stage, ranks
 4,5,6 send atoms in their blue-shaded regions to rank 0.  This may
 include ghost atoms they received in the *x* stage, but only if they
 are needed by rank 0 to fill its extended ghost atom regions in the
 +/-*y* directions (blue rectangles).  Thus in this case, ranks 5 and
 6 do not include ghost atoms they received from each other (in the *x*
 stage) in the atoms they send to rank 0.  The key point is that while
 the pattern of communication is more complex in the irregular
 partitioning case, it can still proceed in two stages (three in 3d)
 via atom exchanges with only neighboring processors.
 When attributes of owned atoms are sent to neighboring processors to
 become attributes of their ghost atoms, LAMMPS calls this a "forward"
 communication.  On timesteps when atoms migrate to new owning processors
 and neighbor lists are rebuilt, each processor creates a list of its
 owned atoms which are ghost atoms in each of its neighbor processors.
 These lists are used to pack per-atom coordinates (for example) into
 message buffers in subsequent steps until the next reneighboring.
 A "reverse" communication is when computed ghost atom attributes are
 sent back to the processor that owns the atom.  This is used (for
 example) to sum partial forces on ghost atoms to the complete force on
 owned atoms.  The order of the two stages described in the
 :ref:`ghost-atom-comm` figure is inverted and the same lists of atoms
 are used to pack and unpack message buffers with per-atom forces.  When
 a received buffer is unpacked, the ghost forces are summed to owned atom
 forces.  As in forward communication, forces on atoms in the four blue
 corners of the diagrams are sent, received, and summed twice (once at
 each stage) before owning processors have the full force.
 These two operations are used in many places within LAMMPS aside from
 exchange of coordinates and forces, for example by manybody potentials
 to share intermediate per-atom values, or by rigid-body integrators to
 enable each atom in a body to access body properties.  Here are
 additional details about how these communication operations are
 performed in LAMMPS:
 - When exchanging data with different processors, forward and reverse
  communication is done using ``MPI_Send()`` and ``MPI_Irecv()`` calls.
  If a processor is "exchanging" atoms with itself, only the pack and
  unpack operations are performed, e.g. to create ghost atoms across
  periodic boundaries when running on a single processor.
 - For forward communication of owned atom coordinates, periodic box
  lengths are added and subtracted when the receiving processor is
  across a periodic boundary from the sender.  There is then no need to
  apply a minimum image convention when calculating distances between
  atom pairs when building neighbor lists or computing forces.
 - The cutoff distance for exchanging ghost atoms is typically equal to
  the neighbor cutoff.  But it can also be chosen to be longer if needed,
  e.g. half the diameter of a rigid body composed of multiple atoms or
  over 3x the length of a stretched bond for dihedral interactions.  It
  can also exceed the periodic box size.  For the regular communication
  pattern (left), if the cutoff distance extends beyond a neighbor
  processor's sub-domain, then multiple exchanges are performed in the
  same direction.  Each exchange is with the same neighbor processor,
  but buffers are packed/unpacked using a different list of atoms. For
  forward communication, in the first exchange a processor sends only
  owned atoms.  In subsequent exchanges, it sends ghost atoms received
  in previous exchanges.  For the irregular pattern (right) overlaps of
  a processor's extended ghost-atom sub-domain with all other processors
  in each dimension are detected.
--- a/doc/src/Developer_par_long.rst
+++ b/doc/src/Developer_par_long.rst
@ -0,0 +1,188 @@
 Long-range interactions
 ^^^^^^^^^^^^^^^^^^^^^^^
 For charged systems, LAMMPS can compute long-range Coulombic
 interactions via the FFT-based particle-particle/particle-mesh (PPPM)
 method implemented in :doc:`kspace style pppm and its variants
 <kspace_style>`.  For that, Coulombic interactions are partitioned into
 short- and long-range components.  The short-ranged portion is computed
 in real space as a loop over pairs of charges within a cutoff distance,
 using neighbor lists.  The long-range portion is computed in reciprocal
 space using a kspace style.  For the PPPM implementation the simulation
 cell is overlaid with a regular FFT grid in 3d. It proceeds in several stages:
 a) each atom's point charge is interpolated to nearby FFT grid points,
 b) a forward 3d FFT is performed,
 c) a convolution operation is performed in reciprocal space,
 d) one or more inverse 3d FFTs are performed, and
 e) electric field values from grid points near each atom are interpolated to compute
   its forces.
 For any of the spatial-decomposition partitioning schemes each processor
 owns the brick-shaped portion of FFT grid points contained within its
 sub-domain.  The two interpolation operations use a stencil of grid
 points surrounding each atom.  To accommodate the stencil size, each
 processor also stores a few layers of ghost grid points surrounding its
 brick.  Forward and reverse communication of grid point values is
 performed similar to the corresponding :doc:`atom data communication
 <Developer_par_comm>`.  In this case, electric field values on owned
 grid points are sent to neighboring processors to become ghost point
 values.  Likewise charge values on ghost points are sent and summed to
 values on owned points.
 For triclinic simulation boxes, the FFT grid planes are parallel to
 the box faces, but the mapping of charge and electric field values
 to/from grid points is done in reduced coordinates where the tilted
 box is conceptually a unit cube, so that the stencil and FFT
 operations are unchanged.  However the FFT grid size required for a
 given accuracy is larger for triclinic domains than it is for
 orthogonal boxes.
 .. _fft-parallel:
 .. figure:: img/fft-decomp-parallel.png
   :align: center
   parallel FFT in PPPM
   Stages of a parallel FFT for a simulation domain overlaid
   with an 8x8x8 3d FFT grid, partitioned across 64 processors.
   Within each of the 4 diagrams, grid cells of the same color are
   owned by a single processor; for simplicity only cells owned by 4
   or 8 of the 64 processors are colored.  The two images on the left
   illustrate brick-to-pencil communication.  The two images on the
   right illustrate pencil-to-pencil communication, which in this
   case transposes the *y* and *z* dimensions of the grid.
 Parallel 3d FFTs require substantial communication relative to their
 computational cost.  A 3d FFT is implemented by a series of 1d FFTs
 along the *x-*, *y-*, and *z-*\ direction of the FFT grid.  Thus the FFT
 grid cannot be decomposed like atoms into 3 dimensions for parallel
 processing of the FFTs but only in 1 (as planes) or 2 (as pencils)
 dimensions and in between the steps the grid needs to be transposed to
 have the FFT grid portion "owned" by each MPI process complete in the
 direction of the 1d FFTs it has to perform. LAMMPS uses the
 pencil-decomposition algorithm as shown in the :ref:`fft-parallel` figure.
 Initially (far left), each processor owns a brick of same-color grid
 cells (actually grid points) contained within its sub-domain.  A
 brick-to-pencil communication operation converts this layout to 1d
 pencils in the *x*-dimension (center left).  Again, cells of the same
 color are owned by the same processor.  Each processor can then compute
 a 1d FFT on each pencil of data it wholly owns using a call to the
 configured FFT library.  A pencil-to-pencil communication then converts
 this layout to pencils in the *y* dimension (center right) which
 effectively transposes the *x* and *y* dimensions of the grid, followed
 by 1d FFTs in *y*.  A final transpose of pencils from *y* to *z* (far
 right) followed by 1d FFTs in *z* completes the forward FFT.  The data
 is left in a *z*-pencil layout for the convolution operation.  One or
 more inverse FFTs then perform the sequence of 1d FFTs and communication
 steps in reverse order; the final layout of resulting grid values is the
 same as the initial brick layout.
 Each communication operation within the FFT (brick-to-pencil or
 pencil-to-pencil or pencil-to-brick) converts one tiling of the 3d grid
 to another, where a tiling in this context means an assignment of a
 small brick-shaped subset of grid points to each processor, the union of
 which comprises the entire grid.  The parallel `fftMPI library
 <https://lammps.github.io/fftmpi/>`_ written for LAMMPS allows arbitrary
 definitions of the tiling so that an irregular partitioning of the
 simulation domain can use it directly.  Transforming data from one
 tiling to another is implemented in `fftMPI` using point-to-point
 communication, where each processor sends data to a few other
 processors, since each tile in the initial tiling overlaps with a
 handful of tiles in the final tiling.
 The transformations could also be done using collective communication
 across all *P* processors with a single call to ``MPI_Alltoall()``, but
 this is typically much slower.  However, for the specialized brick and
 pencil tiling illustrated in the :ref:`fft-parallel` figure, collective
 communication across the entire MPI communicator is not required.  In
 the example an :math:`8^3` grid with 512 grid cells is partitioned
 across 64 processors; each processor owns a 2x2x2 3d brick of grid
 cells.  The initial brick-to-pencil communication (upper left to upper
 right) only requires collective communication within subgroups of 4
 processors, as illustrated by the 4 colors.  More generally, a
 brick-to-pencil communication can be performed by partitioning *P*
 processors into :math:`P^{\frac{2}{3}}` subgroups of
 :math:`P^{\frac{1}{3}}` processors each.  Each subgroup performs
 collective communication only within its subgroup.  Similarly,
 pencil-to-pencil communication can be performed by partitioning *P*
 processors into :math:`P^{\frac{1}{2}}` subgroups of
 :math:`P^{\frac{1}{2}}` processors each.  This is illustrated in the
 figure for the :math:`y \Rightarrow z` communication (center).  An
 eight-processor subgroup owns the front *yz* plane of data and performs
 collective communication within the subgroup to transpose from a
 *y*-pencil to *z*-pencil layout.
 LAMMPS invokes point-to-point communication by default, but also
 provides the option of partitioned collective communication when using a
 :doc:`kspace_modify collective yes <kspace_modify>` command to switch to
 that mode.  In the latter case, the code detects the size of the
 disjoint subgroups and partitions the single *P*-size communicator into
 multiple smaller communicators, each of which invokes collective
 communication.  Testing on a large IBM Blue Gene/Q machine at Argonne
 National Labs showed a significant improvement in FFT performance for
 large processor counts; partitioned collective communication was faster
 than point-to-point communication or global collective communication
 involving all *P* processors.
 Here are some additional details about FFTs for long-range and related
 grid/particle operations that LAMMPS supports:
 - The fftMPI library allows each grid dimension to be a multiple of
  small prime factors (2,3,5), and allows any number of processors to
  perform the FFT.  The resulting brick and pencil decompositions are
  thus not always as well-aligned but the size of subgroups of
  processors for the two modes of communication (brick/pencil and
  pencil/pencil) still scale as :math:`O(P^{\frac{1}{3}})` and
  :math:`O(P^{\frac{1}{2}})`.
 - For efficiency in performing 1d FFTs, the grid transpose
  operations illustrated in the :ref:`fft-parallel` figure also involve
  reordering the 3d data so that a different dimension is contiguous
  in memory.  This reordering can be done during the packing or
  unpacking of buffers for MPI communication.
 - For large systems and particularly a large number of MPI processes,
  the dominant cost for parallel FFTs is often the communication, not
  the computation of 1d FFTs, even though the latter scales as :math:`N
  \log(N)` in the number of grid points *N* per grid direction.  This is
  due to the fact that only a 2d decomposition into pencils is possible
  while atom data (and their corresponding short-range force and energy
  computations) can be decomposed efficiently in 3d.
  This can be addressed by reducing the number of MPI processes involved
  in the MPI communication by using :doc:`hybrid MPI + OpenMP
  parallelization <Speed_omp>`.  This will use OpenMP parallelization
  inside the MPI domains and while that may have a lower parallel
  efficiency, it reduces the communication overhead.
  As an alternative it is also possible to start a :ref:`multi-partition
  <partition>` calculation and then use the :doc:`verlet/split
  integrator <run_style>` to perform the PPPM computation on a
  dedicated, separate partition of MPI processes.  This uses an integer
  "1:*p*" mapping of *p* sub-domains of the atom decomposition to one
  sub-domain of the FFT grid decomposition and where pairwise non-bonded
  and bonded forces and energies are computed on the larger partition
  and the PPPM kspace computation concurrently on the smaller partition.
 - LAMMPS also implements PPPM-based solvers for other long-range
  interactions, dipole and dispersion (Lennard-Jones), which can be used
  in conjunction with long-range Coulombics for point charges.
 - LAMMPS implements a ``GridComm`` class which overlays the simulation
  domain with a regular grid, partitions it across processors in a
  manner consistent with processor sub-domains, and provides methods for
  forward and reverse communication of owned and ghost grid point
  values.  It is used for PPPM as an FFT grid (as outlined above) and
  also for the MSM algorithm which uses a cascade of grid sizes from
  fine to coarse to compute long-range Coulombic forces.  The ``GridComm``
  class is also useful for models where continuum fields interact with
  particles.  For example, the two-temperature model (TTM) defines heat
  transfer between atoms (particles) and electrons (continuum gas) where
  spatial variations in the electron temperature are computed by finite
  differences of a discretized heat equation on a regular grid.  The
  :doc:`fix ttm/grid <fix_ttm>` command uses the ``GridComm`` class
  internally to perform its grid operations on a distributed grid
  instead of the original :doc:`fix ttm <fix_ttm>` which uses a
  replicated grid.
--- a/doc/src/Developer_par_neigh.rst
+++ b/doc/src/Developer_par_neigh.rst
@ -0,0 +1,159 @@
 Neighbor lists
 ^^^^^^^^^^^^^^
 To compute forces efficiently, each processor creates a Verlet-style
 neighbor list which enumerates all pairs of atoms *i,j* (*i* = owned,
 *j* = owned or ghost) with separation less than the applicable
 neighbor list cutoff distance.  In LAMMPS the neighbor lists are stored
 in a multiple-page data structure; each page is a contiguous chunk of
 memory which stores vectors of neighbor atoms *j* for many *i* atoms.
 This allows pages to be incrementally allocated or deallocated in blocks
 as needed.  Neighbor lists typically consume the most memory of any data
 structure in LAMMPS.  The neighbor list is rebuilt (from scratch) once
 every few timesteps, then used repeatedly each step for force or other
 computations.  The neighbor cutoff distance is :math:`R_n = R_f +
 \Delta_s`, where :math:`R_f` is the (largest) force cutoff defined by
 the interatomic potential for computing short-range pairwise or manybody
 forces and :math:`\Delta_s` is a "skin" distance that allows the list to
 be used for multiple steps assuming that atoms do not move very far
 between consecutive time steps.  Typically the code triggers
 reneighboring when any atom has moved half the skin distance since the
 last reneighboring; this and other options of the neighbor list rebuild
 can be adjusted with the :doc:`neigh_modify <neigh_modify>` command.
 On steps when reneighboring is performed, atoms which have moved outside
 their owning processor's sub-domain are first migrated to new processors
 via communication.  Periodic boundary conditions are also (only)
 enforced on these steps to ensure each atom is re-assigned to the
 correct processor.  After migration, the atoms owned by each processor
 are stored in a contiguous vector.  Periodically each processor
 spatially sorts owned atoms within its vector to reorder it for improved
 cache efficiency in force computations and neighbor list building.  For
 that, atoms are spatially binned and then reordered so that atoms in the
 same bin are adjacent in the vector.  Atom sorting can be disabled or
 its settings modified with the :doc:`atom_modify <atom_modify>` command.
 .. _neighbor-stencil:
 .. figure:: img/neigh-stencil.png
   :align: center
   neighbor list stencils
   A 2d simulation sub-domain (thick black line) and the corresponding
   ghost atom cutoff region (dashed blue line) for both orthogonal
   (left) and triclinic (right) domains.  A regular grid of neighbor
   bins (thin lines) overlays the entire simulation domain and need not
   align with sub-domain boundaries; only the portion overlapping the
   augmented sub-domain is shown.  In the triclinic case it overlaps the
   bounding box of the tilted rectangle.  The blue- and red-shaded bins
   represent a stencil of bins searched to find neighbors of a particular
   atom (black dot).
 To build a local neighbor list in linear time, the simulation domain is
 overlaid (conceptually) with a regular 3d (or 2d) grid of neighbor bins,
 as shown in the :ref:`neighbor-stencil` figure for 2d models and a
 single MPI processor's sub-domain.  Each processor stores a set of
 neighbor bins which overlap its sub-domain extended by the neighbor
 cutoff distance :math:`R_n`.  As illustrated, the bins need not align
 with processor boundaries; an integer number in each dimension is fit to
 the size of the entire simulation box.
 Most often LAMMPS builds what it calls a "half" neighbor list where
 each *i,j* neighbor pair is stored only once, with either atom *i* or
 *j* as the central atom.  The build can be done efficiently by using a
 pre-computed "stencil" of bins around a central origin bin which
 contains the atom whose neighbors are being searched for.  A stencil
 is simply a list of integer offsets in *x,y,z* of nearby bins
 surrounding the origin bin which are close enough to contain any
 neighbor atom *j* within a distance :math:`R_n` from any atom *i* in the
 origin bin.  Note that for a half neighbor list, the stencil can be
 asymmetric since each atom only need store half its nearby neighbors.
 These stencils are illustrated in the figure for a half list and a bin
 size of :math:`\frac{1}{2} R_n`.  There are 13 red+blue stencil bins in
 2d (for the orthogonal case, 15 for triclinic).  In 3d there would be
 63, 13 in the plane of bins that contain the origin bin and 25 in each
 of the two planes above it in the *z* direction (75 for triclinic).  The
 reason the triclinic stencil has extra bins is because the bins tile the
 bounding box of the entire triclinic domain and thus are not periodic
 with respect to the simulation box itself.  The stencil and logic for
 determining which *i,j* pairs to include in the neighbor list are
 altered slightly to account for this.
 To build a neighbor list, a processor first loops over its "owned" plus
 "ghost" atoms and assigns each to a neighbor bin.  This uses an integer
 vector to create a linked list of atom indices within each bin.  It then
 performs a triply-nested loop over its owned atoms *i*, the stencil of
 bins surrounding atom *i*'s bin, and the *j* atoms in each stencil bin
 (including ghost atoms).  If the distance :math:`r_{ij} < R_n`, then
 atom *j* is added to the vector of atom *i*'s neighbors.
 Here are additional details about neighbor list build options LAMMPS
 supports:
 - The choice of bin size is an option; a size half of :math:`R_n` has
  been found to be optimal for many typical cases.  Smaller bins incur
  additional overhead to loop over; larger bins require more distance
  calculations.  Note that for smaller bin sizes, the 2d stencil in the
  figure would be more semi-circular in shape (hemispherical in 3d),
  with bins near the corners of the square eliminated due to their
  distance from the origin bin.
 - Depending on the interatomic potential(s) and other commands used in
  an input script, multiple neighbor lists and stencils with different
  attributes may be needed.  This includes lists with different cutoff
  distances, e.g. for force computation versus occasional diagnostic
  computations such as a radial distribution function, or for the
  r-RESPA time integrator which can partition pairwise forces by
  distance into subsets computed at different time intervals.  It
  includes "full" lists (as opposed to half lists) where each *i,j* pair
  appears twice, stored once with *i* and once with *j*, and which use a larger
  symmetric stencil.  It also includes lists with partial enumeration of
  ghost atom neighbors.  The full and ghost-atom lists are used by
  various manybody interatomic potentials.  Lists may also use different
  criteria for inclusion of a pair interaction.  Typically this
  depends only on the distance between two atoms and the cutoff
  distance.  But for finite-size coarse-grained particles with
  individual diameters (e.g. polydisperse granular particles), it can
  also depend on the diameters of the two particles.
 - When using :doc:`pair style hybrid <pair_hybrid>` multiple sub-lists
  of the master neighbor list for the full system need to be generated,
  one for each sub-style, which contains only the *i,j* pairs needed to
  compute interactions between subsets of atoms for the corresponding
  potential.  This means not all *i* or *j* atoms owned by a processor
  are included in a particular sub-list.
 - Some models use different cutoff lengths for pairwise interactions
  between different kinds of particles which are stored in a single
  neighbor list.  One example is a solvated colloidal system with large
  colloidal particles where colloid/colloid, colloid/solvent, and
  solvent/solvent interaction cutoffs can be dramatically different.
  Another is a model of polydisperse finite-size granular particles;
  pairs of particles interact only when they are in contact with each
  other.  Mixtures with particle size ratios as high as 10-100x may be
  used to model realistic systems.  Efficient neighbor list building
  algorithms for these kinds of systems are available in LAMMPS.  They
  include a method which uses different stencils for different cutoff
  lengths and trims the stencil to only include bins that straddle the
  cutoff sphere surface.  More recently a method which uses both
  multiple stencils and multiple bin sizes was developed; it builds
  neighbor lists efficiently for systems with particles of any size
  ratio, though other considerations (timestep size, force computations)
  may limit the ability to model systems with huge polydispersity.
 - For small and sparse systems and as a fallback method, LAMMPS also
  supports neighbor list construction without binning by using a full
  :math:`O(N^2)` loop over all *i,j* atom pairs in a sub-domain when
  using the :doc:`neighbor nsq <neighbor>` command.
 - Dependent on the "pair" setting of the :doc:`newton <newton>` command,
  the "half" neighbor lists may contain **all** pairs of atoms where
  atom *j* is a ghost atom (i.e. when the newton pair setting is *off*).
  For the newton pair *on* setting the atom *j* is only added to the
  list if its *z* coordinate is larger, or if equal the *y* coordinate
  is larger, and if that is equal, too, the *x* coordinate is larger.  For
  homogeneously dense systems that will result in picking neighbors from
  a same size sector in always the same direction relative to the
  "owned" atom and thus it should lead to similar length neighbor lists
  and thus reduce the chance of a load imbalance.
--- a/doc/src/Developer_par_openmp.rst
+++ b/doc/src/Developer_par_openmp.rst
@ -0,0 +1,114 @@
 OpenMP Parallelism
 ^^^^^^^^^^^^^^^^^^
 The styles in the INTEL, KOKKOS, and OPENMP packages offer to use OpenMP
 thread parallelism to predominantly distribute loops over local data
 and thus follow an orthogonal parallelization strategy to the
 decomposition into spatial domains used by the :doc:`MPI partitioning
 <Developer_par_part>`.  For clarity, this section discusses only the
 implementation in the OPENMP package as it is the simplest. The INTEL
 and KOKKOS packages offer additional options and are more complex since
 they support more features and different hardware like co-processors
 or GPUs.
 One of the key decisions when implementing the OPENMP package was to
 keep the changes to the source code small, so that it would be easier to
 maintain the code and keep it in sync with the non-threaded standard
 implementation.  This is achieved by a) making the OPENMP version a
 derived class from the regular version (e.g. ``PairLJCutOMP`` from
 ``PairLJCut``) and overriding only methods that are multi-threaded or
 need to be modified to support multi-threading (similar to what was done
 in the OPT package), b) keeping the structure in the modified code very
 similar so that side-by-side comparisons are still useful, and c)
 offloading additional functionality and multi-thread support functions
 into three separate classes ``ThrOMP``, ``ThrData``, and ``FixOMP``.
 ``ThrOMP`` provides additional, multi-thread aware functionality not
 available in the corresponding base class (e.g. ``Pair`` for
 ``PairLJCutOMP``) like multi-thread aware variants of the "tally"
 functions. Those functions are made available through multiple
 inheritance so those new functions have to have unique names to avoid
 ambiguities; typically ``_thr`` is appended to the name of the function.
 ``ThrData`` is a class that manages per-thread data structures.
 It is used instead of extending the corresponding storage to per-thread
 arrays to avoid slowdowns due to "false sharing" when multiple threads
 update adjacent elements in an array and thus force the CPU cache lines
 to be reset and re-fetched.  ``FixOMP`` finally manages the "multi-thread
 state" like settings and access to per-thread storage, it is activated
 by the :doc:`package omp <package>` command.
 Avoiding data races
 """""""""""""""""""
 A key problem when implementing thread parallelism in an MD code is
 to avoid data races when updating accumulated properties like forces,
 energies, and stresses.  When interactions are computed, they always
 involve multiple atoms and thus there are race conditions when multiple
 threads want to update per-atom data of the same atoms.  Five possible
 strategies have been considered to avoid this:
 1) restructure the code so that there is no overlapping access possible
   when computing in parallel, e.g. by breaking lists into multiple
   parts and synchronizing threads in between.
 2) have each thread be "responsible" for a specific group of atoms and
   compute these interactions multiple times, once on each thread that
   is responsible for a given atom and then have each thread only update
   the properties of this atom.
 3) use mutexes around functions and regions of code where the data race
   could happen
 4) use atomic operations when updating per-atom properties
 5) use replicated per-thread data structures to accumulate data without
   conflicts and then use a reduction to combine those results into the
   data structures used by the regular style.
 Option 5 was chosen for the OPENMP package because it would retain the
 performance for the case of 1 thread and the code would be more
 maintainable.  Option 1 would require extensive code changes,
 particularly to the neighbor list code; option 2 would have incurred a
 2x or more performance penalty for the serial case; option 3 causes
 significant overhead and would enforce serialization of operations in
 inner loops and thus defeat the purpose of multi-threading; option 4
 slows down the serial case although not quite as bad as option 2.  The
 downside of option 5 is that the overhead of the reduction operations
 grows with the number of threads used, so there would be a crossover
 point where options 2 or 4 would result in faster execution.  That is
 why option 2 for example is used in the GPU package because a GPU is a
 processor with a massive number of threads.  However, since the MPI
 parallelization is generally more effective for typical MD systems, the
 expectation is that thread parallelism is only used for a smaller number
 of threads (2-8).  At the time of its implementation, that number was
 equivalent to the number of CPU cores per CPU socket on high-end
 supercomputers.
 Thus arrays like the force array are dimensioned to the number of atoms
 times the number of threads when enabling OpenMP support and inside the
 compute functions a pointer to a different chunk is obtained by each thread.
 Similarly, accumulators like potential energy or virial are kept in
 per-thread instances of the ``ThrData`` class and then only reduced and
 stored in their global counterparts at the end of the force computation.
 Loop scheduling
 """""""""""""""
 Multi-thread parallelization is applied by distributing (outer) loops
 statically across threads.  Typically this would be the loop over local
 atoms *i* when processing *i,j* pairs of atoms from a neighbor list.
 The design of the neighbor list code results in atoms having a similar
 number of neighbors for homogeneous systems and thus load imbalances
 across threads are not common and typically happen for systems where
 also the MPI parallelization would be unbalanced, which would typically
 have a more pronounced impact on the performance.  This same loop
 scheduling scheme can also be applied to the reduction operations on
 per-atom data to try and reduce the overhead of the reduction operation.
 Neighbor list parallelization
 """""""""""""""""""""""""""""
 In addition to the parallelization of force computations, also the
 generation of the neighbor lists is parallelized.  As explained
 previously, neighbor lists are built by looping over "owned" atoms and
 storing the neighbors in "pages".  In the OPENMP variants of the
 neighbor list code, each thread operates on a different chunk of "owned"
 atoms and allocates and fills its own set of pages with neighbor list
 data.  This is achieved by each thread keeping its own instance of the
 :cpp:class:`MyPage <LAMMPS_NS::MyPage>` page allocator class.
--- a/doc/src/Developer_par_part.rst
+++ b/doc/src/Developer_par_part.rst
@ -0,0 +1,89 @@
 Partitioning
 ^^^^^^^^^^^^
 The underlying spatial decomposition strategy used by LAMMPS for
 distributed-memory parallelism is set with the :doc:`comm_style command
 <comm_style>` and can be either "brick" (a regular grid) or "tiled".
 .. _domain-decomposition:
 .. figure:: img/domain-decomp.png
   :align: center
   domain decomposition
   This figure shows the different kinds of domain decomposition used
   for MPI parallelization: "brick" on the left with an orthogonal
   (left) and a triclinic (middle) simulation domain, and a "tiled"
   decomposition (right).  The black lines show the division into
   sub-domains and the contained atoms are "owned" by the corresponding
   MPI process. The green dashed lines indicate how sub-domains are
   extended with "ghost" atoms up to the communication cutoff distance.
 The LAMMPS simulation box is a 3d or 2d volume, which can be orthogonal
 or triclinic in shape, as illustrated in the :ref:`domain-decomposition`
 figure for the 2d case.  Orthogonal means the box edges are aligned with
 the *x*, *y*, *z* Cartesian axes, and the box faces are thus all
 rectangular.  Triclinic allows for a more general parallelepiped shape
 in which edges are aligned with three arbitrary vectors and the box
 faces are parallelograms.  In each dimension box faces can be periodic,
 or non-periodic with fixed or shrink-wrapped boundaries.  In the fixed
 case, atoms which move outside the face are deleted; shrink-wrapped
 means the position of the box face adjusts continuously to enclose all
 the atoms.
 For distributed-memory MPI parallelism, the simulation box is spatially
 decomposed (partitioned) into non-overlapping sub-domains which fill the
 box. The default partitioning, "brick", is most suitable when atom
 density is roughly uniform, as shown in the left-side images of the
 :ref:`domain-decomposition` figure.  The sub-domains comprise a regular
 grid and all sub-domains are identical in size and shape.  Both the
 orthogonal and triclinic boxes can deform continuously during a
 simulation, e.g. to compress a solid or shear a liquid, in which case
 the processor sub-domains likewise deform.
 For models with non-uniform density, the number of particles per
 processor can be load-imbalanced with the default partitioning.  This
 reduces parallel efficiency, as the overall simulation rate is limited
 by the slowest processor, i.e. the one with the largest computational
 load.  For such models, LAMMPS supports multiple strategies to reduce
 the load imbalance:
 - The processor grid decomposition is by default based on the simulation
  cell volume and tries to optimize the volume to surface ratio for the sub-domains.
  This can be changed with the :doc:`processors command <processors>`.
 - The parallel planes defining the size of the sub-domains can be shifted
  with the :doc:`balance command <balance>`.  This can be done in addition
  to choosing a more optimal processor grid.
 - The recursive bisectioning algorithm in combination with the "tiled"
  communication style can produce a partitioning with equal numbers of
  particles in each sub-domain.
 .. |decomp1| image:: img/decomp-regular.png
   :width: 24%
 .. |decomp2| image:: img/decomp-processors.png
   :width: 24%
 .. |decomp3| image:: img/decomp-balance.png
   :width: 24%
 .. |decomp4| image:: img/decomp-rcb.png
   :width: 24%
 |decomp1|  |decomp2|  |decomp3|  |decomp4|
 The pictures above demonstrate different decompositions for a 2d system
 with 12 MPI ranks.  The atom colors indicate the load imbalance of each
 sub-domain with green being optimal and red the least optimal.
 Due to the vacuum in the system, the default decomposition is unbalanced
 with several MPI ranks without atoms (left). By forcing a 1x12x1
 processor grid, every MPI rank does computations now, but the number of
 atoms per sub-domain is still uneven and the thin slice shape increases
 the amount of communication between sub-domains (center left). With a
 2x6x1 processor grid and shifting the sub-domain divisions, the load
 imbalance is further reduced and the amount of communication required
 between sub-domains is less (center right).  And using the recursive
 bisectioning leads to further improved decomposition (right).
--- a/doc/src/Developer_parallel.rst
+++ b/doc/src/Developer_parallel.rst
@ -0,0 +1,28 @@
 Parallel algorithms
 -------------------
 LAMMPS is designed to enable running simulations in parallel using the
 MPI parallel communication standard with distributed data via domain
 decomposition.  The parallelization aims to be efficient and result in good
 strong scaling (= good speedup for the same system) and good weak
 scaling (= the computational cost of enlarging the system is
 proportional to the system size).  Additional parallelization using GPUs
 or OpenMP can also be applied within the sub-domain assigned to an MPI
 process.  For clarity, most of the following illustrations show the 2d
 simulation case. The underlying algorithms in those cases, however,
 apply to both 2d and 3d cases equally well.
 .. note::
   The text and most of the figures in this chapter were adapted
   for the manual from the section on parallel algorithms in the
   :ref:`new LAMMPS paper <lammps_paper>`.
 .. toctree::
   :maxdepth: 1
   Developer_par_part
   Developer_par_comm
   Developer_par_neigh
   Developer_par_long
   Developer_par_openmp
--- a/doc/src/Developer_platform.rst
+++ b/doc/src/Developer_platform.rst
@ -0,0 +1,149 @@
 Platform abstraction functions
 ------------------------------
 The ``platform`` sub-namespace inside the ``LAMMPS_NS`` namespace
 provides a collection of wrapper and convenience functions and utilities
 that perform common tasks for which platform specific code would be
 required or for which a more high-level abstraction would be convenient
 and reduce duplicated code.  This reduces redundant implementations and
 encourages consistent behavior and thus has some overlap with the
 :doc:`"utils" sub-namespace <Developer_utils>`.
 Time functions
 ^^^^^^^^^^^^^^
 .. doxygenfunction:: cputime
   :project: progguide
 .. doxygenfunction:: walltime
   :project: progguide
 .. doxygenfunction:: usleep
   :project: progguide
 Platform information functions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. doxygenfunction:: os_info
   :project: progguide
 .. doxygenfunction:: compiler_info
   :project: progguide
 .. doxygenfunction:: cxx_standard
   :project: progguide
 .. doxygenfunction:: openmp_standard
   :project: progguide
 .. doxygenfunction:: mpi_vendor
   :project: progguide
 .. doxygenfunction:: mpi_info
   :project: progguide
 File and path functions and global constants
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. doxygenvariable:: filepathsep
   :project: progguide
 .. doxygenvariable:: pathvarsep
   :project: progguide
 .. doxygenfunction:: guesspath
   :project: progguide
 .. doxygenfunction:: path_basename
   :project: progguide
 .. doxygenfunction:: path_join
   :project: progguide
 .. doxygenfunction:: file_is_readable
   :project: progguide
 .. doxygenfunction:: is_console
   :project: progguide
 .. doxygenfunction:: path_is_directory
   :project: progguide
 .. doxygenfunction:: current_directory
   :project: progguide
 .. doxygenfunction:: list_directory
   :project: progguide
 .. doxygenfunction:: chdir
   :project: progguide
 .. doxygenfunction:: mkdir
   :project: progguide
 .. doxygenfunction:: rmdir
   :project: progguide
 .. doxygenfunction:: unlink
   :project: progguide
 Standard I/O function wrappers
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. doxygenvariable:: END_OF_FILE
   :project: progguide
 .. doxygenfunction:: ftell
   :project: progguide
 .. doxygenfunction:: fseek
   :project: progguide
 .. doxygenfunction:: ftruncate
   :project: progguide
 .. doxygenfunction:: popen
   :project: progguide
 .. doxygenfunction:: pclose
   :project: progguide
 Environment variable functions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. doxygenfunction:: putenv
   :project: progguide
 .. doxygenfunction:: list_pathenv
   :project: progguide
 .. doxygenfunction:: find_exe_path
   :project: progguide
 Dynamically loaded object or library functions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. doxygenfunction:: dlopen
   :project: progguide
 .. doxygenfunction:: dlclose
   :project: progguide
 .. doxygenfunction:: dlsym
   :project: progguide
 .. doxygenfunction:: dlerror
   :project: progguide
 Compressed file I/O functions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 .. doxygenfunction:: has_compress_extension
   :project: progguide
 .. doxygenfunction:: compressed_read
   :project: progguide
 .. doxygenfunction:: compressed_write
   :project: progguide
--- a/doc/src/Developer_utils.rst
+++ b/doc/src/Developer_utils.rst
@ -7,7 +7,9 @@ a collection of convenience functions and utilities that perform common
 tasks that are required repeatedly throughout the LAMMPS code like
 reading or writing to files with error checking or translation of
 strings into specific types of numbers with checking for validity.  This
-reduces redundant implementations and encourages consistent behavior.
+reduces redundant implementations and encourages consistent behavior and
 thus has some overlap with the :doc:`"platform" sub-namespace
 <Developer_platform>`.
 I/O with status check and similar functions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -60,6 +62,9 @@ silently returning the result of a partial conversion or zero in cases
 where the string is not a valid number.  This behavior makes it easier
 to detect typos or issues when processing input files.
 Similarly the :cpp:func:`logical() <LAMMPS_NS::utils::logical>` function
 will convert a string into a boolean and will only accept certain words.
 The *do_abort* flag should be set to ``true`` in case this function
 is called only on a single MPI rank, as that will then trigger
 a call to ``Error::one()`` for errors instead of ``Error::all()``
@ -83,6 +88,9 @@ strings for compliance without conversion.
 .. doxygenfunction:: tnumeric
   :project: progguide
 .. doxygenfunction:: logical
   :project: progguide
 String processing
 ^^^^^^^^^^^^^^^^^
@ -95,6 +103,12 @@ and parsing files or arguments.
 .. doxygenfunction:: strdup
   :project: progguide
 .. doxygenfunction:: lowercase
   :project: progguide
 .. doxygenfunction:: uppercase
   :project: progguide
 .. doxygenfunction:: trim
   :project: progguide
@ -137,21 +151,6 @@ and parsing files or arguments.
 .. doxygenfunction:: is_double
   :project: progguide
 File and path functions
 ^^^^^^^^^^^^^^^^^^^^^^^^^
 .. doxygenfunction:: guesspath
   :project: progguide
 .. doxygenfunction:: path_basename
   :project: progguide
 .. doxygenfunction:: path_join
   :project: progguide
 .. doxygenfunction:: file_is_readable
   :project: progguide
 Potential file functions
 ^^^^^^^^^^^^^^^^^^^^^^^^
--- a/doc/src/Howto_thermostat.rst
+++ b/doc/src/Howto_thermostat.rst
@ -2,8 +2,8 @@ Thermostats
 ===========
 Thermostatting means controlling the temperature of particles in an MD
-simulation.  :doc:`Barostatting <Howto_barostat>` means controlling the
+simulation.  :doc:`Barostatting <Howto_barostat>` means controlling
-pressure.  Since the pressure includes a kinetic component due to
+the pressure.  Since the pressure includes a kinetic component due to
 particle velocities, both these operations require calculation of the
 temperature.  Typically a target temperature (T) and/or pressure (P)
 is specified by the user, and the thermostat or barostat attempts to
@ -26,11 +26,13 @@ can be invoked via the *dpd/tstat* pair style:
 * :doc:`pair_style dpd/tstat <pair_dpd>`
 :doc:`Fix nvt <fix_nh>` only thermostats the translational velocity of
-particles.  :doc:`Fix nvt/sllod <fix_nvt_sllod>` also does this, except
+particles.  :doc:`Fix nvt/sllod <fix_nvt_sllod>` also does this,
-that it subtracts out a velocity bias due to a deforming box and
+except that it subtracts out a velocity bias due to a deforming box
-integrates the SLLOD equations of motion.  See the :doc:`Howto nemd <Howto_nemd>` page for further details.  :doc:`Fix nvt/sphere <fix_nvt_sphere>` and :doc:`fix nvt/asphere <fix_nvt_asphere>` thermostat not only translation
+and integrates the SLLOD equations of motion.  See the :doc:`Howto
-velocities but also rotational velocities for spherical and aspherical
+nemd <Howto_nemd>` page for further details.  :doc:`Fix nvt/sphere
-particles.
+<fix_nvt_sphere>` and :doc:`fix nvt/asphere <fix_nvt_asphere>`
 thermostat not only translation velocities but also rotational
 velocities for spherical and aspherical particles.
 .. note::
@ -40,25 +42,31 @@ particles.
   e.g. molecular systems.  The latter can be tricky to do correctly.
 DPD thermostatting alters pairwise interactions in a manner analogous
-to the per-particle thermostatting of :doc:`fix langevin <fix_langevin>`.
+to the per-particle thermostatting of :doc:`fix langevin
 <fix_langevin>`.
-Any of the thermostatting fixes can be instructed to use custom temperature
+Any of the thermostatting fixes can be instructed to use custom
-computes that remove bias which has two effects:  first, the current
+temperature computes that remove bias which has two effects: first,
-calculated temperature, which is compared to the requested target temperature,
+the current calculated temperature, which is compared to the requested
-is calculated with the velocity bias removed;  second, the thermostat adjusts
+target temperature, is calculated with the velocity bias removed;
-only the thermal temperature component of the particle's velocities, which are
+second, the thermostat adjusts only the thermal temperature component
-the velocities with the bias removed.  The removed bias is then added back
+of the particle's velocities, which are the velocities with the bias
-to the adjusted velocities.  See the doc pages for the individual
+removed.  The removed bias is then added back to the adjusted
-fixes and for the :doc:`fix_modify <fix_modify>` command for
+velocities.  See the doc pages for the individual fixes and for the
-instructions on how to assign a temperature compute to a
+:doc:`fix_modify <fix_modify>` command for instructions on how to
-thermostatting fix.  For example, you can apply a thermostat to only
+assign a temperature compute to a thermostatting fix.
 the x and z components of velocity by using it in conjunction with
 :doc:`compute temp/partial <compute_temp_partial>`.  Or you could
 thermostat only the thermal temperature of a streaming flow of
 particles without affecting the streaming velocity, by using
 :doc:`compute temp/profile <compute_temp_profile>`.
-Below is a list of some custom temperature computes that can be used like that:
+For example, you can apply a thermostat only to atoms in a spatial
 region by using it in conjunction with :doc:`compute temp/region
 <compute_temp_region>`.  Or you can apply a thermostat to only the x
 and z components of velocity by using it with :doc:`compute
 temp/partial <compute_temp_partial>`.  Or you could thermostat only
 the thermal temperature of a streaming flow of particles without
 affecting the streaming velocity, by using :doc:`compute temp/profile
 <compute_temp_profile>`.
 Below is a list of custom temperature computes that can be used like
 that:
 * :doc:`compute_temp_asphere`
 * :doc:`compute_temp_body`
@ -72,8 +80,6 @@ Below is a list of some custom temperature computes that can be used like that:
 * :doc:`compute_temp_rotate`
 * :doc:`compute_temp_sphere`
 .. note::
   Only the nvt fixes perform time integration, meaning they update
@ -86,17 +92,17 @@ Below is a list of some custom temperature computes that can be used like that:
 * :doc:`fix nve/sphere <fix_nve_sphere>`
 * :doc:`fix nve/asphere <fix_nve_asphere>`
-Thermodynamic output, which can be setup via the
+Thermodynamic output, which can be set up via the :doc:`thermo_style
-:doc:`thermo_style <thermo_style>` command, often includes temperature
+<thermo_style>` command, often includes temperature values.  As
-values.  As explained on the page for the
+explained on the page for the :doc:`thermo_style <thermo_style>`
-:doc:`thermo_style <thermo_style>` command, the default temperature is
+command, the default temperature is set up by the thermo command
-setup by the thermo command itself.  It is NOT the temperature
+itself.  It is NOT the temperature associated with any thermostatting
-associated with any thermostatting fix you have defined or with any
+fix you have defined or with any compute you have defined that
-compute you have defined that calculates a temperature.  The doc pages
+calculates a temperature.  The doc pages for the thermostatting fixes
-for the thermostatting fixes explain the ID of the temperature compute
+explain the ID of the temperature compute they create.  Thus if you
-they create.  Thus if you want to view these temperatures, you need to
+want to view these temperatures, you need to specify them explicitly
-specify them explicitly via the :doc:`thermo_style custom <thermo_style>` command.  Or you can use the
+via the :doc:`thermo_style custom <thermo_style>` command.  Or you can
-:doc:`thermo_modify <thermo_modify>` command to re-define what
+use the :doc:`thermo_modify <thermo_modify>` command to re-define what
 temperature compute is used for default thermodynamic output.
 ----------
--- a/doc/src/Intro_citing.rst
+++ b/doc/src/Intro_citing.rst
@ -4,28 +4,41 @@ Citing LAMMPS
 Core Algorithms
 ^^^^^^^^^^^^^^^
-Since LAMMPS is a community project, there is not a single one
+The paper mentioned below is the best overview of LAMMPS, but there are
-publication or reference that describes **all** of LAMMPS.
+also publications describing particular models or algorithms implemented
-The canonical publication that describes the foundation, that is
+in LAMMPS or complementary software that it has interfaces to.  Please
-the basic spatial decomposition approach, the neighbor finding,
+see below for how to cite contributions to LAMMPS.
 and basic communications algorithms used in LAMMPS is:
- `S. Plimpton, Fast Parallel Algorithms for Short-Range Molecular Dynamics, J Comp Phys, 117, 1-19 (1995). <http://www.sandia.gov/~sjplimp/papers/jcompphys95.pdf>`_
+.. _lammps_paper:
-So any project using LAMMPS (or a derivative application using LAMMPS as
+The latest canonical publication that describes the basic features, the
-a simulation engine) should cite this paper. A new publication
+source code design, the program structure, the spatial decomposition
-describing the developments and improvements of LAMMPS in the 25 years
+approach, the neighbor finding, basic communications algorithms, and how
-since then is currently in preparation.
+users and developers have contributed to LAMMPS is:
  `LAMMPS - A flexible simulation tool for particle-based materials modeling at the atomic, meso, and continuum scales, Comp. Phys. Comm. (accepted 09/2021), DOI:10.1016/j.cpc.2021.108171 <https://doi.org/10.1016/j.cpc.2021.108171>`_
 So a project using LAMMPS or a derivative application that uses LAMMPS
 as a simulation engine should cite this paper.  The paper is expected to
 be published in its final form under the same DOI in the first half
 of 2022.  Please also give the URL of the LAMMPS website in your paper,
 namely https://www.lammps.org.
 The original publication describing the parallel algorithms used in the
 initial versions of LAMMPS is:
  `S. Plimpton, Fast Parallel Algorithms for Short-Range Molecular Dynamics, J Comp Phys, 117, 1-19 (1995). <http://www.sandia.gov/~sjplimp/papers/jcompphys95.pdf>`_
 DOI for the LAMMPS code
 ^^^^^^^^^^^^^^^^^^^^^^^
-LAMMPS developers use the `Zenodo service at CERN
+LAMMPS developers use the `Zenodo service at CERN <https://zenodo.org/>`_
-<https://zenodo.org/>`_ to create digital object identifies (DOI) for
+to create digital object identifiers (DOI) for stable releases of the
-stable releases of the LAMMPS code. There are two types of DOIs for the
+LAMMPS source code. There are two types of DOIs for the LAMMPS source code.
-LAMMPS source code: the canonical DOI for **all** versions of LAMMPS,
+
-which will always point to the **latest** stable release version is:
+The canonical DOI for **all** versions of LAMMPS, which will always
 point to the **latest** stable release version is:
 - DOI: `10.5281/zenodo.3726416 <https://dx.doi.org/10.5281/zenodo.3726416>`_
@ -45,11 +58,13 @@ about LAMMPS and its features.
 Citing contributions
 ^^^^^^^^^^^^^^^^^^^^
-LAMMPS has many features and that use either previously published
+LAMMPS has many features that use either previously published methods
-methods and algorithms or novel features.  It also includes potential
+and algorithms or novel features.  It also includes potential parameter
-parameter filed for specific models.  Where available, a reminder about
+files for specific models.  Where available, a reminder about references
-references for optional features used in a specific run is printed to
+for optional features used in a specific run is printed to the screen
-the screen and log file.  Style and output location can be selected with
+and log file.  Style and output location can be selected with the
-the :ref:`-cite command-line switch <cite>`.  Additional references are
+:ref:`-cite command-line switch <cite>`.  Additional references are
 given in the documentation of the :doc:`corresponding commands
-<Commands_all>` or in the :doc:`Howto tutorials <Howto>`.
+<Commands_all>` or in the :doc:`Howto tutorials <Howto>`.  So please
 make certain that you provide the proper acknowledgments and citations
 in any published works using LAMMPS.
--- a/doc/src/Library_create.rst
+++ b/doc/src/Library_create.rst
@ -34,7 +34,7 @@ simple example demonstrating its use:
     int lmpargc = sizeof(lmpargv)/sizeof(const char *);
     /* create LAMMPS instance */
-     handle = lammps_open_no_mpi(lmpargc, lmpargv, NULL);
+     handle = lammps_open_no_mpi(lmpargc, (char **)lmpargv, NULL);
     if (handle == NULL) {
       printf("LAMMPS initialization failed");
       lammps_mpi_finalize();
--- a/doc/src/PDF/colvars-refman-lammps.pdf
+++ b/doc/src/PDF/colvars-refman-lammps.pdf
--- a/doc/src/Run_basics.rst
+++ b/doc/src/Run_basics.rst
@ -2,17 +2,25 @@ Basics of running LAMMPS
 ========================
 LAMMPS is run from the command line, reading commands from a file via
-the -in command line flag, or from standard input.
+the -in command line flag, or from standard input.  Using the "-in
-Using the "-in in.file" variant is recommended:
+in.file" variant is recommended (see note below).  The name of the
 LAMMPS executable is either ``lmp`` or ``lmp_<machine>`` with
 ``<machine>`` being the machine string used when compiling LAMMPS.  This
 is required when compiling LAMMPS with the traditional build system
 (e.g. with ``make mpi``), but optional when using CMake to configure and
 build LAMMPS:
 .. code-block:: bash
   $ lmp_serial -in in.file
   $ lmp_serial < in.file
   $ lmp -in in.file
   $ lmp < in.file
   $ /path/to/lammps/src/lmp_serial -i in.file
   $ mpirun -np 4 lmp_mpi -in in.file
   $ mpiexec -np 4 lmp -in in.file
   $ mpirun -np 8 /path/to/lammps/src/lmp_mpi -in in.file
-   $ mpirun -np 6 /usr/local/bin/lmp -in in.file
+   $ mpiexec -n 6 /usr/local/bin/lmp -in in.file
 You normally run the LAMMPS command in the directory where your input
 script is located.  That is also where output files are produced by
@ -23,7 +31,7 @@ executable itself can be placed elsewhere.
 .. note::
   The redirection operator "<" will not always work when running
-   in parallel with mpirun; for those systems the -in form is required.
+   in parallel with mpirun or mpiexec; for those systems the -in form is required.
 As LAMMPS runs it prints info to the screen and a logfile named
 *log.lammps*\ .  More info about output is given on the
--- a/doc/src/fix_langevin.rst
+++ b/doc/src/fix_langevin.rst
@ -138,16 +138,18 @@ temperature with optional time-dependence as well.
 Like other fixes that perform thermostatting, this fix can be used
 with :doc:`compute commands <compute>` that remove a "bias" from the
-atom velocities.  E.g. removing the center-of-mass velocity from a
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-group of atoms or removing the x-component of velocity from the
+spatial :doc:`region <region>`, or to remove the center-of-mass
-calculation.  This is not done by default, but only if the
+velocity from a group of atoms, or to remove the x-component of
-:doc:`fix_modify <fix_modify>` command is used to assign a temperature
+velocity from the calculation.
-compute to this fix that includes such a bias term.  See the doc pages
+
-for individual :doc:`compute commands <compute>` to determine which ones
+This is not done by default, but only if the :doc:`fix_modify
-include a bias.  In this case, the thermostat works in the following
+<fix_modify>` command is used to assign a temperature compute to this
-manner: bias is removed from each atom, thermostatting is performed on
+fix that includes such a bias term.  See the doc pages for individual
-the remaining thermal degrees of freedom, and the bias is added back
+:doc:`compute temp commands <compute>` to determine which ones include
-in.
+a bias.  In this case, the thermostat works in the following manner:
 bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 The *damp* parameter is specified in time units and determines how
 rapidly the temperature is relaxed.  For example, a value of 100.0 means
@ -183,7 +185,8 @@ omega (which is derived from the angular momentum in the case of
 aspherical particles).
 The rotational temperature of the particles can be monitored by the
-:doc:`compute temp/sphere <compute_temp_sphere>` and :doc:`compute temp/asphere <compute_temp_asphere>` commands with their rotate
+:doc:`compute temp/sphere <compute_temp_sphere>` and :doc:`compute
 temp/asphere <compute_temp_asphere>` commands with their rotate
 options.
 For the *omega* keyword there is also a scale factor of
--- a/doc/src/fix_langevin_drude.rst
+++ b/doc/src/fix_langevin_drude.rst
@ -167,17 +167,20 @@ functions, and include :doc:`thermo_style <thermo_style>` command
 keywords for the simulation box parameters and timestep and elapsed
 time.  Thus it is easy to specify a time-dependent temperature.
-Like other fixes that perform thermostatting, this fix can be used with
+Like other fixes that perform thermostatting, this fix can be used
-:doc:`compute commands <compute>` that remove a "bias" from the atom
+with :doc:`compute commands <compute>` that remove a "bias" from the
-velocities.  E.g. removing the center-of-mass velocity from a group of
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-atoms.  This is not done by default, but only if the
+spatial :doc:`region <region>`, or to remove the center-of-mass
-:doc:`fix_modify <fix_modify>` command is used to assign a temperature
+velocity from a group of atoms, or to remove the x-component of
-compute to this fix that includes such a bias term.  See the doc pages
+velocity from the calculation.
-for individual :doc:`compute commands <compute>` to determine which ones
+
-include a bias.  In this case, the thermostat works in the following
+This is not done by default, but only if the :doc:`fix_modify
-manner: bias is removed from each atom, thermostatting is performed on
+<fix_modify>` command is used to assign a temperature compute to this
-the remaining thermal degrees of freedom, and the bias is added back
+fix that includes such a bias term.  See the doc pages for individual
-in.  NOTE: this feature has not been tested.
+:doc:`compute temp commands <compute>` to determine which ones include
 a bias.  In this case, the thermostat works in the following manner:
 bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 Note: The temperature thermostatting the core-Drude particle pairs
 should be chosen low enough, so as to mimic as closely as possible the
--- a/doc/src/fix_nh.rst
+++ b/doc/src/fix_nh.rst
@ -486,19 +486,20 @@ temperature or pressure during thermodynamic output via the
 compute-ID.  It also means that changing attributes of *thermo_temp*
 or *thermo_press* will have no effect on this fix.
-Like other fixes that perform thermostatting, fix nvt and fix npt can
+Like other fixes that perform thermostatting, this fix can be used
-be used with :doc:`compute commands <compute>` that calculate a
+with :doc:`compute commands <compute>` that remove a "bias" from the
-temperature after removing a "bias" from the atom velocities.
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-E.g. removing the center-of-mass velocity from a group of atoms or
+spatial :doc:`region <region>`, or to remove the center-of-mass
-only calculating temperature on the x-component of velocity or only
+velocity from a group of atoms, or to remove the x-component of
-calculating temperature for atoms in a geometric region.  This is not
+velocity from the calculation.
-done by default, but only if the :doc:`fix_modify <fix_modify>` command
+
-is used to assign a temperature compute to this fix that includes such
+This is not done by default, but only if the :doc:`fix_modify
-a bias term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
+<fix_modify>` command is used to assign a temperature compute to this
-this case, the thermostat works in the following manner: the current
+fix that includes such a bias term.  See the doc pages for individual
-temperature is calculated taking the bias into account, bias is
+:doc:`compute temp commands <compute>` to determine which ones include
-removed from each atom, thermostatting is performed on the remaining
+a bias.  In this case, the thermostat works in the following manner:
-thermal degrees of freedom, and the bias is added back in.
+bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 ----------
--- a/doc/src/fix_npt_asphere.rst
+++ b/doc/src/fix_npt_asphere.rst
@ -48,8 +48,9 @@ can also have a bias velocity removed from them before thermostatting
 takes place; see the description below.
 Additional parameters affecting the thermostat and barostat are
-specified by keywords and values documented with the :doc:`fix npt <fix_nh>` command.  See, for example, discussion of the *temp*,
+specified by keywords and values documented with the :doc:`fix npt
-*iso*, *aniso*, and *dilate* keywords.
+<fix_nh>` command.  See, for example, discussion of the *temp*, *iso*,
 *aniso*, and *dilate* keywords.
 The particles in the fix group are the only ones whose velocities and
 positions are updated by the velocity/position update portion of the
@ -89,18 +90,19 @@ It also means that changing attributes of *thermo_temp* or
 *thermo_press* will have no effect on this fix.
 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
+with :doc:`compute commands <compute>` that remove a "bias" from the
-after removing a "bias" from the atom velocities.  E.g. removing the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-center-of-mass velocity from a group of atoms or only calculating
+spatial :doc:`region <region>`, or to remove the center-of-mass
-temperature on the x-component of velocity or only calculating
+velocity from a group of atoms, or to remove the x-component of
-temperature for atoms in a geometric region.  This is not done by
+velocity from the calculation.
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
+
-to assign a temperature compute to this fix that includes such a bias
+This is not done by default, but only if the :doc:`fix_modify
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
+<fix_modify>` command is used to assign a temperature compute to this
-this case, the thermostat works in the following manner: the current
+fix that includes such a bias term.  See the doc pages for individual
-temperature is calculated taking the bias into account, bias is
+:doc:`compute temp commands <compute>` to determine which ones include
-removed from each atom, thermostatting is performed on the remaining
+a bias.  In this case, the thermostat works in the following manner:
-thermal degrees of freedom, and the bias is added back in.
+bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 ----------
--- a/doc/src/fix_npt_body.rst
+++ b/doc/src/fix_npt_body.rst
@ -87,18 +87,19 @@ It also means that changing attributes of *thermo_temp* or
 *thermo_press* will have no effect on this fix.
 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
+with :doc:`compute commands <compute>` that remove a "bias" from the
-after removing a "bias" from the atom velocities.  E.g. removing the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-center-of-mass velocity from a group of atoms or only calculating
+spatial :doc:`region <region>`, or to remove the center-of-mass
-temperature on the x-component of velocity or only calculating
+velocity from a group of atoms, or to remove the x-component of
-temperature for atoms in a geometric region.  This is not done by
+velocity from the calculation.
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
+
-to assign a temperature compute to this fix that includes such a bias
+This is not done by default, but only if the :doc:`fix_modify
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
+<fix_modify>` command is used to assign a temperature compute to this
-this case, the thermostat works in the following manner: the current
+fix that includes such a bias term.  See the doc pages for individual
-temperature is calculated taking the bias into account, bias is
+:doc:`compute temp commands <compute>` to determine which ones include
-removed from each atom, thermostatting is performed on the remaining
+a bias.  In this case, the thermostat works in the following manner:
-thermal degrees of freedom, and the bias is added back in.
+bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 ----------
--- a/doc/src/fix_npt_cauchy.rst
+++ b/doc/src/fix_npt_cauchy.rst
@ -400,19 +400,20 @@ temperature or pressure during thermodynamic output via the
 compute-ID.  It also means that changing attributes of *thermo_temp*
 or *thermo_press* will have no effect on this fix.
-Like other fixes that perform thermostatting, fix npt/cauchy can
+Like other fixes that perform thermostatting, this fix can be used
-be used with :doc:`compute commands <compute>` that calculate a
+with :doc:`compute commands <compute>` that remove a "bias" from the
-temperature after removing a "bias" from the atom velocities.
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-E.g. removing the center-of-mass velocity from a group of atoms or
+spatial :doc:`region <region>`, or to remove the center-of-mass
-only calculating temperature on the x-component of velocity or only
+velocity from a group of atoms, or to remove the x-component of
-calculating temperature for atoms in a geometric region.  This is not
+velocity from the calculation.
-done by default, but only if the :doc:`fix_modify <fix_modify>` command
+
-is used to assign a temperature compute to this fix that includes such
+This is not done by default, but only if the :doc:`fix_modify
-a bias term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
+<fix_modify>` command is used to assign a temperature compute to this
-this case, the thermostat works in the following manner: the current
+fix that includes such a bias term.  See the doc pages for individual
-temperature is calculated taking the bias into account, bias is
+:doc:`compute temp commands <compute>` to determine which ones include
-removed from each atom, thermostatting is performed on the remaining
+a bias.  In this case, the thermostat works in the following manner:
-thermal degrees of freedom, and the bias is added back in.
+bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 ----------
--- a/doc/src/fix_npt_sphere.rst
+++ b/doc/src/fix_npt_sphere.rst
@ -103,18 +103,19 @@ appropriate compute-ID.  It also means that changing attributes of
 *thermo_temp* or *thermo_press* will have no effect on this fix.
 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
+with :doc:`compute commands <compute>` that remove a "bias" from the
-after removing a "bias" from the atom velocities.  E.g. removing the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-center-of-mass velocity from a group of atoms or only calculating
+spatial :doc:`region <region>`, or to remove the center-of-mass
-temperature on the x-component of velocity or only calculating
+velocity from a group of atoms, or to remove the x-component of
-temperature for atoms in a geometric region.  This is not done by
+velocity from the calculation.
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
+
-to assign a temperature compute to this fix that includes such a bias
+This is not done by default, but only if the :doc:`fix_modify
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
+<fix_modify>` command is used to assign a temperature compute to this
-this case, the thermostat works in the following manner: the current
+fix that includes such a bias term.  See the doc pages for individual
-temperature is calculated taking the bias into account, bias is
+:doc:`compute temp commands <compute>` to determine which ones include
-removed from each atom, thermostatting is performed on the remaining
+a bias.  In this case, the thermostat works in the following manner:
-thermal degrees of freedom, and the bias is added back in.
+bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 ----------
--- a/doc/src/fix_nvt_asphere.rst
+++ b/doc/src/fix_nvt_asphere.rst
@ -72,18 +72,19 @@ It also means that changing attributes of *thermo_temp* will have no
 effect on this fix.
 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
+with :doc:`compute commands <compute>` that remove a "bias" from the
-after removing a "bias" from the atom velocities.  E.g. removing the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-center-of-mass velocity from a group of atoms or only calculating
+spatial :doc:`region <region>`, or to remove the center-of-mass
-temperature on the x-component of velocity or only calculating
+velocity from a group of atoms, or to remove the x-component of
-temperature for atoms in a geometric region.  This is not done by
+velocity from the calculation.
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
+
-to assign a temperature compute to this fix that includes such a bias
+This is not done by default, but only if the :doc:`fix_modify
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
+<fix_modify>` command is used to assign a temperature compute to this
-this case, the thermostat works in the following manner: the current
+fix that includes such a bias term.  See the doc pages for individual
-temperature is calculated taking the bias into account, bias is
+:doc:`compute temp commands <compute>` to determine which ones include
-removed from each atom, thermostatting is performed on the remaining
+a bias.  In this case, the thermostat works in the following manner:
-thermal degrees of freedom, and the bias is added back in.
+bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 ----------
--- a/doc/src/fix_nvt_body.rst
+++ b/doc/src/fix_nvt_body.rst
@ -69,18 +69,19 @@ It also means that changing attributes of *thermo_temp* will have no
 effect on this fix.
 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
+with :doc:`compute commands <compute>` that remove a "bias" from the
-after removing a "bias" from the atom velocities.  E.g. removing the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-center-of-mass velocity from a group of atoms or only calculating
+spatial :doc:`region <region>`, or to remove the center-of-mass
-temperature on the x-component of velocity or only calculating
+velocity from a group of atoms, or to remove the x-component of
-temperature for atoms in a geometric region.  This is not done by
+velocity from the calculation.
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
+
-to assign a temperature compute to this fix that includes such a bias
+This is not done by default, but only if the :doc:`fix_modify
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
+<fix_modify>` command is used to assign a temperature compute to this
-this case, the thermostat works in the following manner: the current
+fix that includes such a bias term.  See the doc pages for individual
-temperature is calculated taking the bias into account, bias is
+:doc:`compute temp commands <compute>` to determine which ones include
-removed from each atom, thermostatting is performed on the remaining
+a bias.  In this case, the thermostat works in the following manner:
-thermal degrees of freedom, and the bias is added back in.
+bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 ----------
--- a/doc/src/fix_nvt_sllod.rst
+++ b/doc/src/fix_nvt_sllod.rst
@ -37,15 +37,16 @@ trajectory consistent with the canonical ensemble.
 This thermostat is used for a simulation box that is changing size
 and/or shape, for example in a non-equilibrium MD (NEMD) simulation.
-The size/shape change is induced by use of the :doc:`fix deform <fix_deform>` command, so each point in the simulation box
+The size/shape change is induced by use of the :doc:`fix deform
-can be thought of as having a "streaming" velocity.  This
+<fix_deform>` command, so each point in the simulation box can be
-position-dependent streaming velocity is subtracted from each atom's
+thought of as having a "streaming" velocity.  This position-dependent
-actual velocity to yield a thermal velocity which is used for
+streaming velocity is subtracted from each atom's actual velocity to
-temperature computation and thermostatting.  For example, if the box
+yield a thermal velocity which is used for temperature computation and
-is being sheared in x, relative to y, then points at the bottom of the
+thermostatting.  For example, if the box is being sheared in x,
-box (low y) have a small x velocity, while points at the top of the
+relative to y, then points at the bottom of the box (low y) have a
-box (hi y) have a large x velocity.  These velocities do not
+small x velocity, while points at the top of the box (hi y) have a
-contribute to the thermal "temperature" of the atom.
+large x velocity.  These velocities do not contribute to the thermal
 "temperature" of the atom.
 .. note::
@ -60,13 +61,15 @@ contribute to the thermal "temperature" of the atom.
   consistent.
 The SLLOD equations of motion, originally proposed by Hoover and Ladd
-(see :ref:`(Evans and Morriss) <Evans3>`), were proven to be equivalent to
+(see :ref:`(Evans and Morriss) <Evans3>`), were proven to be
-Newton's equations of motion for shear flow by :ref:`(Evans and Morriss) <Evans3>`. They were later shown to generate the desired
+equivalent to Newton's equations of motion for shear flow by
-velocity gradient and the correct production of work by stresses for
+:ref:`(Evans and Morriss) <Evans3>`. They were later shown to generate
-all forms of homogeneous flow by :ref:`(Daivis and Todd) <Daivis>`.  As
+the desired velocity gradient and the correct production of work by
-implemented in LAMMPS, they are coupled to a Nose/Hoover chain
+stresses for all forms of homogeneous flow by :ref:`(Daivis and Todd)
-thermostat in a velocity Verlet formulation, closely following the
+<Daivis>`.  As implemented in LAMMPS, they are coupled to a
-implementation used for the :doc:`fix nvt <fix_nh>` command.
+Nose/Hoover chain thermostat in a velocity Verlet formulation, closely
 following the implementation used for the :doc:`fix nvt <fix_nh>`
 command.
 .. note::
@ -94,27 +97,28 @@ underscore + "temp", and the group for the new compute is the same as
 the fix group.
 Note that this is NOT the compute used by thermodynamic output (see
-the :doc:`thermo_style <thermo_style>` command) with ID = *thermo_temp*.
+the :doc:`thermo_style <thermo_style>` command) with ID =
-This means you can change the attributes of this fix's temperature
+*thermo_temp*.  This means you can change the attributes of this fix's
-(e.g. its degrees-of-freedom) via the
+temperature (e.g. its degrees-of-freedom) via the :doc:`compute_modify
-:doc:`compute_modify <compute_modify>` command or print this temperature
+<compute_modify>` command or print this temperature during
-during thermodynamic output via the :doc:`thermo_style custom <thermo_style>` command using the appropriate compute-ID.
+thermodynamic output via the :doc:`thermo_style custom <thermo_style>`
-It also means that changing attributes of *thermo_temp* will have no
+command using the appropriate compute-ID.  It also means that changing
-effect on this fix.
+attributes of *thermo_temp* will have no effect on this fix.
 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
+with :doc:`compute commands <compute>` that remove a "bias" from the
-after removing a "bias" from the atom velocities.  E.g. removing the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-center-of-mass velocity from a group of atoms or only calculating
+spatial :doc:`region <region>`, or to remove the center-of-mass
-temperature on the x-component of velocity or only calculating
+velocity from a group of atoms, or to remove the x-component of
-temperature for atoms in a geometric region.  This is not done by
+velocity from the calculation.
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
+
-to assign a temperature compute to this fix that includes such a bias
+This is not done by default, but only if the :doc:`fix_modify
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
+<fix_modify>` command is used to assign a temperature compute to this
-this case, the thermostat works in the following manner: the current
+fix that includes such a bias term.  See the doc pages for individual
-temperature is calculated taking the bias into account, bias is
+:doc:`compute temp commands <compute>` to determine which ones include
-removed from each atom, thermostatting is performed on the remaining
+a bias.  In this case, the thermostat works in the following manner:
-thermal degrees of freedom, and the bias is added back in.
+bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 ----------
--- a/doc/src/fix_nvt_sphere.rst
+++ b/doc/src/fix_nvt_sphere.rst
@ -86,18 +86,19 @@ It also means that changing attributes of *thermo_temp* will have no
 effect on this fix.
 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
+with :doc:`compute commands <compute>` that remove a "bias" from the
-after removing a "bias" from the atom velocities.  E.g. removing the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-center-of-mass velocity from a group of atoms or only calculating
+spatial :doc:`region <region>`, or to remove the center-of-mass
-temperature on the x-component of velocity or only calculating
+velocity from a group of atoms, or to remove the x-component of
-temperature for atoms in a geometric region.  This is not done by
+velocity from the calculation.
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
+
-to assign a temperature compute to this fix that includes such a bias
+This is not done by default, but only if the :doc:`fix_modify
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
+<fix_modify>` command is used to assign a temperature compute to this
-this case, the thermostat works in the following manner: the current
+fix that includes such a bias term.  See the doc pages for individual
-temperature is calculated taking the bias into account, bias is
+:doc:`compute temp commands <compute>` to determine which ones include
-removed from each atom, thermostatting is performed on the remaining
+a bias.  In this case, the thermostat works in the following manner:
-thermal degrees of freedom, and the bias is added back in.
+bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 ----------
--- a/doc/src/fix_saed_vtk.rst
+++ b/doc/src/fix_saed_vtk.rst
@ -28,7 +28,6 @@ Syntax
         Nstart = start averaging on this timestep
       *file* arg = filename
         filename = name of file to output time averages to
       *overwrite* arg = none = overwrite output file with only latest output
 Examples
 """"""""
@ -161,10 +160,6 @@ the *file* keyword and this string is appended with _N.vtk where N is
 an index (0,1,2...) to account for situations with multiple diffraction
 intensity outputs.
 The *overwrite* keyword will continuously overwrite the output file
 with the latest output, so that it only contains one timestep worth of
 output.  This option can only be used with the *ave running* setting.
 Restart, fix_modify, output, run start/stop, minimize info
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
--- a/doc/src/fix_temp_berendsen.rst
+++ b/doc/src/fix_temp_berendsen.rst
@ -102,18 +102,19 @@ It also means that changing attributes of *thermo_temp* will have no
 effect on this fix.
 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
+with :doc:`compute commands <compute>` that remove a "bias" from the
-after removing a "bias" from the atom velocities.  E.g. removing the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-center-of-mass velocity from a group of atoms or only calculating
+spatial :doc:`region <region>`, or to remove the center-of-mass
-temperature on the x-component of velocity or only calculating
+velocity from a group of atoms, or to remove the x-component of
-temperature for atoms in a geometric region.  This is not done by
+velocity from the calculation.
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
+
-to assign a temperature compute to this fix that includes such a bias
+This is not done by default, but only if the :doc:`fix_modify
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
+<fix_modify>` command is used to assign a temperature compute to this
-this case, the thermostat works in the following manner: the current
+fix that includes such a bias term.  See the doc pages for individual
-temperature is calculated taking the bias into account, bias is
+:doc:`compute temp commands <compute>` to determine which ones include
-removed from each atom, thermostatting is performed on the remaining
+a bias.  In this case, the thermostat works in the following manner:
-thermal degrees of freedom, and the bias is added back in.
+bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 ----------
--- a/doc/src/fix_temp_csvr.rst
+++ b/doc/src/fix_temp_csvr.rst
@ -110,28 +110,29 @@ during thermodynamic output via the :doc:`thermo_style custom <thermo_style>` co
 It also means that changing attributes of *thermo_temp* will have no
 effect on this fix.
-Like other fixes that perform thermostatting, these fixes can be used
+Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
+with :doc:`compute commands <compute>` that remove a "bias" from the
-after removing a "bias" from the atom velocities.  E.g. removing the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-center-of-mass velocity from a group of atoms or only calculating
+spatial :doc:`region <region>`, or to remove the center-of-mass
-temperature on the x-component of velocity or only calculating
+velocity from a group of atoms, or to remove the x-component of
-temperature for atoms in a geometric region.  This is not done by
+velocity from the calculation.
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
+
-to assign a temperature compute to this fix that includes such a bias
+This is not done by default, but only if the :doc:`fix_modify
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
+<fix_modify>` command is used to assign a temperature compute to this
-this case, the thermostat works in the following manner: the current
+fix that includes such a bias term.  See the doc pages for individual
-temperature is calculated taking the bias into account, bias is
+:doc:`compute temp commands <compute>` to determine which ones include
-removed from each atom, thermostatting is performed on the remaining
+a bias.  In this case, the thermostat works in the following manner:
-thermal degrees of freedom, and the bias is added back in.
+bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 An important feature of these thermostats is that they have an
-associated effective energy that is a constant of motion.
+associated effective energy that is a constant of motion.  The
-The effective energy is the total energy (kinetic + potential) plus
+effective energy is the total energy (kinetic + potential) plus the
-the accumulated kinetic energy changes due to the thermostat. The
+accumulated kinetic energy changes due to the thermostat. The latter
-latter quantity is the global scalar computed by these fixes. This
+quantity is the global scalar computed by these fixes. This feature is
-feature is useful to check the integration of the equations of motion
+useful to check the integration of the equations of motion against
-against discretization errors. In other words, the conservation of
+discretization errors. In other words, the conservation of the
-the effective energy can be used to choose an appropriate integration
+effective energy can be used to choose an appropriate integration
 :doc:`timestep <timestep>`. This is similar to the usual paradigm of
 checking the conservation of the total energy in the microcanonical
 ensemble.
--- a/doc/src/fix_temp_rescale.rst
+++ b/doc/src/fix_temp_rescale.rst
@ -109,19 +109,19 @@ command using the appropriate compute-ID.  It also means that changing
 attributes of *thermo_temp* will have no effect on this fix.
 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
+with :doc:`compute commands <compute>` that remove a "bias" from the
-after removing a "bias" from the atom velocities.  E.g. removing the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-center-of-mass velocity from a group of atoms or only calculating
+spatial :doc:`region <region>`, or to remove the center-of-mass
-temperature on the x-component of velocity or only calculating
+velocity from a group of atoms, or to remove the x-component of
-temperature for atoms in a geometric region.  This is not done by
+velocity from the calculation.
-default, but only if the :doc:`fix_modify <fix_modify>` command is
+
-used to assign a temperature compute to this fix that includes such a
+This is not done by default, but only if the :doc:`fix_modify
-bias term.  See the doc pages for individual :doc:`compute commands
+<fix_modify>` command is used to assign a temperature compute to this
-<compute>` to determine which ones include a bias.  In this case, the
+fix that includes such a bias term.  See the doc pages for individual
-thermostat works in the following manner: the current temperature is
+:doc:`compute temp commands <compute>` to determine which ones include
-calculated taking the bias into account, bias is removed from each
+a bias.  In this case, the thermostat works in the following manner:
-atom, thermostatting is performed on the remaining thermal degrees of
+bias is removed from each atom, thermostatting is performed on the
-freedom, and the bias is added back in.
+remaining thermal degrees of freedom, and the bias is added back in.
 ----------
--- a/doc/src/fix_tgnh_drude.rst
+++ b/doc/src/fix_tgnh_drude.rst
@ -187,26 +187,32 @@ barostatting.
 ----------
-Like other fixes that perform thermostatting, these fixes can
+Like other fixes that perform thermostatting, this fix can be used
-be used with :doc:`compute commands <compute>` that calculate a
+with :doc:`compute commands <compute>` that remove a "bias" from the
-temperature after removing a "bias" from the atom velocities.
+atom velocities.  E.g. to apply the thermostat only to atoms within a
-This is not done by default, but only if the :doc:`fix_modify <fix_modify>` command
+spatial :doc:`region <region>`, or to remove the center-of-mass
-is used to assign a temperature compute to this fix that includes such
+velocity from a group of atoms, or to remove the x-component of
-a bias term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
+velocity from the calculation.
-this case, the thermostat works in the following manner: the current
+
-temperature is calculated taking the bias into account, bias is
+This is not done by default, but only if the :doc:`fix_modify
-removed from each atom, thermostatting is performed on the remaining
+<fix_modify>` command is used to assign a temperature compute to this
-thermal DOF, and the bias is added back in.
+fix that includes such a bias term.  See the doc pages for individual
 :doc:`compute temp commands <compute>` to determine which ones include
 a bias.  In this case, the thermostat works in the following manner:
 bias is removed from each atom, thermostatting is performed on the
 remaining thermal degrees of freedom, and the bias is added back in.
 .. note::
-   However, not all temperature compute commands are valid to be used with these fixes.
+   However, not all temperature compute commands are valid to be used
-   Precisely, only temperature compute that does not modify the DOF of the group can be used.
+   with these fixes.  Precisely, only a temperature compute that does
-   E.g. :doc:`compute temp/ramp <compute_temp_ramp>` and :doc:`compute viscosity/cos <compute_viscosity_cos>`
+   not modify the DOF of the group can be used.  E.g. :doc:`compute
-   compute the kinetic energy after remove a velocity gradient without affecting the DOF of the group,
+   temp/ramp <compute_temp_ramp>` and :doc:`compute viscosity/cos
-   then they can be invoked in this way.
+   <compute_viscosity_cos>` compute the kinetic energy after removing a
-   In contrast, :doc:`compute temp/partial <compute_temp_partial>` may remove the DOF at one or more dimensions,
+   velocity gradient without affecting the DOF of the group, so they
-   therefore it cannot be used with these fixes.
+   can be invoked in this way.  In contrast, :doc:`compute
   temp/partial <compute_temp_partial>` may remove the DOF at one or
   more dimensions, therefore it cannot be used with these fixes.
 ----------
--- a/doc/src/group.rst
+++ b/doc/src/group.rst
@ -38,7 +38,7 @@ Syntax
       *intersect* args = two or more group IDs
       *dynamic* args = parent-ID keyword value ...
         one or more keyword/value pairs may be appended
-         keyword = *region* or *var* or *every*
+         keyword = *region* or *var* or *property* or *every*
           *region* value = region-ID
           *var* value = name of variable
           *property* value = name of custom integer or floating point vector
--- a/doc/src/img/decomp-balance.png
+++ b/doc/src/img/decomp-balance.png
--- a/doc/src/img/decomp-processors.png
+++ b/doc/src/img/decomp-processors.png
--- a/doc/src/img/decomp-rcb.png
+++ b/doc/src/img/decomp-rcb.png
--- a/doc/src/img/decomp-regular.png
+++ b/doc/src/img/decomp-regular.png
--- a/doc/src/img/domain-decomp.png
+++ b/doc/src/img/domain-decomp.png
--- a/doc/src/img/fft-decomp-parallel.png
+++ b/doc/src/img/fft-decomp-parallel.png
--- a/doc/src/img/ghost-comm.png
+++ b/doc/src/img/ghost-comm.png
--- a/doc/src/img/neigh-stencil.png
+++ b/doc/src/img/neigh-stencil.png
--- a/doc/utils/requirements.txt
+++ b/doc/utils/requirements.txt
@ -1,4 +1,4 @@
-Sphinx==4.0.3
+Sphinx
 sphinxcontrib-spelling
 git+git://github.com/akohlmey/sphinx-fortran@parallel-read
 sphinx_tabs
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@ -1135,6 +1135,7 @@ Germann
 Germano
 gerolf
 Gerolf
 getrusage
 Gershgorin
 getter
 gettimeofday
@ -1222,6 +1223,7 @@ Guo
 gw
 gyromagnetic
 gz
 gzip
 gzipped
 Haak
 Hafskjold
@ -1809,6 +1811,7 @@ lyon
 Lysogorskiy
 Lyulin
 lz
 lzma
 Maaravi
 MACHDYN
 machdyn
@ -2262,6 +2265,7 @@ Nmols
 nn
 nnodes
 Nocedal
 nO
 nocite
 nocoeff
 nodeless
@ -2761,6 +2765,7 @@ REAXFF
 ReaxFF
 reaxff
 rebo
 recurse
 recursing
 Ree
 refactored
@ -3440,6 +3445,7 @@ usec
 uSemiParallel
 userguide
 username
 usleep
 usr
 util
 utils
@ -3657,6 +3663,7 @@ Yc
 ycm
 Yeh
 yellowgreen
 yEs
 Yethiraj
 yflag
 yhi
--- a/examples/PACKAGES/charge_regulation/in.chreg-polymer
+++ b/examples/PACKAGES/charge_regulation/in.chreg-polymer
@ -8,7 +8,7 @@ bond_style      harmonic
 bond_coeff      1 100 1.122462 # K R0
 velocity        all create 1.0 8008 loop geom
-pair_style      lj/cut/coul/long 1.122462 20
+pair_style      lj/cut/coul/long/soft 2 0.5 10.0  1.122462 20
 pair_coeff      * *  1.0 1.0 1.122462 # charges
 kspace_style    pppm 1.0e-3
 pair_modify     shift yes
--- a/examples/plugins/CMakeLists.txt
+++ b/examples/plugins/CMakeLists.txt
@ -31,6 +31,15 @@ else()
  endif()
 endif()
 # ugly hacks for MSVC which by default always reports an old C++ standard in the __cplusplus macro
 # and prints lots of pointless warnings about "unsafe" functions
 #if(MSVC)
 #  add_compile_options(/Zc:__cplusplus)
 #  add_compile_options(/wd4244)
 #  add_compile_options(/wd4267)
 #  add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
 #endif()
 # C++11 is required
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@ -40,11 +49,6 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") OR (CMAKE_CXX_COMPILER_ID STREQUAL "
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -restrict")
 endif()
 # bail out on windows
 if(CMAKE_SYSTEM_NAME STREQUAL Windows)
  message(FATAL_ERROR "LAMMPS plugins are currently not supported on Windows")
 endif()
 set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
 include(CheckIncludeFileCXX)
 if(NOT LAMMPS_DIR)
@ -68,14 +72,23 @@ add_library(zero2plugin MODULE zero2plugin.cpp pair_zero2.cpp bond_zero2.cpp
                               angle_zero2.cpp dihedral_zero2.cpp improper_zero2.cpp)
 target_link_libraries(zero2plugin PRIVATE lammps)
-set_target_properties(morse2plugin nve2plugin helloplugin zero2plugin PROPERTIES
+set_target_properties(morse2plugin nve2plugin helloplugin zero2plugin PROPERTIES PREFIX "")
                      PREFIX ""
                      LINK_FLAGS "-rdynamic")
 # MacOS seems to need this
 if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
  set_target_properties(morse2plugin nve2plugin helloplugin zero2plugin
    PROPERTIES LINK_FLAGS "-Wl,-undefined,dynamic_lookup")
 elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
 # tell CMake to export all symbols to a .dll on Windows with special case for MinGW cross-compilers
  set_target_properties(morse2plugin nve2plugin helloplugin zero2plugin
    PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
  if(CMAKE_CROSSCOMPILING)
    set_target_properties(morse2plugin nve2plugin helloplugin zero2plugin
      PROPERTIES LINK_FLAGS "-Wl,--export-all-symbols")
  endif()
 else()
  set_target_properties(morse2plugin nve2plugin helloplugin zero2plugin PROPERTIES
-                        LINK_FLAGS "-Wl,-undefined,dynamic_lookup")
+    LINK_FLAGS "-rdynamic")
 endif()
 add_custom_target(plugins ALL ${CMAKE_COMMAND} -E echo "Building Plugins"
--- a/examples/plugins/LAMMPSInterfaceCXX.cmake
+++ b/examples/plugins/LAMMPSInterfaceCXX.cmake
@ -23,7 +23,9 @@ endfunction(validate_option)
 # LAMMPS C++ interface. We only need the header related parts.
 add_library(lammps INTERFACE)
 target_include_directories(lammps INTERFACE ${LAMMPS_HEADER_DIR})
-
+if((CMAKE_SYSTEM_NAME STREQUAL "Windows") AND CMAKE_CROSSCOMPILING)
  target_link_libraries(lammps INTERFACE ${CMAKE_BINARY_DIR}/../liblammps.dll.a)
 endif()
 ################################################################################
 # MPI configuration
 if(NOT CMAKE_CROSSCOMPILING)
--- a/lib/colvars/colvarmodule.cpp
+++ b/lib/colvars/colvarmodule.cpp
@ -1476,7 +1476,9 @@ int colvarmodule::write_output_files()
       bi != biases.end();
       bi++) {
    // Only write output files if they have not already been written this time step
-    if ((*bi)->output_freq == 0 || (cvm::step_absolute() % (*bi)->output_freq) != 0) {
+    if ((*bi)->output_freq == 0    ||
        cvm::step_relative() == 0  ||
        (cvm::step_absolute() % (*bi)->output_freq) != 0) {
      error_code |= (*bi)->write_output_files();
    }
    error_code |= (*bi)->write_state_to_replicas();
--- a/lib/colvars/colvars_version.h
+++ b/lib/colvars/colvars_version.h
@ -1,3 +1,3 @@
 #ifndef COLVARS_VERSION
-#define COLVARS_VERSION "2021-08-06"
+#define COLVARS_VERSION "2021-09-21"
 #endif
--- a/lib/gpu/geryon/ocl_device.h
+++ b/lib/gpu/geryon/ocl_device.h
@ -481,7 +481,7 @@ int UCL_Device::set_platform(int pid) {
      cl_device_id *subdevice_list = new cl_device_id[num_subdevices];
      CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, num_subdevices,
                                      subdevice_list, &num_subdevices));
-      for (int j=0; j<num_subdevices; j++) {
+      for (cl_uint j=0; j<num_subdevices; j++) {
        _cl_devices.push_back(device_list[i]);
        add_properties(device_list[i]);
        _num_devices++;
@ -556,16 +556,22 @@ void UCL_Device::add_properties(cl_device_id device_list) {
                               sizeof(float_width),&float_width,nullptr));
  op.preferred_vector_width32=float_width;
  // Query the preferred vector width for double precision
  cl_uint double_width;
  CL_SAFE_CALL(clGetDeviceInfo(device_list,
                               CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
                               sizeof(double_width),&double_width,nullptr));
  op.preferred_vector_width64=double_width;
-  if (double_width==0)
+
-    op.double_precision=false;
+  // Determine if double precision is supported: All bits in the mask must be set.
-  else
+  cl_device_fp_config double_mask = (CL_FP_FMA|CL_FP_ROUND_TO_NEAREST|CL_FP_ROUND_TO_ZERO|
                                     CL_FP_ROUND_TO_INF|CL_FP_INF_NAN|CL_FP_DENORM);
  cl_device_fp_config double_avail;
  CL_SAFE_CALL(clGetDeviceInfo(device_list,CL_DEVICE_DOUBLE_FP_CONFIG,
                               sizeof(double_avail),&double_avail,nullptr));
  if ((double_avail & double_mask) == double_mask)
    op.double_precision=true;
  else
    op.double_precision=false;
  CL_SAFE_CALL(clGetDeviceInfo(device_list,
                               CL_DEVICE_PROFILING_TIMER_RESOLUTION,
@ -629,7 +635,7 @@ void UCL_Device::add_properties(cl_device_id device_list) {
  size_t ext_str_size_ret;
  CL_SAFE_CALL(clGetDeviceInfo(device_list, CL_DEVICE_EXTENSIONS, 0, nullptr,
                               &ext_str_size_ret));
-  char buffer2[ext_str_size_ret];
+  char *buffer2 = new char[ext_str_size_ret];
  CL_SAFE_CALL(clGetDeviceInfo(device_list, CL_DEVICE_EXTENSIONS,
                               ext_str_size_ret, buffer2, nullptr));
  #if defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0)
@ -660,6 +666,7 @@ void UCL_Device::add_properties(cl_device_id device_list) {
    if (arch >= 3.0)
      op.has_shuffle_support=true;
  }
  delete[] buffer2;
  #endif
  _properties.push_back(op);
@ -830,7 +837,7 @@ int UCL_Device::auto_set_platform(const enum UCL_DEVICE_TYPE type,
  bool vendor_match=false;
  bool type_match=false;
-  int max_cus=0;
+  unsigned int max_cus=0;
  int best_platform=0;
  std::string vendor_upper=vendor;
--- a/lib/gpu/lal_born_coul_long.cpp
+++ b/lib/gpu/lal_born_coul_long.cpp
@ -34,7 +34,7 @@ BornCoulLongT::BornCoulLong() : BaseCharge<numtyp,acctyp>(),
 }
 template <class numtyp, class acctyp>
-BornCoulLongT::~BornCoulLongT() {
+BornCoulLongT::~BornCoulLong() {
  clear();
 }
--- a/lib/gpu/lal_born_coul_wolf.cpp
+++ b/lib/gpu/lal_born_coul_wolf.cpp
@ -34,7 +34,7 @@ BornCoulWolfT::BornCoulWolf() : BaseCharge<numtyp,acctyp>(),
 }
 template <class numtyp, class acctyp>
-BornCoulWolfT::~BornCoulWolfT() {
+BornCoulWolfT::~BornCoulWolf() {
  clear();
 }
--- a/lib/gpu/lal_buck_coul_long.cpp
+++ b/lib/gpu/lal_buck_coul_long.cpp
@ -34,7 +34,7 @@ BuckCoulLongT::BuckCoulLong() : BaseCharge<numtyp,acctyp>(),
 }
 template <class numtyp, class acctyp>
-BuckCoulLongT::~BuckCoulLongT() {
+BuckCoulLongT::~BuckCoulLong() {
  clear();
 }
--- a/lib/gpu/lal_device.cpp
+++ b/lib/gpu/lal_device.cpp
@ -333,6 +333,12 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
    gpu_barrier();
  }
  // check if double precision support is available
  #if defined(_SINGLE_DOUBLE) || defined(_DOUBLE_DOUBLE)
  if (!gpu->double_precision())
    return -16;
  #endif
  // Setup auto bin size calculation for calls from atom::sort
  // - This is repeated in neighbor init with additional info
  if (_user_cell_size<0.0) {
@ -546,14 +552,9 @@ int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
    return -3;
  if (_user_cell_size<0.0) {
    #ifndef LAL_USE_OLD_NEIGHBOR
    _neighbor_shared.setup_auto_cell_size(true,cutoff,nbor->simd_size());
    #else
    _neighbor_shared.setup_auto_cell_size(false,cutoff,nbor->simd_size());
    #endif
  } else
-    _neighbor_shared.setup_auto_cell_size(false,_user_cell_size,
+    _neighbor_shared.setup_auto_cell_size(false,_user_cell_size,nbor->simd_size());
                                          nbor->simd_size());
  nbor->set_cutoff(cutoff);
  return 0;
--- a/lib/machdyn/Install.py
+++ b/lib/machdyn/Install.py
@ -17,11 +17,12 @@ parser = ArgumentParser(prog='Install.py',
 # settings
-version = '3.3.9'
+version = '3.4.0'
 tarball = "eigen.tar.gz"
 # known checksums for different Eigen versions. used to validate the download.
 checksums = { \
              '3.4.0' : '4c527a9171d71a72a9d4186e65bea559', \
              '3.3.9' : '609286804b0f79be622ccf7f9ff2b660', \
              '3.3.7' : '9e30f67e8531477de4117506fe44669b' \
 }
@ -35,7 +36,7 @@ Syntax from src dir: make lib-smd args="-b"
 Syntax from lib dir: python Install.py -b
                 or: python Install.py -p /usr/include/eigen3"
-                 or: python Install.py -v 3.3.7 -b
+                 or: python Install.py -v 3.4.0 -b
 Example:
@ -77,7 +78,7 @@ if pathflag:
 if buildflag:
  print("Downloading Eigen ...")
  eigentar = os.path.join(homepath, tarball)
-  url = "https://gitlab.com/libeigen/eigen/-/archive/%s/eigen-%s.tar.gz" %  (version,version)
+  url = "https://download.lammps.org/thirdparty/eigen-%s.tar.gz" %  version
  geturl(url, eigentar)
  # verify downloaded archive integrity via md5 checksum, if known.
--- a/lib/pace/Makefile
+++ b/lib/pace/Makefile
@ -2,8 +2,8 @@ SHELL = /bin/sh
 # ------ FILES ------
-SRC_FILES = $(wildcard src/ML-PACE/*.cpp)
+SRC_FILES = $(wildcard src/USER-PACE/*.cpp)
-SRC = $(filter-out src/ML-PACE/pair_pace.cpp, $(SRC_FILES))
+SRC = $(filter-out src/USER-PACE/pair_pace.cpp, $(SRC_FILES))
 # ------ DEFINITIONS ------
@ -12,7 +12,7 @@ OBJ =   $(SRC:.cpp=.o)
 # ------ SETTINGS ------
-CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE
+CXXFLAGS = -O3 -fPIC -Isrc/USER-PACE
 ARCHIVE =	ar
 ARCHFLAG =	-rc
--- a/lib/pace/Makefile.lammps
+++ b/lib/pace/Makefile.lammps
@ -1,3 +1,3 @@
-pace_SYSINC =-I../../lib/pace/src/ML-PACE
+pace_SYSINC =-I../../lib/pace/src/USER-PACE
 pace_SYSLIB = -L../../lib/pace/ -lpace
 pace_SYSPATH =
--- a/src/.gitignore
+++ b/src/.gitignore
@ -27,6 +27,9 @@
 /*_ssa.h
 /*_ssa.cpp
 !accelerator_kokkos.h
 !accelerator_omp.h
 /fix_mdi_engine.cpp
 /fix_mdi_engine.h
 /library_mdi.cpp
@ -202,7 +205,6 @@
 /plugin.cpp
 /plugin.h
 /lammpsplugin.h
 /atom_vec_spin.cpp
 /atom_vec_spin.h
@ -265,8 +267,6 @@
 /fix_drag.h
 /fix_numdiff.cpp
 /fix_numdiff.h
 /fix_nve_noforce.cpp
 /fix_nve_noforce.h
 /fix_spring_rg.cpp
 /fix_spring_rg.h
 /fix_temp_csld.cpp
@ -367,8 +367,6 @@
 /atom_vec_dpd.h
 /atom_vec_electron.cpp
 /atom_vec_electron.h
 /atom_vec_ellipsoid.cpp
 /atom_vec_ellipsoid.h
 /atom_vec_full.cpp
 /atom_vec_full.h
 /atom_vec_full_hars.cpp
@ -535,8 +533,6 @@
 /dihedral_harmonic.h
 /dihedral_helix.cpp
 /dihedral_helix.h
 /dihedral_hybrid.cpp
 /dihedral_hybrid.h
 /dihedral_multi_harmonic.cpp
 /dihedral_multi_harmonic.h
 /dihedral_nharmonic.cpp
@ -858,8 +854,6 @@
 /fix_ti_rs.h
 /fix_ti_spring.cpp
 /fix_ti_spring.h
 /fix_ttm.cpp
 /fix_ttm.h
 /fix_tune_kspace.cpp
 /fix_tune_kspace.h
 /fix_wall_body_polygon.cpp
@ -885,8 +879,6 @@
 /fix_widom.cpp
 /fix_widom.h
 /gpu_extra.h
 /gridcomm.cpp
 /gridcomm.h
 /group_ndx.cpp
 /group_ndx.h
 /gz_file_writer.cpp
@ -911,14 +903,13 @@
 /improper_fourier.h
 /improper_harmonic.cpp
 /improper_harmonic.h
 /improper_hybrid.cpp
 /improper_hybrid.h
 /improper_inversion_harmonic.cpp
 /improper_inversion_harmonic.h
 /improper_ring.cpp
 /improper_ring.h
 /improper_umbrella.cpp
 /improper_umbrella.h
 /interlayer_taper.h
 /kissfft.h
 /lj_sdk_common.h
 /math_complex.h
@ -933,7 +924,6 @@
 /msm_cg.h
 /neb.cpp
 /neb.h
 /pair_adp.cpp
 /pair_adp.h
 /pair_agni.cpp
@ -994,6 +984,8 @@
 /pair_cosine_squared.h
 /pair_coul_diel.cpp
 /pair_coul_diel.h
 /pair_coul_exclude.cpp
 /pair_coul_exclude.h
 /pair_coul_long.cpp
 /pair_coul_long.h
 /pair_coul_msm.cpp
@ -1332,8 +1324,6 @@
 /thr_data.h
 /verlet_split.cpp
 /verlet_split.h
 /write_dump.cpp
 /write_dump.h
 /xdr_compat.cpp
 /xdr_compat.h
 /zstd_file_writer.cpp
@ -1431,6 +1421,10 @@
 /fix_srp.h
 /fix_tfmc.cpp
 /fix_tfmc.h
 /fix_ttm.cpp
 /fix_ttm.h
 /fix_ttm_grid.cpp
 /fix_ttm_grid.h
 /fix_ttm_mod.cpp
 /fix_ttm_mod.h
 /pair_born_coul_long_cs.cpp
--- a/src/BOCS/fix_bocs.cpp
+++ b/src/BOCS/fix_bocs.cpp
@ -233,9 +233,7 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
      iarg += 2;
    } else if (strcmp(arg[iarg],"mtk") == 0) {
      if (iarg+2 > narg) error->all(FLERR,"Illegal fix bocs command");
-      if (strcmp(arg[iarg+1],"yes") == 0) mtk_flag = 1;
+      mtk_flag = utils::logical(FLERR,arg[iarg+1],false,lmp);
      else if (strcmp(arg[iarg+1],"no") == 0) mtk_flag = 0;
      else error->all(FLERR,"Illegal fix bocs command");
      iarg += 2;
    } else if (strcmp(arg[iarg],"tloop") == 0) {
      if (iarg+2 > narg) error->all(FLERR,"Illegal fix bocs command");
--- a/src/COLVARS/fix_colvars.cpp
+++ b/src/COLVARS/fix_colvars.cpp
@ -303,7 +303,7 @@ FixColvars::FixColvars(LAMMPS *lmp, int narg, char **arg) :
  me = comm->me;
  root2root = MPI_COMM_NULL;
-  conf_file = strdup(arg[3]);
+  conf_file = utils::strdup(arg[3]);
  rng_seed = 1966;
  unwrap_flag = 1;
@ -312,35 +312,29 @@ FixColvars::FixColvars(LAMMPS *lmp, int narg, char **arg) :
  tmp_name = nullptr;
  /* parse optional arguments */
-  int argsdone = 4;
+  int iarg = 4;
-  while (argsdone < narg) {
+  while (iarg < narg) {
    // we have keyword/value pairs. check if value is missing
-    if (argsdone+1 == narg)
+    if (iarg+1 == narg)
      error->all(FLERR,"Missing argument to keyword");
-    if (0 == strcmp(arg[argsdone], "input")) {
+    if (0 == strcmp(arg[iarg], "input")) {
-      inp_name = strdup(arg[argsdone+1]);
+      inp_name = utils::strdup(arg[iarg+1]);
-    } else if (0 == strcmp(arg[argsdone], "output")) {
+    } else if (0 == strcmp(arg[iarg], "output")) {
-      out_name = strdup(arg[argsdone+1]);
+      out_name = utils::strdup(arg[iarg+1]);
-    } else if (0 == strcmp(arg[argsdone], "seed")) {
+    } else if (0 == strcmp(arg[iarg], "seed")) {
-      rng_seed = utils::inumeric(FLERR,arg[argsdone+1],false,lmp);
+      rng_seed = utils::inumeric(FLERR,arg[iarg+1],false,lmp);
-    } else if (0 == strcmp(arg[argsdone], "unwrap")) {
+    } else if (0 == strcmp(arg[iarg], "unwrap")) {
-      if (0 == strcmp(arg[argsdone+1], "yes")) {
+      unwrap_flag = utils::logical(FLERR,arg[iarg+1],false,lmp);
-        unwrap_flag = 1;
+    } else if (0 == strcmp(arg[iarg], "tstat")) {
-      } else if (0 == strcmp(arg[argsdone+1], "no")) {
+      tmp_name = utils::strdup(arg[iarg+1]);
        unwrap_flag = 0;
      } else {
        error->all(FLERR,"Incorrect fix colvars unwrap flag");
      }
    } else if (0 == strcmp(arg[argsdone], "tstat")) {
      tmp_name = strdup(arg[argsdone+1]);
    } else {
      error->all(FLERR,"Unknown fix colvars parameter");
    }
-    ++argsdone; ++argsdone;
+    ++iarg; ++iarg;
  }
-  if (!out_name) out_name = strdup("out");
+  if (!out_name) out_name = utils::strdup("out");
  /* initialize various state variables. */
  tstat_id = -1;
@ -365,10 +359,10 @@ FixColvars::FixColvars(LAMMPS *lmp, int narg, char **arg) :
 FixColvars::~FixColvars()
 {
-  memory->sfree(conf_file);
+  delete[] conf_file;
-  memory->sfree(inp_name);
+  delete[] inp_name;
-  memory->sfree(out_name);
+  delete[] out_name;
-  memory->sfree(tmp_name);
+  delete[] tmp_name;
  memory->sfree(comm_buf);
  if (proxy) {
@ -436,17 +430,15 @@ void FixColvars::one_time_init()
  // create and initialize the colvars proxy
  if (me == 0) {
-    if (screen) fputs("colvars: Creating proxy instance\n",screen);
+    utils::logmesg(lmp,"colvars: Creating proxy instance\n");
    if (logfile) fputs("colvars: Creating proxy instance\n",logfile);
 #ifdef LAMMPS_BIGBIG
-    if (screen) fputs("colvars: cannot handle atom ids > 2147483647\n",screen);
+    utils::logmesg(lmp,"colvars: cannot handle atom ids > 2147483647\n");
    if (logfile) fputs("colvars: cannot handle atom ids > 2147483647\n",logfile);
 #endif
    if (inp_name) {
      if (strcmp(inp_name,"NULL") == 0) {
-        memory->sfree(inp_name);
+        delete[] inp_name;
        inp_name = nullptr;
      }
    }
@ -464,8 +456,7 @@ void FixColvars::one_time_init()
      }
    }
-    proxy = new colvarproxy_lammps(lmp,inp_name,out_name,
+    proxy = new colvarproxy_lammps(lmp,inp_name,out_name,rng_seed,t_target,root2root);
                                   rng_seed,t_target,root2root);
    proxy->init(conf_file);
    num_coords = (proxy->modify_atom_positions()->size());
--- a/src/COMPRESS/dump_atom_gz.cpp
+++ b/src/COMPRESS/dump_atom_gz.cpp
@ -33,7 +33,7 @@ DumpAtomGZ::~DumpAtomGZ() {}
 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */
--- a/src/COMPRESS/dump_atom_zstd.cpp
+++ b/src/COMPRESS/dump_atom_zstd.cpp
@ -188,17 +188,11 @@ int DumpAtomZstd::modify_param(int narg, char **arg)
    try {
      if (strcmp(arg[0], "checksum") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        if (strcmp(arg[1], "yes") == 0)
+        writer.setChecksum(utils::logical(FLERR, arg[1], false, lmp) == 1);
          writer.setChecksum(true);
        else if (strcmp(arg[1], "no") == 0)
          writer.setChecksum(false);
        else
          error->all(FLERR, "Illegal dump_modify command");
        return 2;
      } else if (strcmp(arg[0], "compression_level") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        int compression_level = utils::inumeric(FLERR, arg[1], false, lmp);
+        writer.setCompressionLevel(utils::inumeric(FLERR, arg[1], false, lmp));
        writer.setCompressionLevel(compression_level);
        return 2;
      }
    } catch (FileWriterException &e) {
--- a/src/COMPRESS/dump_cfg_gz.cpp
+++ b/src/COMPRESS/dump_cfg_gz.cpp
@ -35,7 +35,7 @@ DumpCFGGZ::~DumpCFGGZ() {}
 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */
--- a/src/COMPRESS/dump_cfg_zstd.cpp
+++ b/src/COMPRESS/dump_cfg_zstd.cpp
@ -233,17 +233,11 @@ int DumpCFGZstd::modify_param(int narg, char **arg)
    try {
      if (strcmp(arg[0], "checksum") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        if (strcmp(arg[1], "yes") == 0)
+        writer.setChecksum(utils::logical(FLERR, arg[1], false, lmp) == 1);
          writer.setChecksum(true);
        else if (strcmp(arg[1], "no") == 0)
          writer.setChecksum(false);
        else
          error->all(FLERR, "Illegal dump_modify command");
        return 2;
      } else if (strcmp(arg[0], "compression_level") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        int compression_level = utils::inumeric(FLERR, arg[1], false, lmp);
+        writer.setCompressionLevel(utils::inumeric(FLERR, arg[1], false, lmp));
        writer.setCompressionLevel(compression_level);
        return 2;
      }
    } catch (FileWriterException &e) {
--- a/src/COMPRESS/dump_custom_gz.cpp
+++ b/src/COMPRESS/dump_custom_gz.cpp
@ -33,7 +33,7 @@ DumpCustomGZ::~DumpCustomGZ() {}
 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */
--- a/src/COMPRESS/dump_custom_zstd.cpp
+++ b/src/COMPRESS/dump_custom_zstd.cpp
@ -45,7 +45,7 @@ DumpCustomZstd::~DumpCustomZstd()
 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */
@ -203,16 +203,13 @@ int DumpCustomZstd::modify_param(int narg, char **arg)
  int consumed = DumpCustom::modify_param(narg, arg);
  if (consumed == 0) {
    try {
-      if (strcmp(arg[0],"checksum") == 0) {
+      if (strcmp(arg[0], "checksum") == 0) {
-        if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
+        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        if (strcmp(arg[1],"yes") == 0) writer.setChecksum(true);
+        writer.setChecksum(utils::logical(FLERR, arg[1], false, lmp) == 1);
        else if (strcmp(arg[1],"no") == 0) writer.setChecksum(false);
        else error->all(FLERR,"Illegal dump_modify command");
        return 2;
-      } else if (strcmp(arg[0],"compression_level") == 0) {
+      } else if (strcmp(arg[0], "compression_level") == 0) {
-        if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
+        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        int compression_level = utils::inumeric(FLERR, arg[1], false, lmp);
+        writer.setCompressionLevel(utils::inumeric(FLERR, arg[1], false, lmp));
        writer.setCompressionLevel(compression_level);
        return 2;
      }
    } catch (FileWriterException &e) {
--- a/src/COMPRESS/dump_local_gz.cpp
+++ b/src/COMPRESS/dump_local_gz.cpp
@ -33,7 +33,7 @@ DumpLocalGZ::~DumpLocalGZ() {}
 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */
--- a/src/COMPRESS/dump_local_zstd.cpp
+++ b/src/COMPRESS/dump_local_zstd.cpp
@ -39,7 +39,7 @@ DumpLocalZstd::~DumpLocalZstd() {}
 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */
@ -190,17 +190,11 @@ int DumpLocalZstd::modify_param(int narg, char **arg)
    try {
      if (strcmp(arg[0], "checksum") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        if (strcmp(arg[1], "yes") == 0)
+        writer.setChecksum(utils::logical(FLERR, arg[1], false, lmp) == 1);
          writer.setChecksum(true);
        else if (strcmp(arg[1], "no") == 0)
          writer.setChecksum(false);
        else
          error->all(FLERR, "Illegal dump_modify command");
        return 2;
      } else if (strcmp(arg[0], "compression_level") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        int compression_level = utils::inumeric(FLERR, arg[1], false, lmp);
+        writer.setCompressionLevel(utils::inumeric(FLERR, arg[1], false, lmp));
        writer.setCompressionLevel(compression_level);
        return 2;
      }
    } catch (FileWriterException &e) {
--- a/src/COMPRESS/dump_xyz_gz.cpp
+++ b/src/COMPRESS/dump_xyz_gz.cpp
@ -32,7 +32,7 @@ DumpXYZGZ::~DumpXYZGZ() {}
 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */
--- a/src/COMPRESS/dump_xyz_zstd.cpp
+++ b/src/COMPRESS/dump_xyz_zstd.cpp
@ -38,7 +38,7 @@ DumpXYZZstd::~DumpXYZZstd() {}
 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */
@ -156,17 +156,11 @@ int DumpXYZZstd::modify_param(int narg, char **arg)
    try {
      if (strcmp(arg[0], "checksum") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        if (strcmp(arg[1], "yes") == 0)
+        writer.setChecksum(utils::logical(FLERR, arg[1], false, lmp) == 1);
          writer.setChecksum(true);
        else if (strcmp(arg[1], "no") == 0)
          writer.setChecksum(false);
        else
          error->all(FLERR, "Illegal dump_modify command");
        return 2;
      } else if (strcmp(arg[0], "compression_level") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        int compression_level = utils::inumeric(FLERR, arg[1], false, lmp);
+        writer.setCompressionLevel(utils::inumeric(FLERR, arg[1], false, lmp));
        writer.setCompressionLevel(compression_level);
        return 2;
      }
    } catch (FileWriterException &e) {
--- a/src/DIELECTRIC/fix_polarize_bem_gmres.cpp
+++ b/src/DIELECTRIC/fix_polarize_bem_gmres.cpp
@ -796,12 +796,7 @@ int FixPolarizeBEMGMRES::modify_param(int narg, char **arg)
      iarg += 2;
    } else if (strcmp(arg[iarg], "kspace") == 0) {
      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix_modify command");
-      if (strcmp(arg[iarg + 1], "yes") == 0)
+      kspaceflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
        kspaceflag = 1;
      else if (strcmp(arg[iarg + 1], "no") == 0)
        kspaceflag = 0;
      else
        error->all(FLERR, "Illegal fix_modify command for fix polarize");
      iarg += 2;
    } else if (strcmp(arg[iarg], "dielectrics") == 0) {
      if (iarg + 6 > narg) error->all(FLERR, "Illegal fix_modify command");
--- a/src/DIELECTRIC/fix_polarize_bem_icc.cpp
+++ b/src/DIELECTRIC/fix_polarize_bem_icc.cpp
@ -355,12 +355,7 @@ int FixPolarizeBEMICC::modify_param(int narg, char **arg)
      iarg += 2;
    } else if (strcmp(arg[iarg], "kspace") == 0) {
      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix_modify command");
-      if (strcmp(arg[iarg + 1], "yes") == 0)
+      kspaceflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
        kspaceflag = 1;
      else if (strcmp(arg[iarg + 1], "no") == 0)
        kspaceflag = 0;
      else
        error->all(FLERR, "Illegal fix_modify command for fix polarize");
      iarg += 2;
    } else if (strcmp(arg[iarg], "dielectrics") == 0) {
      if (iarg + 6 > narg) error->all(FLERR, "Illegal fix_modify command");
--- a/src/DIELECTRIC/fix_polarize_functional.cpp
+++ b/src/DIELECTRIC/fix_polarize_functional.cpp
@ -478,12 +478,7 @@ int FixPolarizeFunctional::modify_param(int narg, char **arg)
  while (iarg < narg) {
    if (strcmp(arg[iarg], "kspace") == 0) {
      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix_modify command");
-      if (strcmp(arg[iarg + 1], "yes") == 0)
+      kspaceflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
        kspaceflag = 1;
      else if (strcmp(arg[iarg + 1], "no") == 0)
        kspaceflag = 0;
      else
        error->all(FLERR, "Illegal fix_modify command for fix polarize/functional");
      iarg += 2;
    } else if (strcmp(arg[iarg], "dielectrics") == 0) {
      if (iarg + 6 > narg) error->all(FLERR, "Illegal fix_modify command");
--- a/src/DIFFRACTION/compute_saed.cpp
+++ b/src/DIFFRACTION/compute_saed.cpp
@ -31,7 +31,6 @@
 #include <cmath>
 #include <cstring>
 #include <strings.h>    // for strcasecmp()
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
@ -85,13 +84,13 @@ ComputeSAED::ComputeSAED(LAMMPS *lmp, int narg, char **arg) :
    ztype[i] = SAEDmaxType + 1;
  }
  for (int i=0; i<ntypes; i++) {
-       for (int j = 0; j < SAEDmaxType; j++) {
+     for (int j = 0; j < SAEDmaxType; j++) {
-         if (strcasecmp(arg[iarg],SAEDtypeList[j]) == 0) {
+       if (utils::lowercase(arg[iarg]) == utils::lowercase(SAEDtypeList[j])) {
         ztype[i] = j;
         }
       }
-       if (ztype[i] == SAEDmaxType + 1)
+     }
-          error->all(FLERR,"Compute SAED: Invalid ASF atom type");
+     if (ztype[i] == SAEDmaxType + 1)
       error->all(FLERR,"Compute SAED: Invalid ASF atom type");
    iarg++;
  }
@ -348,7 +347,7 @@ void ComputeSAED::compute_vector()
  if (me == 0 && echo)
    utils::logmesg(lmp,"-----\nComputing SAED intensities");
-  double t0 = MPI_Wtime();
+  double t0 = platform::walltime();
  double *Fvec = new double[2*nRows]; // Strct factor (real & imaginary)
  // -- Note, vector entries correspond to different RELP
@ -491,7 +490,7 @@ void ComputeSAED::compute_vector()
    vector[i] = (scratch[2*i] * scratch[2*i] + scratch[2*i+1] * scratch[2*i+1]) / natoms;
  }
-  double t2 = MPI_Wtime();
+  double t2 = platform::walltime();
  // compute memory usage per processor
  double bytes = memory_usage();
--- a/src/DIFFRACTION/compute_xrd.cpp
+++ b/src/DIFFRACTION/compute_xrd.cpp
@ -32,7 +32,6 @@
 #include <cmath>
 #include <cstring>
 #include <strings.h>    // for strcasecmp()
 #include "omp_compat.h"
 using namespace LAMMPS_NS;
@ -87,7 +86,7 @@ ComputeXRD::ComputeXRD(LAMMPS *lmp, int narg, char **arg) :
  }
  for (int i = 0; i < ntypes; i++) {
    for (int j = 0; j < XRDmaxType; j++) {
-      if (strcasecmp(arg[iarg],XRDtypeList[j]) == 0) {
+      if (utils::lowercase(arg[iarg]) == utils::lowercase(XRDtypeList[j])) {
        ztype[i] = j;
       }
     }
@ -300,7 +299,7 @@ void ComputeXRD::compute_array()
  if (me == 0 && echo) utils::logmesg(lmp, "-----\nComputing XRD intensities");
-  double t0 = MPI_Wtime();
+  double t0 = platform::walltime();
  double *Fvec = new double[2*size_array_rows]; // Strct factor (real & imaginary)
  // -- Note: array rows correspond to different RELP
@ -496,7 +495,7 @@ void ComputeXRD::compute_array()
    array[i][1] = (scratch[2*i] * scratch[2*i] + scratch[2*i+1] * scratch[2*i+1]) / natoms;
  }
-  double t2 = MPI_Wtime();
+  double t2 = platform::walltime();
  // compute memory usage per processor
  double bytes = memory_usage();
--- a/src/DIFFRACTION/fix_saed_vtk.cpp
+++ b/src/DIFFRACTION/fix_saed_vtk.cpp
@ -31,6 +31,7 @@
 #include <cstring>
 #include <cmath>
 using namespace LAMMPS_NS;
 using namespace FixConst;
@ -100,8 +101,6 @@ FixSAEDVTK::FixSAEDVTK(LAMMPS *lmp, int narg, char **arg) :
    error->all(FLERR,"Illegal fix saed/vtk command");
  if (nfreq % nevery || nrepeat*nevery > nfreq)
    error->all(FLERR,"Illegal fix saed/vtk command");
  if (ave != RUNNING && overwrite)
    error->all(FLERR,"Illegal fix saed/vtk command");
  // allocate memory for averaging
@ -315,7 +314,7 @@ void FixSAEDVTK::invoke_vector(bigint ntimestep)
  if (irepeat == 0)
    for (int i = 0; i < nrows; i++)
-       vector[i] = 0.0;
+      vector[i] = 0.0;
  // accumulate results of computes,fixes,variables to local copy
  // compute/fix/variable may invoke computes so wrap with clear/add
@ -369,7 +368,7 @@ void FixSAEDVTK::invoke_vector(bigint ntimestep)
    for (int i = 0; i < nrows; i++) {
      vector_total[i] += vector[i];
      if (window_limit) vector_total[i] -= vector_list[iwindow][i];
-        vector_list[iwindow][i] = vector[i];
+      vector_list[iwindow][i] = vector[i];
    }
    iwindow++;
@ -391,8 +390,7 @@ void FixSAEDVTK::invoke_vector(bigint ntimestep)
      fp = fopen(nName.c_str(),"w");
      if (fp == nullptr)
-        error->one(FLERR,"Cannot open fix saed/vtk file {}: {}",
+        error->one(FLERR,"Cannot open fix saed/vtk file {}: {}", nName,utils::getsyserror());
                                     nName,utils::getsyserror());
    }
    fprintf(fp,"# vtk DataFile Version 3.0 c_%s\n",ids);
@ -406,71 +404,68 @@ void FixSAEDVTK::invoke_vector(bigint ntimestep)
    fprintf(fp,"SCALARS intensity float\n");
    fprintf(fp,"LOOKUP_TABLE default\n");
    filepos = ftell(fp);
-    if (overwrite) fseek(fp,filepos,SEEK_SET);
+    // Finding the intersection of the reciprocal space and Ewald sphere
    int NROW1 = 0;
    int NROW2 = 0;
    double dinv2 = 0.0;
    double r = 0.0;
    double K[3];
-     // Finding the intersection of the reciprical space and Ewald sphere
+    // Zone flag to capture entire reciprocal space volume
-      int NROW1 = 0;
+    if ((Zone[0] == 0) && (Zone[1] == 0) && (Zone[2] == 0)) {
-      int NROW2 = 0;
+      for (int k = Knmin[2]; k <= Knmax[2]; k++) {
-      double dinv2 = 0.0;
+        for (int j = Knmin[1]; j <= Knmax[1]; j++) {
-      double r = 0.0;
+          for (int i = Knmin[0]; i <= Knmax[0]; i++) {
-      double K[3];
+            K[0] = i * dK[0];
-
+            K[1] = j * dK[1];
-      // Zone flag to capture entire recrocal space volume
+            K[2] = k * dK[2];
-      if ((Zone[0] == 0) && (Zone[1] == 0) && (Zone[2] == 0)) {
+            dinv2 = (K[0] * K[0] + K[1] * K[1] + K[2] * K[2]);
-        for (int k = Knmin[2]; k <= Knmax[2]; k++) {
+            if (dinv2 < Kmax * Kmax) {
-          for (int j = Knmin[1]; j <= Knmax[1]; j++) {
+              fprintf(fp,"%g\n",vector_total[NROW1]/norm);
-            for (int i = Knmin[0]; i <= Knmax[0]; i++) {
+              fflush(fp);
-              K[0] = i * dK[0];
+              NROW1++;
-              K[1] = j * dK[1];
+              NROW2++;
-              K[2] = k * dK[2];
+            } else {
              dinv2 = (K[0] * K[0] + K[1] * K[1] + K[2] * K[2]);
              if (dinv2 < Kmax * Kmax) {
                 fprintf(fp,"%g\n",vector_total[NROW1]/norm);
                 fflush(fp);
                 NROW1++;
                 NROW2++;
              } else {
              fprintf(fp,"%d\n",-1);
              fflush(fp);
              NROW2++;
              }
            }
          }
        }
-      } else {
+      }
-        for (int k = Knmin[2]; k <= Knmax[2]; k++) {
+    } else {
-          for (int j = Knmin[1]; j <= Knmax[1]; j++) {
+      for (int k = Knmin[2]; k <= Knmax[2]; k++) {
-            for (int i = Knmin[0]; i <= Knmax[0]; i++) {
+        for (int j = Knmin[1]; j <= Knmax[1]; j++) {
-              K[0] = i * dK[0];
+          for (int i = Knmin[0]; i <= Knmax[0]; i++) {
-              K[1] = j * dK[1];
+            K[0] = i * dK[0];
-              K[2] = k * dK[2];
+            K[1] = j * dK[1];
-              dinv2 = (K[0] * K[0] + K[1] * K[1] + K[2] * K[2]);
+            K[2] = k * dK[2];
-              if (dinv2 < Kmax * Kmax) {
+            dinv2 = (K[0] * K[0] + K[1] * K[1] + K[2] * K[2]);
-                r=0.0;
+            if (dinv2 < Kmax * Kmax) {
-                for (int m=0; m<3; m++) r += pow(K[m] - Zone[m],2.0);
+              r=0.0;
-                r = sqrt(r);
+              for (int m=0; m<3; m++) r += pow(K[m] - Zone[m],2.0);
-                if  ( (r >  (R_Ewald - dR_Ewald) ) && (r < (R_Ewald + dR_Ewald) )) {
+              r = sqrt(r);
-                 fprintf(fp,"%g\n",vector_total[NROW1]/norm);
+              if  ( (r >  (R_Ewald - dR_Ewald) ) && (r < (R_Ewald + dR_Ewald) )) {
-                 fflush(fp);
+                fprintf(fp,"%g\n",vector_total[NROW1]/norm);
-                 NROW2++;
+                fflush(fp);
-                 NROW1++;
+                NROW2++;
-                } else {
+                NROW1++;
                  fprintf(fp,"%d\n",-1);
                  fflush(fp);
                  NROW2++;
                }
              } else {
                fprintf(fp,"%d\n",-1);
                fflush(fp);
                NROW2++;
              }
            } else {
              fprintf(fp,"%d\n",-1);
              fflush(fp);
              NROW2++;
             }
            }
          }
        }
      }
    }
  }
  nOutput++;
 }
@ -497,7 +492,6 @@ void FixSAEDVTK::options(int narg, char **arg)
  fp = nullptr;
  ave = ONE;
  startstep = 0;
  overwrite = 0;
  // optional args
  int iarg = 7;
@ -534,9 +528,6 @@ void FixSAEDVTK::options(int narg, char **arg)
      if (iarg+2 > narg) error->all(FLERR,"Illegal fix saed/vtk command");
      startstep = utils::inumeric(FLERR,arg[iarg+1],false,lmp);
      iarg += 2;
    } else if (strcmp(arg[iarg],"overwrite") == 0) {
      overwrite = 1;
      iarg += 1;
    } else error->all(FLERR,"Illegal fix saed/vtk command");
  }
 }
--- a/src/DIFFRACTION/fix_saed_vtk.h
+++ b/src/DIFFRACTION/fix_saed_vtk.h
@ -43,8 +43,6 @@ class FixSAEDVTK : public Fix {
  int nrows;
  int ave, nwindow, nsum, startstep;
  int overwrite;
  long filepos;
  int norm, iwindow, window_limit;
  double *vector;
--- a/src/DPD-MESO/pair_mdpd.cpp
+++ b/src/DPD-MESO/pair_mdpd.cpp
@ -19,19 +19,19 @@
 #include "pair_mdpd.h"
 #include "atom.h"
 #include "citeme.h"
 #include "comm.h"
 #include "error.h"
 #include "force.h"
 #include "memory.h"
 #include "neigh_list.h"
 #include "neighbor.h"
 #include "random_mars.h"
 #include "update.h"
 #include <cmath>
 #include <ctime>
 #include "atom.h"
 #include "comm.h"
 #include "update.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "random_mars.h"
 #include "citeme.h"
 #include "memory.h"
 #include "error.h"
 using namespace LAMMPS_NS;
@ -217,12 +217,13 @@ void PairMDPD::settings(int narg, char **arg)
  seed = utils::inumeric(FLERR,arg[2],false,lmp);
  // initialize Marsaglia RNG with processor-unique seed
  // create a positive seed based on the system clock, if requested.
  if (seed <= 0) {
-    struct timespec time;
+    constexpr double LARGE_NUM = 2<<30;
-    clock_gettime( CLOCK_REALTIME, &time );
+    seed = int(fmod(platform::walltime() * LARGE_NUM, LARGE_NUM)) + 1;
-    seed = time.tv_nsec;  // if seed is non-positive, get the current time as the seed
  }
  delete random;
  random = new RanMars(lmp,(seed + comm->me) % 900000000);
--- a/src/DPD-MESO/pair_tdpd.cpp
+++ b/src/DPD-MESO/pair_tdpd.cpp
@ -18,19 +18,19 @@
 ------------------------------------------------------------------------- */
 #include "pair_tdpd.h"
 #include <cmath>
 #include <ctime>
 #include "atom.h"
 #include "comm.h"
 #include "update.h"
 #include "force.h"
 #include "neighbor.h"
 #include "neigh_list.h"
 #include "random_mars.h"
 #include "citeme.h"
 #include "memory.h"
 #include "error.h"
 #include "atom.h"
 #include "citeme.h"
 #include "comm.h"
 #include "error.h"
 #include "force.h"
 #include "memory.h"
 #include "neigh_list.h"
 #include "neighbor.h"
 #include "random_mars.h"
 #include "update.h"
 #include <cmath>
 using namespace LAMMPS_NS;
@ -239,12 +239,13 @@ void PairTDPD::settings(int narg, char **arg)
  seed = utils::inumeric(FLERR,arg[2],false,lmp);
  // initialize Marsaglia RNG with processor-unique seed
  // create a positive seed based on the system clock, if requested.
  if (seed <= 0) {
-    struct timespec time;
+    constexpr double LARGE_NUM = 2<<30;
-    clock_gettime( CLOCK_REALTIME, &time );
+    seed = int(fmod(platform::walltime() * LARGE_NUM, LARGE_NUM)) + 1;
-    seed = time.tv_nsec;  // if seed is non-positive, get the current time as the seed
  }
  delete random;
  random = new RanMars(lmp,(seed + comm->me) % 900000000);
--- a/src/DPD-REACT/fix_rx.cpp
+++ b/src/DPD-REACT/fix_rx.cpp
@ -58,7 +58,7 @@ namespace /* anonymous */
 {
 typedef double TimerType;
-TimerType getTimeStamp() { return MPI_Wtime(); }
+TimerType getTimeStamp() { return platform::walltime(); }
 double getElapsedTime( const TimerType &t0, const TimerType &t1) { return t1-t0; }
 } // end namespace
@ -126,7 +126,7 @@ FixRX::FixRX(LAMMPS *lmp, int narg, char **arg) :
      error->all(FLERR, errmsg);
    }
-    if (comm->me == 0 and Verbosity > 1) {
+    if (comm->me == 0 && Verbosity > 1) {
      std::string msg = "FixRX: matrix format is ";
      if (useSparseKinetics)
         msg += std::string("sparse");
@ -172,7 +172,7 @@ FixRX::FixRX(LAMMPS *lmp, int narg, char **arg) :
    char *word = arg[iarg++];
    minSteps = atoi( word );
-    if (comm->me == 0 and Verbosity > 1) {
+    if (comm->me == 0 && Verbosity > 1) {
      char msg[128];
      sprintf(msg, "FixRX: RK4 numSteps= %d", minSteps);
      error->message(FLERR, msg);
@ -197,7 +197,7 @@ FixRX::FixRX(LAMMPS *lmp, int narg, char **arg) :
    // maxIters must be at least minSteps.
    maxIters = std::max( minSteps, maxIters );
-    if (comm->me == 0 and Verbosity > 1) {
+    if (comm->me == 0 && Verbosity > 1) {
      //printf("FixRX: RKF45 minSteps= %d maxIters= %d absTol= %e relTol= %e\n", minSteps, maxIters, absTol, relTol);
      char msg[128];
      sprintf(msg, "FixRX: RKF45 minSteps= %d maxIters= %d relTol= %.1e absTol= %.1e diagnosticFrequency= %d", minSteps, maxIters, relTol, absTol, diagnosticFrequency);
@ -371,7 +371,7 @@ void FixRX::initSparse()
 {
  const int Verbosity = 1;
-  if (comm->me == 0 and Verbosity > 1) {
+  if (comm->me == 0 && Verbosity > 1) {
    for (int k = 0; k < nspecies; ++k)
      printf("atom->dvname[%d]= %s\n", k, atom->dvname[k]);
@ -421,7 +421,7 @@ void FixRX::initSparse()
    std::string pstr, rstr;
    bool allAreIntegral = true;
    for (int k = 0; k < nspecies; ++k) {
-      if (stoichReactants[i][k] == 0 and stoichProducts[i][k] == 0)
+      if (stoichReactants[i][k] == 0 && stoichProducts[i][k] == 0)
        nzeros++;
      if (stoichReactants[i][k] > 0.0) {
@ -448,7 +448,7 @@ void FixRX::initSparse()
        pstr += atom->dvname[k];
      }
    }
-    if (comm->me == 0 and Verbosity > 1)
+    if (comm->me == 0 && Verbosity > 1)
      printf("rx%3d: %d %d %d ... %s %s %s\n", i, nreac_i, nprod_i, allAreIntegral, rstr.c_str(), /*reversible[i]*/ (false) ? "<=>" : "=", pstr.c_str());
    mxreac = std::max( mxreac, nreac_i );
@ -457,7 +457,7 @@ void FixRX::initSparse()
    if (allAreIntegral) nIntegral++;
  }
-  if (comm->me == 0 and Verbosity > 1) {
+  if (comm->me == 0 && Verbosity > 1) {
    char msg[256];
    sprintf(msg, "FixRX: Sparsity of Stoichiometric Matrix= %.1f%% non-zeros= %d nspecies= %d nreactions= %d maxReactants= %d maxProducts= %d maxSpecies= %d integralReactions= %d", 100*(double(nzeros) / (nspecies * nreactions)), nzeros, nspecies, nreactions, mxreac, mxprod, (mxreac + mxprod), SparseKinetics_enableIntegralReactions);
    error->message(FLERR, msg);
@ -539,7 +539,7 @@ void FixRX::initSparse()
       sparseKinetics_isIntegralReaction[i] = isIntegral_i;
  }
-  if (comm->me == 0 and Verbosity > 1) {
+  if (comm->me == 0 && Verbosity > 1) {
    for (int i = 1; i < nu_bin.size(); ++i)
      if (nu_bin[i] > 0)
        printf("nu_bin[%d] = %d\n", i, nu_bin[i]);
@ -554,7 +554,7 @@ void FixRX::initSparse()
            rstr += " + ";
          char digit[6];
-          if (SparseKinetics_enableIntegralReactions and sparseKinetics_isIntegralReaction[i])
+          if (SparseKinetics_enableIntegralReactions && sparseKinetics_isIntegralReaction[i])
            sprintf(digit,"%d ", sparseKinetics_inu[i][kk]);
          else
            sprintf(digit,"%4.1f ", sparseKinetics_nu[i][kk]);
@ -570,7 +570,7 @@ void FixRX::initSparse()
            pstr += " + ";
          char digit[6];
-          if (SparseKinetics_enableIntegralReactions and sparseKinetics_isIntegralReaction[i])
+          if (SparseKinetics_enableIntegralReactions && sparseKinetics_isIntegralReaction[i])
            sprintf(digit,"%d ", sparseKinetics_inu[i][kk]);
          else
            sprintf(digit,"%4.1f ", sparseKinetics_nu[i][kk]);
@ -578,7 +578,7 @@ void FixRX::initSparse()
          pstr += atom->dvname[k];
        }
      }
-      if (comm->me == 0 and Verbosity > 1)
+      if (comm->me == 0 && Verbosity > 1)
        printf("rx%3d: %s %s %s\n", i, rstr.c_str(), /*reversible[i]*/ (false) ? "<=>" : "=", pstr.c_str());
    }
    // end for nreactions
--- a/src/DPD-REACT/random_external_state.h
+++ b/src/DPD-REACT/random_external_state.h
@ -78,8 +78,8 @@
 namespace random_external_state {
 typedef uint64_t es_RNG_t;
-enum { MAX_URAND = 0xffffffffU };
+constexpr uint32_t MAX_URAND = 0xffffffffU;
-enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 };
+constexpr uint64_t MAX_URAND64 = 0xffffffffffffffffULL - 1;
 LAMMPS_INLINE
 uint32_t es_urand(es_RNG_t &state_)
--- a/src/DRUDE/fix_drude_transform.cpp
+++ b/src/DRUDE/fix_drude_transform.cpp
@ -13,16 +13,18 @@
 ------------------------------------------------------------------------- */
 /** Fix Drude Transform ******************************************************/
 #include "fix_drude_transform.h"
 #include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "error.h"
 #include "fix_drude.h"
 #include "modify.h"
 #include <cmath>
 #include <cstring>
 #include "fix_drude.h"
 #include "atom.h"
 #include "domain.h"
 #include "comm.h"
 #include "error.h"
 #include "modify.h"
 using namespace LAMMPS_NS;
 using namespace FixConst;
--- a/src/DRUDE/fix_drude_transform.h
+++ b/src/DRUDE/fix_drude_transform.h
@ -25,10 +25,10 @@ FixStyle(drude/transform/inverse,FixDrudeTransform<true>);
 namespace LAMMPS_NS {
-template <bool inverse> class FixDrudeTransform : public Fix {
+template <bool inverse> class FixDrudeTransform: public Fix {
 public:
-  FixDrudeTransform<inverse>(class LAMMPS *, int, char **);
+  FixDrudeTransform(class LAMMPS *, int, char **);
-  ~FixDrudeTransform<inverse>();
+  ~FixDrudeTransform();
  int setmask();
  void init();
  void setup(int vflag);
--- a/src/DRUDE/fix_langevin_drude.cpp
+++ b/src/DRUDE/fix_langevin_drude.cpp
@ -91,9 +91,7 @@ FixLangevinDrude::FixLangevinDrude(LAMMPS *lmp, int narg, char **arg) :
  while (iarg < narg) {
    if (strcmp(arg[iarg],"zero") == 0) {
      if (iarg+2 > narg) error->all(FLERR,"Illegal fix langevin/drude command");
-      if (strcmp(arg[iarg+1],"no") == 0) zero = 0;
+      zero = utils::logical(FLERR, arg[iarg + 1], false, lmp);
-      else if (strcmp(arg[iarg+1],"yes") == 0) zero = 1;
-      else error->all(FLERR,"Illegal fix langevin/drude command");
      iarg += 2;
    } else error->all(FLERR,"Illegal fix langevin/drude command");
  }
--- a/Show More
+++ b/Show More