Merge branch 'master' into multi-config-support

# Conflicts: # cmake/Modules/Packages/MSCG.cmake # examples/plugins/CMakeLists.txt
2021-10-11 17:03:41 -04:00
parent 342ca7ff1d a6cde11896
commit 510987dc80
412 changed files with 5927 additions and 3856 deletions
--- a/.github/workflows/compile-msvc.yml
+++ b/.github/workflows/compile-msvc.yml
@ -0,0 +1,33 @@
+# GitHub action to build LAMMPS on Windows with Visual C++
+name: "Native Windows Compilation"
+
+on:
+  push:
+    branches: [master]
+
+jobs:
+  build:
+    name: Windows Compilation Test
+    if: ${{ github.repository == 'lammps/lammps' }}
+    runs-on: windows-latest
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v2
+      with:
+        fetch-depth: 2
+
+    - name: Building LAMMPS via CMake
+      shell: bash
+      run: |
+        cmake -C cmake/presets/windows.cmake \
+              -S cmake -B build \
+              -D BUILD_SHARED_LIBS=on \
+              -D LAMMPS_EXCEPTIONS=on
+        cmake --build build --config Release
+
+    - name: Run LAMMPS executable
+      shell: bash
+      run: |
+        ./build/Release/lmp.exe -h
+        ./build/Release/lmp.exe -in bench/in.lj
--- a/.gitignore
+++ b/.gitignore
@ -37,8 +37,8 @@ vgcore.*
 .Trashes
 ehthumbs.db
 Thumbs.db
-.clang-format
 .lammps_history
+.vs

 #cmake
 /build*
@ -49,3 +49,8 @@ Thumbs.db
 /Testing
 /cmake_install.cmake
 /lmp
+out/Debug
+out/RelWithDebInfo
+out/Release
+out/x86
+out/x64
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -82,6 +82,16 @@ include(CheckIncludeFileCXX)

 # set required compiler flags and compiler/CPU arch specific optimizations
 if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") OR (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM"))
+  if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qrestrict")
+    endif()
+    if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4)
+      set(CMAKE_TUNE_DEFAULT "/QxCOMMON-AVX512")
+    else()
+      set(CMAKE_TUNE_DEFAULT "/QxHost")
+    endif()
+  else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -restrict")
    if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4)
      set(CMAKE_TUNE_DEFAULT "-xCOMMON-AVX512")
@ -89,14 +99,22 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") OR (CMAKE_CXX_COMPILER_ID STREQUAL "
      set(CMAKE_TUNE_DEFAULT "-xHost")
    endif()
  endif()
+endif()

-# we require C++11 without extensions
+# we require C++11 without extensions. Kokkos requires at least C++14 (currently)
 set(CMAKE_CXX_STANDARD 11)
+if(PKG_KOKKOS AND (CMAKE_CXX_STANDARD LESS 14))
+  set(CMAKE_CXX_STANDARD 14)
+endif()
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Use compiler extensions")
-# ugly hack for MSVC which by default always reports an old C++ standard in the __cplusplus macro
+# ugly hacks for MSVC which by default always reports an old C++ standard in the __cplusplus macro
+# and prints lots of pointless warnings about "unsafe" functions
 if(MSVC)
  add_compile_options(/Zc:__cplusplus)
+  add_compile_options(/wd4244)
+  add_compile_options(/wd4267)
+  add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
 endif()

 # export all symbols when building a .dll file on windows
@ -281,6 +299,11 @@ else()
  target_include_directories(mpi_stubs PUBLIC $<BUILD_INTERFACE:${LAMMPS_SOURCE_DIR}/STUBS>)
  if(BUILD_SHARED_LIBS)
    target_link_libraries(lammps PRIVATE mpi_stubs)
+    if(MSVC)
+      target_link_libraries(lmp PRIVATE mpi_stubs)
+      target_include_directories(lmp INTERFACE $<BUILD_INTERFACE:${LAMMPS_SOURCE_DIR}/STUBS>)
+      target_compile_definitions(lmp INTERFACE $<INSTALL_INTERFACE:LAMMPS_LIB_NO_MPI>)
+    endif(MSVC)
    target_include_directories(lammps INTERFACE $<BUILD_INTERFACE:${LAMMPS_SOURCE_DIR}/STUBS>)
    target_compile_definitions(lammps INTERFACE $<INSTALL_INTERFACE:LAMMPS_LIB_NO_MPI>)
  else()
@ -468,9 +491,12 @@ foreach(HEADER cmath)
  endif(NOT FOUND_${HEADER})
 endforeach(HEADER)

-set(MATH_LIBRARIES "m" CACHE STRING "math library")
-mark_as_advanced( MATH_LIBRARIES )
-target_link_libraries(lammps PRIVATE ${MATH_LIBRARIES})
+# make the standard math library overrideable and autodetected (for systems that don't have it)
+find_library(STANDARD_MATH_LIB m DOC "Standard Math library")
+mark_as_advanced(STANDARD_MATH_LIB)
+if(STANDARD_MATH_LIB)
+  target_link_libraries(lammps PRIVATE ${STANDARD_MATH_LIB})
+endif()

 ######################################
 # Generate Basic Style files
@ -608,7 +634,7 @@ endif()
 # and after everything else that is compiled locally
 ######################################################################
 if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
-  target_link_libraries(lammps PRIVATE -lwsock32 -lpsapi)
+  target_link_libraries(lammps PRIVATE "wsock32;psapi")
 endif()

 ######################################################
--- a/cmake/CMakeSettings.json
+++ b/cmake/CMakeSettings.json
@ -0,0 +1,55 @@
+{
+    "configurations": [
+        {
+            "name": "x64-Debug-MSVC",
+            "generator": "Ninja",
+            "configurationType": "Debug",
+            "buildRoot": "${workspaceRoot}\\build\\${name}",
+            "installRoot": "${workspaceRoot}\\install\\${name}",
+            "cmakeCommandArgs": "-S ${workspaceRoot}\\cmake -C ${workspaceRoot}\\cmake\\presets\\windows.cmake",
+            "buildCommandArgs": "",
+            "ctestCommandArgs": "",
+            "inheritEnvironments": [ "msvc_x64_x64" ],
+            "variables": [
+                {
+                    "name": "BUILD_SHARED_LIBS",
+                    "value": "True",
+                    "type": "BOOL"
+                },
+                {
+                    "name": "BUILD_TOOLS",
+                    "value": "True",
+                    "type": "BOOL"
+                },
+                {
+                    "name": "LAMMPS_EXCEPTIONS",
+                    "value": "True",
+                    "type": "BOOL"
+                }
+            ]
+        },
+        {
+            "name": "x64-Debug-Clang",
+            "generator": "Ninja",
+            "configurationType": "Debug",
+            "buildRoot": "${workspaceRoot}\\build\\${name}",
+            "installRoot": "${workspaceRoot}\\install\\${name}",
+            "cmakeCommandArgs": "-S ${workspaceRoot}\\cmake -C ${workspaceRoot}\\cmake\\presets\\windows.cmake",
+            "buildCommandArgs": "",
+            "ctestCommandArgs": "",
+            "inheritEnvironments": [ "clang_cl_x64" ],
+            "variables": [
+                {
+                    "name": "BUILD_TOOLS",
+                    "value": "True",
+                    "type": "BOOL"
+                },
+                {
+                    "name": "LAMMPS_EXCEPTIONS",
+                    "value": "True",
+                    "type": "BOOL"
+                }
+            ]
+        }
+    ]
+}
--- a/cmake/Modules/Packages/GPU.cmake
+++ b/cmake/Modules/Packages/GPU.cmake
@ -217,13 +217,20 @@ elseif(GPU_API STREQUAL "OPENCL")
 elseif(GPU_API STREQUAL "HIP")
  if(NOT DEFINED HIP_PATH)
      if(NOT DEFINED ENV{HIP_PATH})
-          set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
+          set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to HIP installation")
      else()
-          set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
+          set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to HIP installation")
      endif()
  endif()
-  set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
-  find_package(HIP REQUIRED)
+  if(NOT DEFINED ROCM_PATH)
+      if(NOT DEFINED ENV{ROCM_PATH})
+          set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to ROCm installation")
+      else()
+          set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to ROCm installation")
+      endif()
+  endif()
+  list(APPEND CMAKE_PREFIX_PATH ${HIP_PATH} ${ROCM_PATH})
+  find_package(hip REQUIRED)
  option(HIP_USE_DEVICE_SORT "Use GPU sorting" ON)

  if(NOT DEFINED HIP_PLATFORM)
@ -325,10 +332,11 @@ elseif(GPU_API STREQUAL "HIP")

  set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h ${LAMMPS_LIB_BINARY_DIR}/gpu/*.cu.cpp")

-  hip_add_library(gpu STATIC ${GPU_LIB_SOURCES})
+  add_library(gpu STATIC ${GPU_LIB_SOURCES})
  target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu)
  target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT)
  target_compile_definitions(gpu PRIVATE -DUSE_HIP)
+  target_link_libraries(gpu PRIVATE hip::host)

  if(HIP_USE_DEVICE_SORT)
    # add hipCUB
@ -377,8 +385,9 @@ elseif(GPU_API STREQUAL "HIP")
    endif()
  endif()

-  hip_add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
+  add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
  target_compile_definitions(hip_get_devices PRIVATE -DUCL_HIP)
+  target_link_libraries(hip_get_devices PRIVATE hip::host)  # keyword signature; the plain and keyword forms must not be mixed on one target

  if(HIP_PLATFORM STREQUAL "nvcc")
    target_compile_definitions(gpu PRIVATE -D__HIP_PLATFORM_NVCC__)
--- a/cmake/Modules/Packages/KOKKOS.cmake
+++ b/cmake/Modules/Packages/KOKKOS.cmake
@ -1,6 +1,8 @@
 ########################################################################
 # As of version 3.3.0 Kokkos requires C++14
-set(CMAKE_CXX_STANDARD 14)
+if(CMAKE_CXX_STANDARD LESS 14)
+  message(FATAL_ERROR "The KOKKOS package requires the C++ standard to be set to at least C++14")
+endif()
 ########################################################################
 # consistency checks and Kokkos options/settings required by LAMMPS
 if(Kokkos_ENABLE_CUDA)
--- a/cmake/Modules/Packages/LATTE.cmake
+++ b/cmake/Modules/Packages/LATTE.cmake
@ -19,6 +19,14 @@ if(DOWNLOAD_LATTE)
  set(LATTE_MD5 "820e73a457ced178c08c71389a385de7" CACHE STRING "MD5 checksum of LATTE tarball")
  mark_as_advanced(LATTE_URL)
  mark_as_advanced(LATTE_MD5)
+
+  # CMake cannot pass the BLAS or LAPACK library variables to the external project if they hold a list: count the entries of each and stop when either has more than one
+  list(LENGTH BLAS_LIBRARIES NUM_BLAS)
+  list(LENGTH LAPACK_LIBRARIES NUM_LAPACK)
+  if((NUM_BLAS GREATER 1) OR (NUM_LAPACK GREATER 1))
+    message(FATAL_ERROR "Cannot compile downloaded LATTE library due to a technical limitation")
+  endif()
+
  include(ExternalProject)
  ExternalProject_Add(latte_build
    URL     ${LATTE_URL}
--- a/cmake/Modules/Packages/MACHDYN.cmake
+++ b/cmake/Modules/Packages/MACHDYN.cmake
@ -7,8 +7,9 @@ endif()
 option(DOWNLOAD_EIGEN3 "Download Eigen3 instead of using an already installed one)" ${DOWNLOAD_EIGEN3_DEFAULT})
 if(DOWNLOAD_EIGEN3)
  message(STATUS "Eigen3 download requested - we will build our own")
-  set(EIGEN3_URL "https://gitlab.com/libeigen/eigen/-/archive/3.3.9/eigen-3.3.9.tar.gz" CACHE STRING "URL for Eigen3 tarball")
-  set(EIGEN3_MD5 "609286804b0f79be622ccf7f9ff2b660" CACHE STRING "MD5 checksum of Eigen3 tarball")
+
+  set(EIGEN3_URL "https://download.lammps.org/thirdparty/eigen-3.4.0.tar.gz" CACHE STRING "URL for Eigen3 tarball")
+  set(EIGEN3_MD5 "4c527a9171d71a72a9d4186e65bea559" CACHE STRING "MD5 checksum of Eigen3 tarball")
  mark_as_advanced(EIGEN3_URL)
  mark_as_advanced(EIGEN3_MD5)
  include(ExternalProject)
--- a/cmake/Modules/Packages/ML-HDNNP.cmake
+++ b/cmake/Modules/Packages/ML-HDNNP.cmake
@ -45,12 +45,12 @@ if(DOWNLOAD_N2P2)
    # get path to MPI include directory when cross-compiling to windows
    if((CMAKE_SYSTEM_NAME STREQUAL Windows) AND CMAKE_CROSSCOMPILING)
      get_target_property(N2P2_MPI_INCLUDE MPI::MPI_CXX INTERFACE_INCLUDE_DIRECTORIES)
-      set(N2P2_PROJECT_OPTIONS "-I ${N2P2_MPI_INCLUDE} -DMPICH_SKIP_MPICXX=1")
+      set(N2P2_PROJECT_OPTIONS "-I${N2P2_MPI_INCLUDE}")
      set(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER})
    endif()
    if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
      get_target_property(N2P2_MPI_INCLUDE MPI::MPI_CXX INTERFACE_INCLUDE_DIRECTORIES)
-      set(N2P2_PROJECT_OPTIONS "-I ${N2P2_MPI_INCLUDE} -DMPICH_SKIP_MPICXX=1")
+      set(N2P2_PROJECT_OPTIONS "-I${N2P2_MPI_INCLUDE}")
      set(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER})
    endif()
  endif()
@ -69,6 +69,12 @@ if(DOWNLOAD_N2P2)
  # echo final flag for debugging
  message(STATUS "N2P2 BUILD OPTIONS: ${N2P2_BUILD_OPTIONS}")

+  # must have "sed" command to compile n2p2 library (for now)
+  find_program(HAVE_SED sed)
+  if(NOT HAVE_SED)
+    message(FATAL_ERROR "Must have 'sed' program installed to compile 'n2p2' library for ML-HDNNP package")
+  endif()
+
  # download and compile n2p2 library. must patch MPI calls in LAMMPS interface to accommodate MPI-2 (e.g. for cross-compiling)
  include(ExternalProject)
  ExternalProject_Add(n2p2_build
--- a/cmake/Modules/Packages/ML-QUIP.cmake
+++ b/cmake/Modules/Packages/ML-QUIP.cmake
@ -50,7 +50,7 @@ if(DOWNLOAD_QUIP)
    GIT_TAG origin/public
    GIT_SHALLOW YES
    GIT_PROGRESS YES
-    PATCH_COMMAND cp ${CMAKE_BINARY_DIR}/quip.config <SOURCE_DIR>/arch/Makefile.lammps
+    PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_BINARY_DIR}/quip.config <SOURCE_DIR>/arch/Makefile.lammps
    CONFIGURE_COMMAND env QUIP_ARCH=lammps make config
    BUILD_COMMAND env QUIP_ARCH=lammps make libquip
    INSTALL_COMMAND ""
--- a/cmake/Modules/Packages/SCAFACOS.cmake
+++ b/cmake/Modules/Packages/SCAFACOS.cmake
@ -23,6 +23,11 @@ if(DOWNLOAD_SCAFACOS)
  file(DOWNLOAD ${LAMMPS_THIRDPARTY_URL}/scafacos-1.0.1-fix.diff ${CMAKE_CURRENT_BINARY_DIR}/scafacos-1.0.1.fix.diff
          EXPECTED_HASH MD5=4baa1333bb28fcce102d505e1992d032)

+  find_program(HAVE_PATCH patch)
+  if(NOT HAVE_PATCH)
+    message(FATAL_ERROR "The 'patch' program is required to build the ScaFaCoS library")
+  endif()
+
  include(ExternalProject)
  ExternalProject_Add(scafacos_build
    URL     ${SCAFACOS_URL}
--- a/cmake/Modules/Packages/VORONOI.cmake
+++ b/cmake/Modules/Packages/VORONOI.cmake
@ -26,6 +26,11 @@ if(DOWNLOAD_VORO)
    set(VORO_BUILD_OPTIONS CXX=${CMAKE_CXX_COMPILER} CFLAGS=${VORO_BUILD_CFLAGS})
  endif()

+  find_program(HAVE_PATCH patch)
+  if(NOT HAVE_PATCH)
+    message(FATAL_ERROR "The 'patch' program is required to build the voro++ library")
+  endif()
+
  ExternalProject_Add(voro_build
    URL     ${VORO_URL}
    URL_MD5 ${VORO_MD5}
--- a/cmake/Modules/Tools.cmake
+++ b/cmake/Modules/Tools.cmake
@ -25,7 +25,9 @@ if(BUILD_TOOLS)
  get_filename_component(MSI2LMP_SOURCE_DIR ${LAMMPS_TOOLS_DIR}/msi2lmp/src ABSOLUTE)
  file(GLOB MSI2LMP_SOURCES ${MSI2LMP_SOURCE_DIR}/[^.]*.c)
  add_executable(msi2lmp ${MSI2LMP_SOURCES})
-  target_link_libraries(msi2lmp PRIVATE ${MATH_LIBRARIES})
+  if(STANDARD_MATH_LIB)
+    target_link_libraries(msi2lmp PRIVATE ${STANDARD_MATH_LIB})
+  endif()
  install(TARGETS msi2lmp DESTINATION ${CMAKE_INSTALL_BINDIR})
  install(FILES ${LAMMPS_DOC_DIR}/msi2lmp.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
 endif()
--- a/cmake/presets/hip_amd.cmake
+++ b/cmake/presets/hip_amd.cmake
@ -0,0 +1,30 @@
+# preset that will enable hip (clang/clang++) with support for MPI and OpenMP (on Linux boxes)
+
+# prefer flang over gfortran, if available
+find_program(CLANG_FORTRAN NAMES flang gfortran f95)
+set(ENV{OMPI_FC} ${CLANG_FORTRAN})
+
+set(CMAKE_CXX_COMPILER "hipcc" CACHE STRING "" FORCE)
+set(CMAKE_C_COMPILER "hipcc" CACHE STRING "" FORCE)
+set(CMAKE_Fortran_COMPILER ${CLANG_FORTRAN} CACHE STRING "" FORCE)
+set(CMAKE_CXX_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE)
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
+set(CMAKE_Fortran_FLAGS_DEBUG "-Wall -Wextra -g -std=f2003" CACHE STRING "" FORCE)
+set(CMAKE_Fortran_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG -std=f2003" CACHE STRING "" FORCE)
+set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -DNDEBUG -std=f2003" CACHE STRING "" FORCE)
+set(CMAKE_C_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE)
+set(CMAKE_C_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
+set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
+
+set(MPI_CXX "hipcc" CACHE STRING "" FORCE)
+set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
+
+unset(HAVE_OMP_H_INCLUDE CACHE)
+set(OpenMP_C "hipcc" CACHE STRING "" FORCE)
+set(OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" FORCE)
+set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
+set(OpenMP_CXX "hipcc" CACHE STRING "" FORCE)
+set(OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" FORCE)
+set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
+set(OpenMP_omp_LIBRARY "libomp.so" CACHE PATH "" FORCE)
--- a/cmake/presets/windows.cmake
+++ b/cmake/presets/windows.cmake
@ -0,0 +1,64 @@
+set(WIN_PACKAGES
+  ASPHERE
+  BOCS
+  BODY
+  BROWNIAN
+  CG-DNA
+  CG-SDK
+  CLASS2
+  COLLOID
+  COLVARS
+  CORESHELL
+  DIELECTRIC
+  DIFFRACTION
+  DIPOLE
+  DPD-BASIC
+  DPD-MESO
+  DPD-REACT
+  DPD-SMOOTH
+  DRUDE
+  EFF
+  EXTRA-COMPUTE
+  EXTRA-DUMP
+  EXTRA-FIX
+  EXTRA-MOLECULE
+  EXTRA-PAIR
+  FEP
+  GRANULAR
+  INTERLAYER
+  KSPACE
+  MANIFOLD
+  MANYBODY
+  MC
+  MEAM
+  MISC
+  ML-IAP
+  ML-SNAP
+  MOFFF
+  MOLECULE
+  MOLFILE
+  OPENMP
+  ORIENT
+  PERI
+  PHONON
+  POEMS
+  PTM
+  QEQ
+  QTB
+  REACTION
+  REAXFF
+  REPLICA
+  RIGID
+  SHOCK
+  SMTBQ
+  SPH
+  SPIN
+  SRD
+  TALLY
+  UEF
+  YAFF)
+
+foreach(PKG ${WIN_PACKAGES})
+  set(PKG_${PKG} ON CACHE BOOL "" FORCE)
+endforeach()
+
--- a/doc/doxygen/Doxyfile.in
+++ b/doc/doxygen/Doxyfile.in
@ -435,6 +435,8 @@ INPUT                  = @LAMMPS_SOURCE_DIR@/utils.cpp                 \
                         @LAMMPS_SOURCE_DIR@/my_pool_chunk.cpp         \
                         @LAMMPS_SOURCE_DIR@/my_pool_chunk.h           \
                         @LAMMPS_SOURCE_DIR@/math_eigen.h              \
+                         @LAMMPS_SOURCE_DIR@/platform.h                \
+                         @LAMMPS_SOURCE_DIR@/platform.cpp              \

 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
--- a/doc/lammps.1
+++ b/doc/lammps.1
@ -1,4 +1,4 @@
-.TH LAMMPS "20 September 2021" "2021-09-20"
+.TH LAMMPS "29 September 2021" "2021-09-29"
 .SH NAME
 .B LAMMPS
 \- Molecular Dynamics Simulator.
--- a/doc/src/Build_settings.rst
+++ b/doc/src/Build_settings.rst
@ -71,7 +71,8 @@ LAMMPS can use them if they are available on your system.

         -D FFTW3_INCLUDE_DIR=path   # path to FFTW3 include files
         -D FFTW3_LIBRARY=path       # path to FFTW3 libraries
-         -D FFT_FFTW_THREADS=on      # enable using threaded FFTW3 libraries
+         -D FFTW3_OMP_LIBRARY=path   # path to FFTW3 OpenMP wrapper libraries
+         -D FFT_FFTW_THREADS=on      # enable using OpenMP threaded FFTW3 libraries
         -D MKL_INCLUDE_DIR=path     # ditto for Intel MKL library
         -D FFT_MKL_THREADS=on       # enable using threaded FFTs with MKL libraries
         -D MKL_LIBRARY=path         # path to MKL libraries
@ -353,8 +354,10 @@ Read or write compressed files
 -----------------------------------------

 If this option is enabled, large files can be read or written with
-gzip compression by several LAMMPS commands, including
-:doc:`read_data <read_data>`, :doc:`rerun <rerun>`, and :doc:`dump <dump>`.
+compression by ``gzip`` or similar tools by several LAMMPS commands,
+including :doc:`read_data <read_data>`, :doc:`rerun <rerun>`, and
+:doc:`dump <dump>`.  Currently supported compression tools are:
+``gzip``, ``bzip2``, ``zstd``, and ``lzma``.

 .. tabs::

@ -363,8 +366,7 @@ gzip compression by several LAMMPS commands, including
      .. code-block:: bash

         -D WITH_GZIP=value       # yes or no
-                                  # default is yes if CMake can find gzip, else no
-         -D GZIP_EXECUTABLE=path  # path to gzip executable if CMake cannot find it
+                                  # default is yes if CMake can find the gzip program, else no

   .. tab:: Traditional make

@ -372,14 +374,15 @@ gzip compression by several LAMMPS commands, including

         LMP_INC = -DLAMMPS_GZIP

-This option requires that your operating system fully supports the "popen()"
-function in the standard runtime library and that a ``gzip`` executable can be
-found by LAMMPS during a run.
+This option requires that your operating system fully supports the
+"popen()" function in the standard runtime library and that a ``gzip``
+or other executable can be found by LAMMPS in the standard search path
+during a run.

 .. note::

-   On some clusters with high-speed networks, using the "fork()" library
-   call (required by "popen()") can interfere with the fast communication
+   On clusters with high-speed networks, using the "fork()" library call
+   (required by "popen()") can interfere with the fast communication
   library and lead to simulations using compressed output or input to
   hang or crash. For selected operations, compressed file I/O is also
   available using a compression library instead, which is what the
--- a/doc/src/Build_windows.rst
+++ b/doc/src/Build_windows.rst
@ -4,6 +4,7 @@ Notes for building LAMMPS on Windows
 * :ref:`General remarks <generic>`
 * :ref:`Running Linux on Windows <linux>`
 * :ref:`Using GNU GCC ported to Windows <gnu>`
+* :ref:`Using Visual Studio <msvc>`
 * :ref:`Using a cross-compiler <cross>`

 ----------
@ -31,13 +32,13 @@ pre-compiled Windows binary packages are sufficient for your needs.  If
 it is necessary for you to compile LAMMPS on a Windows machine
 (e.g. because it is your main desktop), please also consider using a
 virtual machine software and compile and run LAMMPS in a Linux virtual
-machine, or - if you have a sufficiently up-to-date Windows 10
-installation - consider using the Windows subsystem for Linux.  This
-optional Windows feature allows you to run the bash shell from Ubuntu
-from within Windows and from there on, you can pretty much use that
-shell like you are running on an Ubuntu Linux machine (e.g. installing
-software via apt-get and more).  For more details on that, please see
-:doc:`this tutorial <Howto_wsl>`.
+machine, or - if you have a sufficiently up-to-date Windows 10 or
+Windows 11 installation - consider using the Windows subsystem for
+Linux.  This optional Windows feature allows you to run the bash shell
+from Ubuntu from within Windows and from there on, you can pretty much
+use that shell like you are running on an Ubuntu Linux machine
+(e.g. installing software via apt-get and more).  For more details on
+that, please see :doc:`this tutorial <Howto_wsl>`.

 .. _gnu:

@ -67,6 +68,35 @@ requiring changes to the LAMMPS source code, or figure out corrections
 yourself, please report them on the lammps-users mailing list, or file
 them as an issue or pull request on the LAMMPS GitHub project.

+.. _msvc:
+
+Using Microsoft Visual Studio
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Following the integration of the :doc:`platform namespace
+<Developer_platform>` into the LAMMPS code base, portability of LAMMPS
+to be compiled on Windows using Visual Studio has been significantly
+improved.  This has been tested with Visual Studio 2019 (aka version
+16).  Not all features and packages in LAMMPS are currently supported
+out of the box, but a preset ``cmake/presets/windows.cmake`` is provided
+that contains the packages that have been compiled successfully.  You
+must use the CMake based build procedure, and either use the integrated
+CMake support of Visual Studio or use an external CMake installation to
+create build files for the Visual Studio build system.  Please note that
+on launching Visual Studio it will scan the directory tree and likely
+miss the correct master ``CMakeLists.txt``.  Try to open the
+``cmake/CMakeSettings.json`` and use those CMake configurations as a
+starting point.  It is also possible to configure and compile LAMMPS
+from the command line with a CMake binary from `cmake.org <https://cmake.org>`_.
+
+To support running in parallel you can compile with OpenMP enabled using
+the OPENMP package or install Microsoft MPI (including the SDK) and compile
+LAMMPS with MPI enabled.
+
+This is work in progress and you should contact the LAMMPS developers
+via GitHub, the forum, or the mailing list, if you have questions or
+LAMMPS specific problems.
+
 .. _cross:

 Using a cross-compiler
--- a/doc/src/Developer.rst
+++ b/doc/src/Developer.rst
@ -11,10 +11,12 @@ of time and requests from the LAMMPS user community.
   :maxdepth: 1

   Developer_org
+   Developer_parallel
   Developer_flow
   Developer_write
   Developer_notes
   Developer_plugins
   Developer_unittest
   Classes
+   Developer_platform
   Developer_utils
--- a/doc/src/Developer_par_comm.rst
+++ b/doc/src/Developer_par_comm.rst
@ -0,0 +1,120 @@
+Communication
+^^^^^^^^^^^^^
+
+Following the partitioning scheme in use all per-atom data is
+distributed across the MPI processes, which allows LAMMPS to handle very
+large systems provided it uses a correspondingly large number of MPI
+processes.  The per-atom data includes atom IDs, positions, velocities,
+types, etc.  To be able to compute the short-range interactions, MPI
+processes need access not only to data of atoms they "own" but also
+information about atoms from neighboring sub-domains, in LAMMPS referred
+to as "ghost" atoms.  These are copies of atoms storing required
+per-atom data for up to the communication cutoff distance. The green
+dashed-line boxes in the :ref:`domain-decomposition` figure illustrate
+the extended ghost-atom sub-domain for one processor.
+
+This approach is also used to implement periodic boundary
+conditions: atoms that lie within the cutoff distance across a periodic
+boundary are also stored as ghost atoms and taken from the periodic
+replication of the sub-domain, which may be the same sub-domain, e.g. if
+running in serial.  As a consequence of this, force computation in
+LAMMPS is not subject to minimum image conventions and thus cutoffs may
+be larger than half the simulation domain.
+
+.. _ghost-atom-comm:
+.. figure:: img/ghost-comm.png
+   :align: center
+
+   ghost atom communication
+
+   This figure shows the ghost atom communication patterns between
+   sub-domains for "brick" (left) and "tiled" communication styles for
+   2d simulations.  The numbers indicate MPI process ranks.  Here the
+   sub-domains are drawn spatially separated for clarity.  The
+   dashed-line box is the extended sub-domain of processor 0 which
+   includes its ghost atoms.  The red- and blue-shaded boxes are the
+   regions of communicated ghost atoms.
+
+Efficient communication patterns are needed to update the "ghost" atom
+data, since that needs to be done at every MD time step or minimization
+step.  The diagrams of the :ref:`ghost-atom-comm` figure illustrate how ghost
+atom communication is performed in two stages for a 2d simulation (three
+in 3d) for both a regular and irregular partitioning of the simulation
+box.  For the regular case (left) atoms are exchanged first in the
+*x*-direction, then in *y*, with four neighbors in the grid of processor
+sub-domains.
+
+In the *x* stage, processor ranks 1 and 2 send owned atoms in their
+red-shaded regions to rank 0 (and vice versa).  Then in the *y* stage,
+ranks 3 and 4 send atoms in their blue-shaded regions to rank 0, which
+includes ghost atoms they received in the *x* stage.  Rank 0 thus
+acquires all its ghost atoms; atoms in the solid blue corner regions
+are communicated twice before rank 0 receives them.
+
+For the irregular case (right) the two stages are similar, but a
+processor can have more than one neighbor in each direction.  In the
+*x* stage, MPI ranks 1,2,3 send owned atoms in their red-shaded regions to
+rank 0 (and vice versa).  These include only atoms between the lower
+and upper *y*-boundary of rank 0's sub-domain.  In the *y* stage, ranks
+4,5,6 send atoms in their blue-shaded regions to rank 0.  This may
+include ghost atoms they received in the *x* stage, but only if they
+are needed by rank 0 to fill its extended ghost atom regions in the
++/-*y* directions (blue rectangles).  Thus in this case, ranks 5 and
+6 do not include ghost atoms they received from each other (in the *x*
+stage) in the atoms they send to rank 0.  The key point is that while
+the pattern of communication is more complex in the irregular
+partitioning case, it can still proceed in two stages (three in 3d)
+via atom exchanges with only neighboring processors.
+
+When attributes of owned atoms are sent to neighboring processors to
+become attributes of their ghost atoms, LAMMPS calls this a "forward"
+communication.  On timesteps when atoms migrate to new owning processors
+and neighbor lists are rebuilt, each processor creates a list of its
+owned atoms which are ghost atoms in each of its neighbor processors.
+These lists are used to pack per-atom coordinates (for example) into
+message buffers in subsequent steps until the next reneighboring.
+
+A "reverse" communication is when computed ghost atom attributes are
+sent back to the processor who owns the atom.  This is used (for
+example) to sum partial forces on ghost atoms to the complete force on
+owned atoms.  The order of the two stages described in the
+:ref:`ghost-atom-comm` figure is inverted and the same lists of atoms
+are used to pack and unpack message buffers with per-atom forces.  When
+a received buffer is unpacked, the ghost forces are summed to owned atom
+forces.  As in forward communication, forces on atoms in the four blue
+corners of the diagrams are sent, received, and summed twice (once at
+each stage) before owning processors have the full force.
+
+These two operations are used in many places within LAMMPS aside from
+exchange of coordinates and forces, for example by manybody potentials
+to share intermediate per-atom values, or by rigid-body integrators to
+enable each atom in a body to access body properties.  Here are
+additional details about how these communication operations are
+performed in LAMMPS:
+
+- When exchanging data with different processors, forward and reverse
+  communication is done using ``MPI_Send()`` and ``MPI_IRecv()`` calls.
+  If a processor is "exchanging" atoms with itself, only the pack and
+  unpack operations are performed, e.g. to create ghost atoms across
+  periodic boundaries when running on a single processor.
+
+- For forward communication of owned atom coordinates, periodic box
+  lengths are added and subtracted when the receiving processor is
+  across a periodic boundary from the sender.  There is then no need to
+  apply a minimum image convention when calculating distances between
+  atom pairs when building neighbor lists or computing forces.
+
+- The cutoff distance for exchanging ghost atoms is typically equal to
+  the neighbor cutoff.  But it can also be chosen to be longer if needed,
+  e.g. half the diameter of a rigid body composed of multiple atoms or
+  over 3x the length of a stretched bond for dihedral interactions.  It
+  can also exceed the periodic box size.  For the regular communication
+  pattern (left), if the cutoff distance extends beyond a neighbor
+  processor's sub-domain, then multiple exchanges are performed in the
+  same direction.  Each exchange is with the same neighbor processor,
+  but buffers are packed/unpacked using a different list of atoms. For
+  forward communication, in the first exchange a processor sends only
+  owned atoms.  In subsequent exchanges, it sends ghost atoms received
+  in previous exchanges.  For the irregular pattern (right) overlaps of
+  a processor's extended ghost-atom sub-domain with all other processors
+  in each dimension are detected.
--- a/doc/src/Developer_par_long.rst
+++ b/doc/src/Developer_par_long.rst
@ -0,0 +1,188 @@
+Long-range interactions
+^^^^^^^^^^^^^^^^^^^^^^^
+
+For charged systems, LAMMPS can compute long-range Coulombic
+interactions via the FFT-based particle-particle/particle-mesh (PPPM)
+method implemented in :doc:`kspace style pppm and its variants
+<kspace_style>`.  For that, Coulombic interactions are partitioned into
+short- and long-range components.  The short-ranged portion is computed
+in real space as a loop over pairs of charges within a cutoff distance,
+using neighbor lists.  The long-range portion is computed in reciprocal
+space using a kspace style.  For the PPPM implementation the simulation
+cell is overlaid with a regular FFT grid in 3d. It proceeds in several stages:
+
+a) each atom's point charge is interpolated to nearby FFT grid points,
+b) a forward 3d FFT is performed,
+c) a convolution operation is performed in reciprocal space,
+d) one or more inverse 3d FFTs are performed, and
+e) electric field values from grid points near each atom are interpolated to compute
+   its forces.
+
+For any of the spatial-decomposition partitioning schemes each processor
+owns the brick-shaped portion of FFT grid points contained within its
+sub-domain.  The two interpolation operations use a stencil of grid
+points surrounding each atom.  To accommodate the stencil size, each
+processor also stores a few layers of ghost grid points surrounding its
+brick.  Forward and reverse communication of grid point values is
+performed similar to the corresponding :doc:`atom data communication
+<Developer_par_comm>`.  In this case, electric field values on owned
+grid points are sent to neighboring processors to become ghost point
+values.  Likewise charge values on ghost points are sent and summed to
+values on owned points.
+
+For triclinic simulation boxes, the FFT grid planes are parallel to
+the box faces, but the mapping of charge and electric field values
+to/from grid points is done in reduced coordinates where the tilted
+box is conceptually a unit cube, so that the stencil and FFT
+operations are unchanged.  However the FFT grid size required for a
+given accuracy is larger for triclinic domains than it is for
+orthogonal boxes.
+
+.. _fft-parallel:
+.. figure:: img/fft-decomp-parallel.png
+   :align: center
+
+   parallel FFT in PPPM
+
+   Stages of a parallel FFT for a simulation domain overlaid
+   with an 8x8x8 3d FFT grid, partitioned across 64 processors.
+   Within each of the 4 diagrams, grid cells of the same color are
+   owned by a single processor; for simplicity only cells owned by 4
+   or 8 of the 64 processors are colored.  The two images on the left
+   illustrate brick-to-pencil communication.  The two images on the
+   right illustrate pencil-to-pencil communication, which in this
+   case transposes the *y* and *z* dimensions of the grid.
+
+Parallel 3d FFTs require substantial communication relative to their
+computational cost.  A 3d FFT is implemented by a series of 1d FFTs
+along the *x-*, *y-*, and *z-*\ direction of the FFT grid.  Thus the FFT
+grid cannot be decomposed like atoms into 3 dimensions for parallel
+processing of the FFTs but only in 1 (as planes) or 2 (as pencils)
+dimensions and in between the steps the grid needs to be transposed to
+have the FFT grid portion "owned" by each MPI process complete in the
+direction of the 1d FFTs it has to perform. LAMMPS uses the
+pencil-decomposition algorithm as shown in the :ref:`fft-parallel` figure.
+
+Initially (far left), each processor owns a brick of same-color grid
+cells (actually grid points) contained within its sub-domain.  A
+brick-to-pencil communication operation converts this layout to 1d
+pencils in the *x*-dimension (center left).  Again, cells of the same
+color are owned by the same processor.  Each processor can then compute
+a 1d FFT on each pencil of data it wholly owns using a call to the
+configured FFT library.  A pencil-to-pencil communication then converts
+this layout to pencils in the *y* dimension (center right) which
+effectively transposes the *x* and *y* dimensions of the grid, followed
+by 1d FFTs in *y*.  A final transpose of pencils from *y* to *z* (far
+right) followed by 1d FFTs in *z* completes the forward FFT.  The data
+is left in a *z*-pencil layout for the convolution operation.  One or
+more inverse FFTs then perform the sequence of 1d FFTs and communication
+steps in reverse order; the final layout of resulting grid values is the
+same as the initial brick layout.
+
+Each communication operation within the FFT (brick-to-pencil or
+pencil-to-pencil or pencil-to-brick) converts one tiling of the 3d grid
+to another, where a tiling in this context means an assignment of a
+small brick-shaped subset of grid points to each processor, the union of
+which comprises the entire grid.  The parallel `fftMPI library
+<https://lammps.github.io/fftmpi/>`_ written for LAMMPS allows arbitrary
+definitions of the tiling so that an irregular partitioning of the
+simulation domain can use it directly.  Transforming data from one
+tiling to another is implemented in `fftMPI` using point-to-point
+communication, where each processor sends data to a few other
+processors, since each tile in the initial tiling overlaps with a
+handful of tiles in the final tiling.
+
+The transformations could also be done using collective communication
+across all *P* processors with a single call to ``MPI_Alltoall()``, but
+this is typically much slower.  However, for the specialized brick and
+pencil tiling illustrated in the :ref:`fft-parallel` figure, collective
+communication across the entire MPI communicator is not required.  In
+the example an :math:`8^3` grid with 512 grid cells is partitioned
+across 64 processors; each processor owns a 2x2x2 3d brick of grid
+cells.  The initial brick-to-pencil communication (far left to center
+left) only requires collective communication within subgroups of 4
+processors, as illustrated by the 4 colors.  More generally, a
+brick-to-pencil communication can be performed by partitioning *P*
+processors into :math:`P^{\frac{2}{3}}` subgroups of
+:math:`P^{\frac{1}{3}}` processors each.  Each subgroup performs
+collective communication only within its subgroup.  Similarly,
+pencil-to-pencil communication can be performed by partitioning *P*
+processors into :math:`P^{\frac{1}{2}}` subgroups of
+:math:`P^{\frac{1}{2}}` processors each.  This is illustrated in the
+figure for the :math:`y \Rightarrow z` communication (center).  An
+eight-processor subgroup owns the front *yz* plane of data and performs
+collective communication within the subgroup to transpose from a
+*y*-pencil to *z*-pencil layout.
+
+LAMMPS invokes point-to-point communication by default, but also
+provides the option of partitioned collective communication when using a
+:doc:`kspace_modify collective yes <kspace_modify>` command to switch to
+that mode.  In the latter case, the code detects the size of the
+disjoint subgroups and partitions the single *P*-size communicator into
+multiple smaller communicators, each of which invokes collective
+communication.  Testing on a large IBM Blue Gene/Q machine at Argonne
+National Labs showed a significant improvement in FFT performance for
+large processor counts; partitioned collective communication was faster
+than point-to-point communication or global collective communication
+involving all *P* processors.
+
+Here are some additional details about FFTs for long-range and related
+grid/particle operations that LAMMPS supports:
+
+- The fftMPI library allows each grid dimension to be a multiple of
+  small prime factors (2,3,5), and allows any number of processors to
+  perform the FFT.  The resulting brick and pencil decompositions are
+  thus not always as well-aligned but the size of subgroups of
+  processors for the two modes of communication (brick/pencil and
+  pencil/pencil) still scale as :math:`O(P^{\frac{1}{3}})` and
+  :math:`O(P^{\frac{1}{2}})`.
+
+- For efficiency in performing 1d FFTs, the grid transpose
+  operations illustrated in the :ref:`fft-parallel` figure also involve
+  reordering the 3d data so that a different dimension is contiguous
+  in memory.  This reordering can be done during the packing or
+  unpacking of buffers for MPI communication.
+
+- For large systems and particularly a large number of MPI processes,
+  the dominant cost for parallel FFTs is often the communication, not
+  the computation of 1d FFTs, even though the latter scales as :math:`N
+  \log(N)` in the number of grid points *N* per grid direction.  This is
+  due to the fact that only a 2d decomposition into pencils is possible
+  while atom data (and their corresponding short-range force and energy
+  computations) can be decomposed efficiently in 3d.
+
+  This can be addressed by reducing the number of MPI processes involved
+  in the MPI communication by using :doc:`hybrid MPI + OpenMP
+  parallelization <Speed_omp>`.  This will use OpenMP parallelization
+  inside the MPI domains and while that may have a lower parallel
+  efficiency, it reduces the communication overhead.
+
+  As an alternative it is also possible to start a :ref:`multi-partition
+  <partition>` calculation and then use the :doc:`verlet/split
+  integrator <run_style>` to perform the PPPM computation on a
+  dedicated, separate partition of MPI processes.  This uses an integer
+  "1:*p*" mapping of *p* sub-domains of the atom decomposition to one
+  sub-domain of the FFT grid decomposition and where pairwise non-bonded
+  and bonded forces and energies are computed on the larger partition
+  and the PPPM kspace computation concurrently on the smaller partition.
+
+- LAMMPS also implements PPPM-based solvers for other long-range
+  interactions, dipole and dispersion (Lennard-Jones), which can be used
+  in conjunction with long-range Coulombics for point charges.
+
+- LAMMPS implements a ``GridComm`` class which overlays the simulation
+  domain with a regular grid, partitions it across processors in a
+  manner consistent with processor sub-domains, and provides methods for
+  forward and reverse communication of owned and ghost grid point
+  values.  It is used for PPPM as an FFT grid (as outlined above) and
+  also for the MSM algorithm which uses a cascade of grid sizes from
+  fine to coarse to compute long-range Coulombic forces.  The GridComm
+  class is also useful for models where continuum fields interact with
+  particles.  For example, the two-temperature model (TTM) defines heat
+  transfer between atoms (particles) and electrons (continuum gas) where
+  spatial variations in the electron temperature are computed by finite
+  differences of a discretized heat equation on a regular grid.  The
+  :doc:`fix ttm/grid <fix_ttm>` command uses the ``GridComm`` class
+  internally to perform its grid operations on a distributed grid
+  instead of the original :doc:`fix ttm <fix_ttm>` which uses a
+  replicated grid.
--- a/doc/src/Developer_par_neigh.rst
+++ b/doc/src/Developer_par_neigh.rst
@ -0,0 +1,159 @@
+Neighbor lists
+^^^^^^^^^^^^^^
+
+To compute forces efficiently, each processor creates a Verlet-style
+neighbor list which enumerates all pairs of atoms *i,j* (*i* = owned,
+*j* = owned or ghost) with separation less than the applicable
+neighbor list cutoff distance.  In LAMMPS the neighbor lists are stored
+in a multiple-page data structure; each page is a contiguous chunk of
+memory which stores vectors of neighbor atoms *j* for many *i* atoms.
+This allows pages to be incrementally allocated or deallocated in blocks
+as needed.  Neighbor lists typically consume the most memory of any data
+structure in LAMMPS.  The neighbor list is rebuilt (from scratch) once
+every few timesteps, then used repeatedly each step for force or other
+computations.  The neighbor cutoff distance is :math:`R_n = R_f +
+\Delta_s`, where :math:`R_f` is the (largest) force cutoff defined by
+the interatomic potential for computing short-range pairwise or manybody
+forces and :math:`\Delta_s` is a "skin" distance that allows the list to
+be used for multiple steps assuming that atoms do not move very far
+between consecutive time steps.  Typically the code triggers
+reneighboring when any atom has moved half the skin distance since the
+last reneighboring; this and other options of the neighbor list rebuild
+can be adjusted with the :doc:`neigh_modify <neigh_modify>` command.
+
+On steps when reneighboring is performed, atoms which have moved outside
+their owning processor's sub-domain are first migrated to new processors
+via communication.  Periodic boundary conditions are also (only)
+enforced on these steps to ensure each atom is re-assigned to the
+correct processor.  After migration, the atoms owned by each processor
+are stored in a contiguous vector.  Periodically each processor
+spatially sorts owned atoms within its vector to reorder it for improved
+cache efficiency in force computations and neighbor list building.  For
+that, atoms are spatially binned and then reordered so that atoms in the
+same bin are adjacent in the vector.  Atom sorting can be disabled or
+its settings modified with the :doc:`atom_modify <atom_modify>` command.
+
+.. _neighbor-stencil:
+.. figure:: img/neigh-stencil.png
+   :align: center
+
+   neighbor list stencils
+
+   A 2d simulation sub-domain (thick black line) and the corresponding
+   ghost atom cutoff region (dashed blue line) for both orthogonal
+   (left) and triclinic (right) domains.  A regular grid of neighbor
+   bins (thin lines) overlays the entire simulation domain and need not
+   align with sub-domain boundaries; only the portion overlapping the
+   augmented sub-domain is shown.  In the triclinic case it overlaps the
+   bounding box of the tilted rectangle.  The blue- and red-shaded bins
+   represent a stencil of bins searched to find neighbors of a particular
+   atom (black dot).
+
+To build a local neighbor list in linear time, the simulation domain is
+overlaid (conceptually) with a regular 3d (or 2d) grid of neighbor bins,
+as shown in the :ref:`neighbor-stencil` figure for 2d models and a
+single MPI processor's sub-domain.  Each processor stores a set of
+neighbor bins which overlap its sub-domain extended by the neighbor
+cutoff distance :math:`R_n`.  As illustrated, the bins need not align
+with processor boundaries; an integer number in each dimension is fit to
+the size of the entire simulation box.
+
+Most often LAMMPS builds what it calls a "half" neighbor list where
+each *i,j* neighbor pair is stored only once, with either atom *i* or
+*j* as the central atom.  The build can be done efficiently by using a
+pre-computed "stencil" of bins around a central origin bin which
+contains the atom whose neighbors are being searched for.  A stencil
+is simply a list of integer offsets in *x,y,z* of nearby bins
+surrounding the origin bin which are close enough to contain any
+neighbor atom *j* within a distance :math:`R_n` from any atom *i* in the
+origin bin.  Note that for a half neighbor list, the stencil can be
+asymmetric since each atom only need store half its nearby neighbors.
+
+These stencils are illustrated in the figure for a half list and a bin
+size of :math:`\frac{1}{2} R_n`.  There are 13 red+blue stencil bins in
+2d (for the orthogonal case, 15 for triclinic).  In 3d there would be
+63, 13 in the plane of bins that contain the origin bin and 25 in each
+of the two planes above it in the *z* direction (75 for triclinic).  The
+reason the triclinic stencil has extra bins is because the bins tile the
+bounding box of the entire triclinic domain and thus are not periodic
+with respect to the simulation box itself.  The stencil and logic for
+determining which *i,j* pairs to include in the neighbor list are
+altered slightly to account for this.
+
+To build a neighbor list, a processor first loops over its "owned" plus
+"ghost" atoms and assigns each to a neighbor bin.  This uses an integer
+vector to create a linked list of atom indices within each bin.  It then
+performs a triply-nested loop over its owned atoms *i*, the stencil of
+bins surrounding atom *i*'s bin, and the *j* atoms in each stencil bin
+(including ghost atoms).  If the distance :math:`r_{ij} < R_n`, then
+atom *j* is added to the vector of atom *i*'s neighbors.
+
+Here are additional details about neighbor list build options LAMMPS
+supports:
+
+- The choice of bin size is an option; a size half of :math:`R_n` has
+  been found to be optimal for many typical cases.  Smaller bins incur
+  additional overhead to loop over; larger bins require more distance
+  calculations.  Note that for smaller bin sizes, the 2d stencil in the
+  figure would be more semi-circular in shape (hemispherical in 3d),
+  with bins near the corners of the square eliminated due to their
+  distance from the origin bin.
+
+- Depending on the interatomic potential(s) and other commands used in
+  an input script, multiple neighbor lists and stencils with different
+  attributes may be needed.  This includes lists with different cutoff
+  distances, e.g. for force computation versus occasional diagnostic
+  computations such as a radial distribution function, or for the
+  r-RESPA time integrator which can partition pairwise forces by
+  distance into subsets computed at different time intervals.  It
+  includes "full" lists (as opposed to half lists) where each *i,j* pair
+  appears twice, stored once with *i* and once with *j*, and which use a larger
+  symmetric stencil.  It also includes lists with partial enumeration of
+  ghost atom neighbors.  The full and ghost-atom lists are used by
+  various manybody interatomic potentials.  Lists may also use different
+  criteria for inclusion of a pair interaction.  Typically this simply
+  depends only on the distance between two atoms and the cutoff
+  distance.  But for finite-size coarse-grained particles with
+  individual diameters (e.g. polydisperse granular particles), it can
+  also depend on the diameters of the two particles.
+
+- When using :doc:`pair style hybrid <pair_hybrid>` multiple sub-lists
+  of the master neighbor list for the full system need to be generated,
+  one for each sub-style, which contains only the *i,j* pairs needed to
+  compute interactions between subsets of atoms for the corresponding
+  potential.  This means not all *i* or *j* atoms owned by a processor
+  are included in a particular sub-list.
+
+- Some models use different cutoff lengths for pairwise interactions
+  between different kinds of particles which are stored in a single
+  neighbor list.  One example is a solvated colloidal system with large
+  colloidal particles where colloid/colloid, colloid/solvent, and
+  solvent/solvent interaction cutoffs can be dramatically different.
+  Another is a model of polydisperse finite-size granular particles;
+  pairs of particles interact only when they are in contact with each
+  other.  Mixtures with particle size ratios as high as 10-100x may be
+  used to model realistic systems.  Efficient neighbor list building
+  algorithms for these kinds of systems are available in LAMMPS.  They
+  include a method which uses different stencils for different cutoff
+  lengths and trims the stencil to only include bins that straddle the
+  cutoff sphere surface.  More recently a method which uses both
+  multiple stencils and multiple bin sizes was developed; it builds
+  neighbor lists efficiently for systems with particles of any size
+  ratio, though other considerations (timestep size, force computations)
+  may limit the ability to model systems with huge polydispersity.
+
+- For small and sparse systems and as a fallback method, LAMMPS also
+  supports neighbor list construction without binning by using a full
+  :math:`O(N^2)` loop over all *i,j* atom pairs in a sub-domain when
+  using the :doc:`neighbor nsq <neighbor>` command.
+
+- Dependent on the "pair" setting of the :doc:`newton <newton>` command,
+  the "half" neighbor lists may contain **all** pairs of atoms where
+  atom *j* is a ghost atom (i.e. when the newton pair setting is *off*).
+  For the newton pair *on* setting the atom *j* is only added to the
+  list if its *z* coordinate is larger, or if equal the *y* coordinate
+  is larger, and if that is equal, too, the *x* coordinate is larger.  For
+  homogeneously dense systems that will result in picking neighbors from
+  a same size sector in always the same direction relative to the
+  "owned" atom and thus it should lead to similar length neighbor lists
+  and thus reduce the chance of a load imbalance.
--- a/doc/src/Developer_par_openmp.rst
+++ b/doc/src/Developer_par_openmp.rst
@ -0,0 +1,114 @@
+OpenMP Parallelism
+^^^^^^^^^^^^^^^^^^
+
+The styles in the INTEL, KOKKOS, and OPENMP packages offer to use OpenMP
+thread parallelism to predominantly distribute loops over local data
+and thus follow an orthogonal parallelization strategy to the
+decomposition into spatial domains used by the :doc:`MPI partitioning
+<Developer_par_part>`.  For clarity, this section discusses only the
+implementation in the OPENMP package as it is the simplest. The INTEL
+and KOKKOS packages offer additional options and are more complex since
+they support more features and different hardware like co-processors
+or GPUs.
+
+One of the key decisions when implementing the OPENMP package was to
+keep the changes to the source code small, so that it would be easier to
+maintain the code and keep it in sync with the non-threaded standard
+implementation.  This is achieved by a) making the OPENMP version a
+derived class from the regular version (e.g. ``PairLJCutOMP`` from
+``PairLJCut``) and overriding only methods that are multi-threaded or
+need to be modified to support multi-threading (similar to what was done
+in the OPT package), b) keeping the structure in the modified code very
+similar so that side-by-side comparisons are still useful, and c)
+offloading additional functionality and multi-thread support functions
+into three separate classes ``ThrOMP``, ``ThrData``, and ``FixOMP``.
+``ThrOMP`` provides additional, multi-thread aware functionality not
+available in the corresponding base class (e.g. ``Pair`` for
+``PairLJCutOMP``) like multi-thread aware variants of the "tally"
+functions. Those functions are made available through multiple
+inheritance so those new functions have to have unique names to avoid
+ambiguities; typically ``_thr`` is appended to the name of the function.
+``ThrData`` is a class that manages per-thread data structures.
+It is used instead of extending the corresponding storage to per-thread
+arrays to avoid slowdowns due to "false sharing" when multiple threads
+update adjacent elements in an array and thus force the CPU cache lines
+to be reset and re-fetched.  ``FixOMP`` finally manages the "multi-thread
+state" like settings and access to per-thread storage, it is activated
+by the :doc:`package omp <package>` command.
+
+Avoiding data races
+"""""""""""""""""""
+
+A key problem when implementing thread parallelism in an MD code is
+to avoid data races when updating accumulated properties like forces,
+energies, and stresses.  When interactions are computed, they always
+involve multiple atoms and thus there are race conditions when multiple
+threads want to update per-atom data of the same atoms.  Five possible
+strategies have been considered to avoid this:
+
+1) restructure the code so that there is no overlapping access possible
+   when computing in parallel, e.g. by breaking lists into multiple
+   parts and synchronizing threads in between.
+2) have each thread be "responsible" for a specific group of atoms and
+   compute these interactions multiple times, once on each thread that
+   is responsible for a given atom and then have each thread only update
+   the properties of this atom.
+3) use mutexes around functions and regions of code where the data race
+   could happen
+4) use atomic operations when updating per-atom properties
+5) use replicated per-thread data structures to accumulate data without
+   conflicts and then use a reduction to combine those results into the
+   data structures used by the regular style.
+
+Option 5 was chosen for the OPENMP package because it would retain the
+performance for the case of 1 thread and the code would be more
+maintainable.  Option 1 would require extensive code changes,
+particularly to the neighbor list code; option 2 would have incurred a
+2x or more performance penalty for the serial case; option 3 causes
+significant overhead and would enforce serialization of operations in
+inner loops and thus defeat the purpose of multi-threading; option 4
+slows down the serial case although not quite as bad as option 2.  The
+downside of option 5 is that the overhead of the reduction operations
+grows with the number of threads used, so there would be a crossover
+point where options 2 or 4 would result in faster execution.  That is
+why option 2 for example is used in the GPU package because a GPU is a
+processor with a massive number of threads.  However, since the MPI
+parallelization is generally more effective for typical MD systems, the
+expectation is that thread parallelism is only used for a smaller number
+of threads (2-8).  At the time of its implementation, that number was
+equivalent to the number of CPU cores per CPU socket on high-end
+supercomputers.
+
+Thus arrays like the force array are dimensioned to the number of atoms
+times the number of threads when enabling OpenMP support and inside the
+compute functions a pointer to a different chunk is obtained by each thread.
+Similarly, accumulators like potential energy or virial are kept in
+per-thread instances of the ``ThrData`` class and then only reduced and
+stored in their global counterparts at the end of the force computation.
+
+
+Loop scheduling
+"""""""""""""""
+
+Multi-thread parallelization is applied by distributing (outer) loops
+statically across threads.  Typically this would be the loop over local
+atoms *i* when processing *i,j* pairs of atoms from a neighbor list.
+The design of the neighbor list code results in atoms having a similar
+number of neighbors for homogeneous systems and thus load imbalances
+across threads are not common and typically happen for systems where
+also the MPI parallelization would be unbalanced, which would typically
+have a more pronounced impact on the performance.  This same loop
+scheduling scheme can also be applied to the reduction operations on
+per-atom data to try and reduce the overhead of the reduction operation.
+
+Neighbor list parallelization
+"""""""""""""""""""""""""""""
+
+In addition to the parallelization of force computations, also the
+generation of the neighbor lists is parallelized.  As explained
+previously, neighbor lists are built by looping over "owned" atoms and
+storing the neighbors in "pages".  In the OPENMP variants of the
+neighbor list code, each thread operates on a different chunk of "owned"
+atoms and allocates and fills its own set of pages with neighbor list
+data.  This is achieved by each thread keeping its own instance of the
+:cpp:class:`MyPage <LAMMPS_NS::MyPage>` page allocator class.
--- a/doc/src/Developer_par_part.rst
+++ b/doc/src/Developer_par_part.rst
@ -0,0 +1,89 @@
+Partitioning
+^^^^^^^^^^^^
+
+The underlying spatial decomposition strategy used by LAMMPS for
+distributed-memory parallelism is set with the :doc:`comm_style command
+<comm_style>` and can be either "brick" (a regular grid) or "tiled".
+
+.. _domain-decomposition:
+.. figure:: img/domain-decomp.png
+   :align: center
+
+   domain decomposition
+
+   This figure shows the different kinds of domain decomposition used
+   for MPI parallelization: "brick" on the left with an orthogonal
+   (left) and a triclinic (middle) simulation domain, and a "tiled"
+   decomposition (right).  The black lines show the division into
+   sub-domains and the contained atoms are "owned" by the corresponding
+   MPI process. The green dashed lines indicate how sub-domains are
+   extended with "ghost" atoms up to the communication cutoff distance.
+
+The LAMMPS simulation box is a 3d or 2d volume, which can be orthogonal
+or triclinic in shape, as illustrated in the :ref:`domain-decomposition`
+figure for the 2d case.  Orthogonal means the box edges are aligned with
+the *x*, *y*, *z* Cartesian axes, and the box faces are thus all
+rectangular.  Triclinic allows for a more general parallelepiped shape
+in which edges are aligned with three arbitrary vectors and the box
+faces are parallelograms.  In each dimension box faces can be periodic,
+or non-periodic with fixed or shrink-wrapped boundaries.  In the fixed
+case, atoms which move outside the face are deleted; shrink-wrapped
+means the position of the box face adjusts continuously to enclose all
+the atoms.
+
+For distributed-memory MPI parallelism, the simulation box is spatially
+decomposed (partitioned) into non-overlapping sub-domains which fill the
+box. The default partitioning, "brick", is most suitable when atom
+density is roughly uniform, as shown in the left-side images of the
+:ref:`domain-decomposition` figure.  The sub-domains comprise a regular
+grid and all sub-domains are identical in size and shape.  Both the
+orthogonal and triclinic boxes can deform continuously during a
+simulation, e.g. to compress a solid or shear a liquid, in which case
+the processor sub-domains likewise deform.
+
+
+For models with non-uniform density, the number of particles per
+processor can be load-imbalanced with the default partitioning.  This
+reduces parallel efficiency, as the overall simulation rate is limited
+by the slowest processor, i.e. the one with the largest computational
+load.  For such models, LAMMPS supports multiple strategies to reduce
+the load imbalance:
+
+- The processor grid decomposition is by default based on the simulation
+  cell volume and tries to optimize the volume to surface ratio for the sub-domains.
+  This can be changed with the :doc:`processors command <processors>`.
+- The parallel planes defining the size of the sub-domains can be shifted
+  with the :doc:`balance command <balance>`, which can be done in addition
+  to choosing a more optimal processor grid.
+- The recursive bisectioning algorithm in combination with the "tiled"
+  communication style can produce a partitioning with equal numbers of
+  particles in each sub-domain.
+
+
+.. |decomp1| image:: img/decomp-regular.png
+   :width: 24%
+
+.. |decomp2| image:: img/decomp-processors.png
+   :width: 24%
+
+.. |decomp3| image:: img/decomp-balance.png
+   :width: 24%
+
+.. |decomp4| image:: img/decomp-rcb.png
+   :width: 24%
+
+|decomp1|  |decomp2|  |decomp3|  |decomp4|
+
+The pictures above demonstrate different decompositions for a 2d system
+with 12 MPI ranks.  The atom colors indicate the load imbalance of each
+sub-domain with green being optimal and red the least optimal.
+
+Due to the vacuum in the system, the default decomposition is unbalanced
+with several MPI ranks without atoms (left). By forcing a 1x12x1
+processor grid, every MPI rank does computations now, but the number of
+atoms per sub-domain is still uneven and the thin slice shape increases
+the amount of communication between sub-domains (center left). With a
+2x6x1 processor grid and shifting the sub-domain divisions, the load
+imbalance is further reduced and the amount of communication required
+between sub-domains is less (center right).  And using the recursive
+bisectioning leads to further improved decomposition (right).
--- a/doc/src/Developer_parallel.rst
+++ b/doc/src/Developer_parallel.rst
@ -0,0 +1,28 @@
+Parallel algorithms
+-------------------
+
+LAMMPS is designed to enable running simulations in parallel using the
+MPI parallel communication standard with distributed data via domain
+decomposition.  The parallelization aims to be efficient and result in good
+strong scaling (= good speedup for the same system) and good weak
+scaling (= the computational cost of enlarging the system is
+proportional to the system size).  Additional parallelization using GPUs
+or OpenMP can also be applied within the sub-domain assigned to an MPI
+process.  For clarity, most of the following illustrations show the 2d
+simulation case. The underlying algorithms in those cases, however,
+apply to both 2d and 3d cases equally well.
+
+.. note::
+
+   The text and most of the figures in this chapter were adapted
+   for the manual from the section on parallel algorithms in the
+   :ref:`new LAMMPS paper <lammps_paper>`.
+
+.. toctree::
+   :maxdepth: 1
+
+   Developer_par_part
+   Developer_par_comm
+   Developer_par_neigh
+   Developer_par_long
+   Developer_par_openmp
--- a/doc/src/Developer_platform.rst
+++ b/doc/src/Developer_platform.rst
@ -0,0 +1,149 @@
+Platform abstraction functions
+------------------------------
+
+The ``platform`` sub-namespace inside the ``LAMMPS_NS`` namespace
+provides a collection of wrapper and convenience functions and utilities
+that perform common tasks for which platform specific code would be
+required or for which a more high-level abstraction would be convenient
+and reduce duplicated code.  This reduces redundant implementations and
+encourages consistent behavior and thus has some overlap with the
+:doc:`"utils" sub-namespace <Developer_utils>`.
+
+Time functions
+^^^^^^^^^^^^^^
+
+.. doxygenfunction:: cputime
+   :project: progguide
+
+.. doxygenfunction:: walltime
+   :project: progguide
+
+.. doxygenfunction:: usleep
+   :project: progguide
+
+Platform information functions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: os_info
+   :project: progguide
+
+.. doxygenfunction:: compiler_info
+   :project: progguide
+
+.. doxygenfunction:: cxx_standard
+   :project: progguide
+
+.. doxygenfunction:: openmp_standard
+   :project: progguide
+
+.. doxygenfunction:: mpi_vendor
+   :project: progguide
+
+.. doxygenfunction:: mpi_info
+   :project: progguide
+
+
+File and path functions and global constants
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenvariable:: filepathsep
+   :project: progguide
+
+.. doxygenvariable:: pathvarsep
+   :project: progguide
+
+.. doxygenfunction:: guesspath
+   :project: progguide
+
+.. doxygenfunction:: path_basename
+   :project: progguide
+
+.. doxygenfunction:: path_join
+   :project: progguide
+
+.. doxygenfunction:: file_is_readable
+   :project: progguide
+
+.. doxygenfunction:: is_console
+   :project: progguide
+
+.. doxygenfunction:: path_is_directory
+   :project: progguide
+
+.. doxygenfunction:: current_directory
+   :project: progguide
+
+.. doxygenfunction:: list_directory
+   :project: progguide
+
+.. doxygenfunction:: chdir
+   :project: progguide
+
+.. doxygenfunction:: mkdir
+   :project: progguide
+
+.. doxygenfunction:: rmdir
+   :project: progguide
+
+.. doxygenfunction:: unlink
+   :project: progguide
+
+Standard I/O function wrappers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenvariable:: END_OF_FILE
+   :project: progguide
+
+.. doxygenfunction:: ftell
+   :project: progguide
+
+.. doxygenfunction:: fseek
+   :project: progguide
+
+.. doxygenfunction:: ftruncate
+   :project: progguide
+
+.. doxygenfunction:: popen
+   :project: progguide
+
+.. doxygenfunction:: pclose
+   :project: progguide
+
+Environment variable functions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: putenv
+   :project: progguide
+
+.. doxygenfunction:: list_pathenv
+   :project: progguide
+
+.. doxygenfunction:: find_exe_path
+   :project: progguide
+
+Dynamically loaded object or library functions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: dlopen
+   :project: progguide
+
+.. doxygenfunction:: dlclose
+   :project: progguide
+
+.. doxygenfunction:: dlsym
+   :project: progguide
+
+.. doxygenfunction:: dlerror
+   :project: progguide
+
+Compressed file I/O functions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: has_compress_extension
+   :project: progguide
+
+.. doxygenfunction:: compressed_read
+   :project: progguide
+
+.. doxygenfunction:: compressed_write
+   :project: progguide
--- a/doc/src/Developer_utils.rst
+++ b/doc/src/Developer_utils.rst
@ -7,7 +7,9 @@ a collection of convenience functions and utilities that perform common
 tasks that are required repeatedly throughout the LAMMPS code like
 reading or writing to files with error checking or translation of
 strings into specific types of numbers with checking for validity.  This
-reduces redundant implementations and encourages consistent behavior.
+reduces redundant implementations and encourages consistent behavior and
+thus has some overlap with the :doc:`"platform" sub-namespace
+<Developer_platform>`.

 I/O with status check and similar functions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -60,6 +62,9 @@ silently returning the result of a partial conversion or zero in cases
 where the string is not a valid number.  This behavior allows to more
 easily detect typos or issues when processing input files.

+Similarly the :cpp:func:`logical() <LAMMPS_NS::utils::logical>` function
+will convert a string into a boolean and will only accept certain words.
+
 The *do_abort* flag should be set to ``true`` in case  this function
 is called only on a single MPI rank, as that will then trigger the
 a call to ``Error::one()`` for errors instead of ``Error::all()``
@ -83,6 +88,9 @@ strings for compliance without conversion.
 .. doxygenfunction:: tnumeric
   :project: progguide

+.. doxygenfunction:: logical
+   :project: progguide
+

 String processing
 ^^^^^^^^^^^^^^^^^
@ -95,6 +103,12 @@ and parsing files or arguments.
 .. doxygenfunction:: strdup
   :project: progguide

+.. doxygenfunction:: lowercase
+   :project: progguide
+
+.. doxygenfunction:: uppercase
+   :project: progguide
+
 .. doxygenfunction:: trim
   :project: progguide

@ -137,21 +151,6 @@ and parsing files or arguments.
 .. doxygenfunction:: is_double
   :project: progguide

-File and path functions
-^^^^^^^^^^^^^^^^^^^^^^^^^
-
-.. doxygenfunction:: guesspath
-   :project: progguide
-
-.. doxygenfunction:: path_basename
-   :project: progguide
-
-.. doxygenfunction:: path_join
-   :project: progguide
-
-.. doxygenfunction:: file_is_readable
-   :project: progguide
-
 Potential file functions
 ^^^^^^^^^^^^^^^^^^^^^^^^

--- a/doc/src/Howto_thermostat.rst
+++ b/doc/src/Howto_thermostat.rst
@ -2,8 +2,8 @@ Thermostats
 ===========

 Thermostatting means controlling the temperature of particles in an MD
-simulation.  :doc:`Barostatting <Howto_barostat>` means controlling the
-pressure.  Since the pressure includes a kinetic component due to
+simulation.  :doc:`Barostatting <Howto_barostat>` means controlling
+the pressure.  Since the pressure includes a kinetic component due to
 particle velocities, both these operations require calculation of the
 temperature.  Typically a target temperature (T) and/or pressure (P)
 is specified by the user, and the thermostat or barostat attempts to
@ -26,11 +26,13 @@ can be invoked via the *dpd/tstat* pair style:
 * :doc:`pair_style dpd/tstat <pair_dpd>`

 :doc:`Fix nvt <fix_nh>` only thermostats the translational velocity of
-particles.  :doc:`Fix nvt/sllod <fix_nvt_sllod>` also does this, except
-that it subtracts out a velocity bias due to a deforming box and
-integrates the SLLOD equations of motion.  See the :doc:`Howto nemd <Howto_nemd>` page for further details.  :doc:`Fix nvt/sphere <fix_nvt_sphere>` and :doc:`fix nvt/asphere <fix_nvt_asphere>` thermostat not only translation
-velocities but also rotational velocities for spherical and aspherical
-particles.
+particles.  :doc:`Fix nvt/sllod <fix_nvt_sllod>` also does this,
+except that it subtracts out a velocity bias due to a deforming box
+and integrates the SLLOD equations of motion.  See the :doc:`Howto
+nemd <Howto_nemd>` page for further details.  :doc:`Fix nvt/sphere
+<fix_nvt_sphere>` and :doc:`fix nvt/asphere <fix_nvt_asphere>`
+thermostat not only translation velocities but also rotational
+velocities for spherical and aspherical particles.

 .. note::

@ -40,25 +42,31 @@ particles.
   e.g. molecular systems.  The latter can be tricky to do correctly.

 DPD thermostatting alters pairwise interactions in a manner analogous
-to the per-particle thermostatting of :doc:`fix langevin <fix_langevin>`.
+to the per-particle thermostatting of :doc:`fix langevin
+<fix_langevin>`.

-Any of the thermostatting fixes can be instructed to use custom temperature
-computes that remove bias which has two effects:  first, the current
-calculated temperature, which is compared to the requested target temperature,
-is calculated with the velocity bias removed;  second, the thermostat adjusts
-only the thermal temperature component of the particle's velocities, which are
-the velocities with the bias removed.  The removed bias is then added back
-to the adjusted velocities.  See the doc pages for the individual
-fixes and for the :doc:`fix_modify <fix_modify>` command for
-instructions on how to assign a temperature compute to a
-thermostatting fix.  For example, you can apply a thermostat to only
-the x and z components of velocity by using it in conjunction with
-:doc:`compute temp/partial <compute_temp_partial>`.  Of you could
-thermostat only the thermal temperature of a streaming flow of
-particles without affecting the streaming velocity, by using
-:doc:`compute temp/profile <compute_temp_profile>`.
+Any of the thermostatting fixes can be instructed to use custom
+temperature computes that remove bias which has two effects: first,
+the current calculated temperature, which is compared to the requested
+target temperature, is calculated with the velocity bias removed;
+second, the thermostat adjusts only the thermal temperature component
+of the particle's velocities, which are the velocities with the bias
+removed.  The removed bias is then added back to the adjusted
+velocities.  See the doc pages for the individual fixes and for the
+:doc:`fix_modify <fix_modify>` command for instructions on how to
+assign a temperature compute to a thermostatting fix.

-Below is a list of some custom temperature computes that can be used like that:
+For example, you can apply a thermostat only to atoms in a spatial
+region by using it in conjunction with :doc:`compute temp/region
+<compute_temp_region>`.  Or you can apply a thermostat to only the x
+and z components of velocity by using it with :doc:`compute
+temp/partial <compute_temp_partial>`.  Or you could thermostat only
+the thermal temperature of a streaming flow of particles without
+affecting the streaming velocity, by using :doc:`compute temp/profile
+<compute_temp_profile>`.
+
+Below is a list of custom temperature computes that can be used like
+that:

 * :doc:`compute_temp_asphere`
 * :doc:`compute_temp_body`
@ -72,8 +80,6 @@ Below is a list of some custom temperature computes that can be used like that:
 * :doc:`compute_temp_rotate`
 * :doc:`compute_temp_sphere`

-
-
 .. note::

   Only the nvt fixes perform time integration, meaning they update
@ -86,17 +92,17 @@ Below is a list of some custom temperature computes that can be used like that:
 * :doc:`fix nve/sphere <fix_nve_sphere>`
 * :doc:`fix nve/asphere <fix_nve_asphere>`

-Thermodynamic output, which can be setup via the
-:doc:`thermo_style <thermo_style>` command, often includes temperature
-values.  As explained on the page for the
-:doc:`thermo_style <thermo_style>` command, the default temperature is
-setup by the thermo command itself.  It is NOT the temperature
-associated with any thermostatting fix you have defined or with any
-compute you have defined that calculates a temperature.  The doc pages
-for the thermostatting fixes explain the ID of the temperature compute
-they create.  Thus if you want to view these temperatures, you need to
-specify them explicitly via the :doc:`thermo_style custom <thermo_style>` command.  Or you can use the
-:doc:`thermo_modify <thermo_modify>` command to re-define what
+Thermodynamic output, which can be set up via the :doc:`thermo_style
+<thermo_style>` command, often includes temperature values.  As
+explained on the page for the :doc:`thermo_style <thermo_style>`
+command, the default temperature is set up by the thermo command
+itself.  It is NOT the temperature associated with any thermostatting
+fix you have defined or with any compute you have defined that
+calculates a temperature.  The doc pages for the thermostatting fixes
+explain the ID of the temperature compute they create.  Thus if you
+want to view these temperatures, you need to specify them explicitly
+via the :doc:`thermo_style custom <thermo_style>` command.  Or you can
+use the :doc:`thermo_modify <thermo_modify>` command to re-define what
 temperature compute is used for default thermodynamic output.

 ----------
--- a/doc/src/Intro_citing.rst
+++ b/doc/src/Intro_citing.rst
@ -4,28 +4,41 @@ Citing LAMMPS
 Core Algorithms
 ^^^^^^^^^^^^^^^

-Since LAMMPS is a community project, there is not a single one
-publication or reference that describes **all** of LAMMPS.
-The canonical publication that describes the foundation, that is
-the basic spatial decomposition approach, the neighbor finding,
-and basic communications algorithms used in LAMMPS is:
+The paper mentioned below is the best overview of LAMMPS, but there are
+also publications describing particular models or algorithms implemented
+in LAMMPS or complementary software that it has interfaces to.  Please
+see below for how to cite contributions to LAMMPS.
+
+.. _lammps_paper:
+
+The latest canonical publication that describes the basic features, the
+source code design, the program structure, the spatial decomposition
+approach, the neighbor finding, basic communications algorithms, and how
+users and developers have contributed to LAMMPS is:
+
+  `LAMMPS - A flexible simulation tool for particle-based materials modeling at the atomic, meso, and continuum scales, Comp. Phys. Comm. (accepted 09/2021), DOI:10.1016/j.cpc.2021.108171 <https://doi.org/10.1016/j.cpc.2021.108171>`_
+
+So a project using LAMMPS or a derivative application that uses LAMMPS
+as a simulation engine should cite this paper.  The paper is expected to
+be published in its final form under the same DOI in the first half
+of 2022.  Please also give the URL of the LAMMPS website in your paper,
+namely https://www.lammps.org.
+
+The original publication describing the parallel algorithms used in the
+initial versions of LAMMPS is:

  `S. Plimpton, Fast Parallel Algorithms for Short-Range Molecular Dynamics, J Comp Phys, 117, 1-19 (1995). <http://www.sandia.gov/~sjplimp/papers/jcompphys95.pdf>`_

-So any project using LAMMPS (or a derivative application using LAMMPS as
-a simulation engine) should cite this paper. A new publication
-describing the developments and improvements of LAMMPS in the 25 years
-since then is currently in preparation.
-

 DOI for the LAMMPS code
 ^^^^^^^^^^^^^^^^^^^^^^^

-LAMMPS developers use the `Zenodo service at CERN
-<https://zenodo.org/>`_ to create digital object identifies (DOI) for
-stable releases of the LAMMPS code. There are two types of DOIs for the
-LAMMPS source code: the canonical DOI for **all** versions of LAMMPS,
-which will always point to the **latest** stable release version is:
+LAMMPS developers use the `Zenodo service at CERN <https://zenodo.org/>`_
+to create digital object identifiers (DOIs) for stable releases of the
+LAMMPS source code. There are two types of DOIs for the LAMMPS source code.
+
+The canonical DOI for **all** versions of LAMMPS, which will always
+point to the **latest** stable release version is:

 - DOI: `10.5281/zenodo.3726416 <https://dx.doi.org/10.5281/zenodo.3726416>`_

@ -45,11 +58,13 @@ about LAMMPS and its features.
 Citing contributions
 ^^^^^^^^^^^^^^^^^^^^

-LAMMPS has many features and that use either previously published
-methods and algorithms or novel features.  It also includes potential
-parameter filed for specific models.  Where available, a reminder about
-references for optional features used in a specific run is printed to
-the screen and log file.  Style and output location can be selected with
-the :ref:`-cite command-line switch <cite>`.  Additional references are
+LAMMPS has many features that use either previously published methods
+and algorithms or novel features.  It also includes potential parameter
+files for specific models.  Where available, a reminder about references
+for optional features used in a specific run is printed to the screen
+and log file.  Style and output location can be selected with the
+:ref:`-cite command-line switch <cite>`.  Additional references are
 given in the documentation of the :doc:`corresponding commands
-<Commands_all>` or in the :doc:`Howto tutorials <Howto>`.
+<Commands_all>` or in the :doc:`Howto tutorials <Howto>`.  So please
+make certain that you provide the proper acknowledgments and citations
+in any published works using LAMMPS.
--- a/doc/src/Library_create.rst
+++ b/doc/src/Library_create.rst
@ -34,7 +34,7 @@ simple example demonstrating its use:
     int lmpargc = sizeof(lmpargv)/sizeof(const char *);

     /* create LAMMPS instance */
-     handle = lammps_open_no_mpi(lmpargc, lmpargv, NULL);
+     handle = lammps_open_no_mpi(lmpargc, (char **)lmpargv, NULL);
     if (handle == NULL) {
       printf("LAMMPS initialization failed");
       lammps_mpi_finalize();
--- a/doc/src/PDF/colvars-refman-lammps.pdf
+++ b/doc/src/PDF/colvars-refman-lammps.pdf
--- a/doc/src/Run_basics.rst
+++ b/doc/src/Run_basics.rst
@ -2,17 +2,25 @@ Basics of running LAMMPS
 ========================

 LAMMPS is run from the command line, reading commands from a file via
-the -in command line flag, or from standard input.
-Using the "-in in.file" variant is recommended:
+the -in command line flag, or from standard input.  Using the "-in
+in.file" variant is recommended (see note below).  The name of the
+LAMMPS executable is either ``lmp`` or ``lmp_<machine>`` with
+``<machine>`` being the machine string used when compiling LAMMPS.  This
+is required when compiling LAMMPS with the traditional build system
+(e.g. with ``make mpi``), but optional when using CMake to configure and
+build LAMMPS:

 .. code-block:: bash

   $ lmp_serial -in in.file
   $ lmp_serial < in.file
+   $ lmp -in in.file
+   $ lmp < in.file
   $ /path/to/lammps/src/lmp_serial -i in.file
   $ mpirun -np 4 lmp_mpi -in in.file
+   $ mpiexec -np 4 lmp -in in.file
   $ mpirun -np 8 /path/to/lammps/src/lmp_mpi -in in.file
-   $ mpirun -np 6 /usr/local/bin/lmp -in in.file
+   $ mpiexec -n 6 /usr/local/bin/lmp -in in.file

 You normally run the LAMMPS command in the directory where your input
 script is located.  That is also where output files are produced by
@ -23,7 +31,7 @@ executable itself can be placed elsewhere.
 .. note::

   The redirection operator "<" will not always work when running
-   in parallel with mpirun; for those systems the -in form is required.
+   in parallel with mpirun or mpiexec; for those systems the -in form is required.

 As LAMMPS runs it prints info to the screen and a logfile named
 *log.lammps*\ .  More info about output is given on the
--- a/doc/src/fix_langevin.rst
+++ b/doc/src/fix_langevin.rst
@ -138,16 +138,18 @@ temperature with optional time-dependence as well.

 Like other fixes that perform thermostatting, this fix can be used
 with :doc:`compute commands <compute>` that remove a "bias" from the
-atom velocities.  E.g. removing the center-of-mass velocity from a
-group of atoms or removing the x-component of velocity from the
-calculation.  This is not done by default, but only if the
-:doc:`fix_modify <fix_modify>` command is used to assign a temperature
-compute to this fix that includes such a bias term.  See the doc pages
-for individual :doc:`compute commands <compute>` to determine which ones
-include a bias.  In this case, the thermostat works in the following
-manner: bias is removed from each atom, thermostatting is performed on
-the remaining thermal degrees of freedom, and the bias is added back
-in.
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 The *damp* parameter is specified in time units and determines how
 rapidly the temperature is relaxed.  For example, a value of 100.0 means
@ -183,7 +185,8 @@ omega (which is derived from the angular momentum in the case of
 aspherical particles).

 The rotational temperature of the particles can be monitored by the
-:doc:`compute temp/sphere <compute_temp_sphere>` and :doc:`compute temp/asphere <compute_temp_asphere>` commands with their rotate
+:doc:`compute temp/sphere <compute_temp_sphere>` and :doc:`compute
+temp/asphere <compute_temp_asphere>` commands with their rotate
 options.

 For the *omega* keyword there is also a scale factor of
--- a/doc/src/fix_langevin_drude.rst
+++ b/doc/src/fix_langevin_drude.rst
@ -167,17 +167,20 @@ functions, and include :doc:`thermo_style <thermo_style>` command
 keywords for the simulation box parameters and timestep and elapsed
 time.  Thus it is easy to specify a time-dependent temperature.

-Like other fixes that perform thermostatting, this fix can be used with
-:doc:`compute commands <compute>` that remove a "bias" from the atom
-velocities.  E.g. removing the center-of-mass velocity from a group of
-atoms.  This is not done by default, but only if the
-:doc:`fix_modify <fix_modify>` command is used to assign a temperature
-compute to this fix that includes such a bias term.  See the doc pages
-for individual :doc:`compute commands <compute>` to determine which ones
-include a bias.  In this case, the thermostat works in the following
-manner: bias is removed from each atom, thermostatting is performed on
-the remaining thermal degrees of freedom, and the bias is added back
-in.  NOTE: this feature has not been tested.
+Like other fixes that perform thermostatting, this fix can be used
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 Note: The temperature thermostatting the core-Drude particle pairs
 should be chosen low enough, so as to mimic as closely as possible the
--- a/doc/src/fix_nh.rst
+++ b/doc/src/fix_nh.rst
@ -486,19 +486,20 @@ temperature or pressure during thermodynamic output via the
 compute-ID.  It also means that changing attributes of *thermo_temp*
 or *thermo_press* will have no effect on this fix.

-Like other fixes that perform thermostatting, fix nvt and fix npt can
-be used with :doc:`compute commands <compute>` that calculate a
-temperature after removing a "bias" from the atom velocities.
-E.g. removing the center-of-mass velocity from a group of atoms or
-only calculating temperature on the x-component of velocity or only
-calculating temperature for atoms in a geometric region.  This is not
-done by default, but only if the :doc:`fix_modify <fix_modify>` command
-is used to assign a temperature compute to this fix that includes such
-a bias term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
-this case, the thermostat works in the following manner: the current
-temperature is calculated taking the bias into account, bias is
-removed from each atom, thermostatting is performed on the remaining
-thermal degrees of freedom, and the bias is added back in.
+Like other fixes that perform thermostatting, this fix can be used
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 ----------

--- a/doc/src/fix_npt_asphere.rst
+++ b/doc/src/fix_npt_asphere.rst
@ -48,8 +48,9 @@ can also have a bias velocity removed from them before thermostatting
 takes place; see the description below.

 Additional parameters affecting the thermostat and barostat are
-specified by keywords and values documented with the :doc:`fix npt <fix_nh>` command.  See, for example, discussion of the *temp*,
-*iso*, *aniso*, and *dilate* keywords.
+specified by keywords and values documented with the :doc:`fix npt
+<fix_nh>` command.  See, for example, discussion of the *temp*, *iso*,
+*aniso*, and *dilate* keywords.

 The particles in the fix group are the only ones whose velocities and
 positions are updated by the velocity/position update portion of the
@ -89,18 +90,19 @@ It also means that changing attributes of *thermo_temp* or
 *thermo_press* will have no effect on this fix.

 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
-after removing a "bias" from the atom velocities.  E.g. removing the
-center-of-mass velocity from a group of atoms or only calculating
-temperature on the x-component of velocity or only calculating
-temperature for atoms in a geometric region.  This is not done by
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
-to assign a temperature compute to this fix that includes such a bias
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
-this case, the thermostat works in the following manner: the current
-temperature is calculated taking the bias into account, bias is
-removed from each atom, thermostatting is performed on the remaining
-thermal degrees of freedom, and the bias is added back in.
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 ----------

--- a/doc/src/fix_npt_body.rst
+++ b/doc/src/fix_npt_body.rst
@ -87,18 +87,19 @@ It also means that changing attributes of *thermo_temp* or
 *thermo_press* will have no effect on this fix.

 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
-after removing a "bias" from the atom velocities.  E.g. removing the
-center-of-mass velocity from a group of atoms or only calculating
-temperature on the x-component of velocity or only calculating
-temperature for atoms in a geometric region.  This is not done by
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
-to assign a temperature compute to this fix that includes such a bias
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
-this case, the thermostat works in the following manner: the current
-temperature is calculated taking the bias into account, bias is
-removed from each atom, thermostatting is performed on the remaining
-thermal degrees of freedom, and the bias is added back in.
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 ----------

--- a/doc/src/fix_npt_cauchy.rst
+++ b/doc/src/fix_npt_cauchy.rst
@ -400,19 +400,20 @@ temperature or pressure during thermodynamic output via the
 compute-ID.  It also means that changing attributes of *thermo_temp*
 or *thermo_press* will have no effect on this fix.

-Like other fixes that perform thermostatting, fix npt/cauchy can
-be used with :doc:`compute commands <compute>` that calculate a
-temperature after removing a "bias" from the atom velocities.
-E.g. removing the center-of-mass velocity from a group of atoms or
-only calculating temperature on the x-component of velocity or only
-calculating temperature for atoms in a geometric region.  This is not
-done by default, but only if the :doc:`fix_modify <fix_modify>` command
-is used to assign a temperature compute to this fix that includes such
-a bias term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
-this case, the thermostat works in the following manner: the current
-temperature is calculated taking the bias into account, bias is
-removed from each atom, thermostatting is performed on the remaining
-thermal degrees of freedom, and the bias is added back in.
+Like other fixes that perform thermostatting, this fix can be used
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 ----------

--- a/doc/src/fix_npt_sphere.rst
+++ b/doc/src/fix_npt_sphere.rst
@ -103,18 +103,19 @@ appropriate compute-ID.  It also means that changing attributes of
 *thermo_temp* or *thermo_press* will have no effect on this fix.

 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
-after removing a "bias" from the atom velocities.  E.g. removing the
-center-of-mass velocity from a group of atoms or only calculating
-temperature on the x-component of velocity or only calculating
-temperature for atoms in a geometric region.  This is not done by
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
-to assign a temperature compute to this fix that includes such a bias
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
-this case, the thermostat works in the following manner: the current
-temperature is calculated taking the bias into account, bias is
-removed from each atom, thermostatting is performed on the remaining
-thermal degrees of freedom, and the bias is added back in.
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 ----------

--- a/doc/src/fix_nvt_asphere.rst
+++ b/doc/src/fix_nvt_asphere.rst
@ -72,18 +72,19 @@ It also means that changing attributes of *thermo_temp* will have no
 effect on this fix.

 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
-after removing a "bias" from the atom velocities.  E.g. removing the
-center-of-mass velocity from a group of atoms or only calculating
-temperature on the x-component of velocity or only calculating
-temperature for atoms in a geometric region.  This is not done by
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
-to assign a temperature compute to this fix that includes such a bias
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
-this case, the thermostat works in the following manner: the current
-temperature is calculated taking the bias into account, bias is
-removed from each atom, thermostatting is performed on the remaining
-thermal degrees of freedom, and the bias is added back in.
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 ----------

--- a/doc/src/fix_nvt_body.rst
+++ b/doc/src/fix_nvt_body.rst
@ -69,18 +69,19 @@ It also means that changing attributes of *thermo_temp* will have no
 effect on this fix.

 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
-after removing a "bias" from the atom velocities.  E.g. removing the
-center-of-mass velocity from a group of atoms or only calculating
-temperature on the x-component of velocity or only calculating
-temperature for atoms in a geometric region.  This is not done by
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
-to assign a temperature compute to this fix that includes such a bias
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
-this case, the thermostat works in the following manner: the current
-temperature is calculated taking the bias into account, bias is
-removed from each atom, thermostatting is performed on the remaining
-thermal degrees of freedom, and the bias is added back in.
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 ----------

--- a/doc/src/fix_nvt_sllod.rst
+++ b/doc/src/fix_nvt_sllod.rst
@ -37,15 +37,16 @@ trajectory consistent with the canonical ensemble.

 This thermostat is used for a simulation box that is changing size
 and/or shape, for example in a non-equilibrium MD (NEMD) simulation.
-The size/shape change is induced by use of the :doc:`fix deform <fix_deform>` command, so each point in the simulation box
-can be thought of as having a "streaming" velocity.  This
-position-dependent streaming velocity is subtracted from each atom's
-actual velocity to yield a thermal velocity which is used for
-temperature computation and thermostatting.  For example, if the box
-is being sheared in x, relative to y, then points at the bottom of the
-box (low y) have a small x velocity, while points at the top of the
-box (hi y) have a large x velocity.  These velocities do not
-contribute to the thermal "temperature" of the atom.
+The size/shape change is induced by use of the :doc:`fix deform
+<fix_deform>` command, so each point in the simulation box can be
+thought of as having a "streaming" velocity.  This position-dependent
+streaming velocity is subtracted from each atom's actual velocity to
+yield a thermal velocity which is used for temperature computation and
+thermostatting.  For example, if the box is being sheared in x,
+relative to y, then points at the bottom of the box (low y) have a
+small x velocity, while points at the top of the box (hi y) have a
+large x velocity.  These velocities do not contribute to the thermal
+"temperature" of the atom.

 .. note::

@ -60,13 +61,15 @@ contribute to the thermal "temperature" of the atom.
   consistent.

 The SLLOD equations of motion, originally proposed by Hoover and Ladd
-(see :ref:`(Evans and Morriss) <Evans3>`), were proven to be equivalent to
-Newton's equations of motion for shear flow by :ref:`(Evans and Morriss) <Evans3>`. They were later shown to generate the desired
-velocity gradient and the correct production of work by stresses for
-all forms of homogeneous flow by :ref:`(Daivis and Todd) <Daivis>`.  As
-implemented in LAMMPS, they are coupled to a Nose/Hoover chain
-thermostat in a velocity Verlet formulation, closely following the
-implementation used for the :doc:`fix nvt <fix_nh>` command.
+(see :ref:`(Evans and Morriss) <Evans3>`), were proven to be
+equivalent to Newton's equations of motion for shear flow by
+:ref:`(Evans and Morriss) <Evans3>`. They were later shown to generate
+the desired velocity gradient and the correct production of work by
+stresses for all forms of homogeneous flow by :ref:`(Daivis and Todd)
+<Daivis>`.  As implemented in LAMMPS, they are coupled to a
+Nose/Hoover chain thermostat in a velocity Verlet formulation, closely
+following the implementation used for the :doc:`fix nvt <fix_nh>`
+command.

 .. note::

@ -94,27 +97,28 @@ underscore + "temp", and the group for the new compute is the same as
 the fix group.

 Note that this is NOT the compute used by thermodynamic output (see
-the :doc:`thermo_style <thermo_style>` command) with ID = *thermo_temp*.
-This means you can change the attributes of this fix's temperature
-(e.g. its degrees-of-freedom) via the
-:doc:`compute_modify <compute_modify>` command or print this temperature
-during thermodynamic output via the :doc:`thermo_style custom <thermo_style>` command using the appropriate compute-ID.
-It also means that changing attributes of *thermo_temp* will have no
-effect on this fix.
+the :doc:`thermo_style <thermo_style>` command) with ID =
+*thermo_temp*.  This means you can change the attributes of this fix's
+temperature (e.g. its degrees-of-freedom) via the :doc:`compute_modify
+<compute_modify>` command or print this temperature during
+thermodynamic output via the :doc:`thermo_style custom <thermo_style>`
+command using the appropriate compute-ID.  It also means that changing
+attributes of *thermo_temp* will have no effect on this fix.

 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
-after removing a "bias" from the atom velocities.  E.g. removing the
-center-of-mass velocity from a group of atoms or only calculating
-temperature on the x-component of velocity or only calculating
-temperature for atoms in a geometric region.  This is not done by
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
-to assign a temperature compute to this fix that includes such a bias
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
-this case, the thermostat works in the following manner: the current
-temperature is calculated taking the bias into account, bias is
-removed from each atom, thermostatting is performed on the remaining
-thermal degrees of freedom, and the bias is added back in.
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 ----------

--- a/doc/src/fix_nvt_sphere.rst
+++ b/doc/src/fix_nvt_sphere.rst
@ -86,18 +86,19 @@ It also means that changing attributes of *thermo_temp* will have no
 effect on this fix.

 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
-after removing a "bias" from the atom velocities.  E.g. removing the
-center-of-mass velocity from a group of atoms or only calculating
-temperature on the x-component of velocity or only calculating
-temperature for atoms in a geometric region.  This is not done by
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
-to assign a temperature compute to this fix that includes such a bias
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
-this case, the thermostat works in the following manner: the current
-temperature is calculated taking the bias into account, bias is
-removed from each atom, thermostatting is performed on the remaining
-thermal degrees of freedom, and the bias is added back in.
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 ----------

--- a/doc/src/fix_saed_vtk.rst
+++ b/doc/src/fix_saed_vtk.rst
@ -28,7 +28,6 @@ Syntax
         Nstart = start averaging on this timestep
       *file* arg = filename
         filename = name of file to output time averages to
-       *overwrite* arg = none = overwrite output file with only latest output

 Examples
 """"""""
@ -161,10 +160,6 @@ the *file* keyword and this string is appended with _N.vtk where N is
 an index (0,1,2...) to account for situations with multiple diffraction
 intensity outputs.

-The *overwrite* keyword will continuously overwrite the output file
-with the latest output, so that it only contains one timestep worth of
-output.  This option can only be used with the *ave running* setting.
-
 Restart, fix_modify, output, run start/stop, minimize info
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""

--- a/doc/src/fix_temp_berendsen.rst
+++ b/doc/src/fix_temp_berendsen.rst
@ -102,18 +102,19 @@ It also means that changing attributes of *thermo_temp* will have no
 effect on this fix.

 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
-after removing a "bias" from the atom velocities.  E.g. removing the
-center-of-mass velocity from a group of atoms or only calculating
-temperature on the x-component of velocity or only calculating
-temperature for atoms in a geometric region.  This is not done by
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
-to assign a temperature compute to this fix that includes such a bias
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
-this case, the thermostat works in the following manner: the current
-temperature is calculated taking the bias into account, bias is
-removed from each atom, thermostatting is performed on the remaining
-thermal degrees of freedom, and the bias is added back in.
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 ----------

--- a/doc/src/fix_temp_csvr.rst
+++ b/doc/src/fix_temp_csvr.rst
@ -110,28 +110,29 @@ during thermodynamic output via the :doc:`thermo_style custom <thermo_style>` co
 It also means that changing attributes of *thermo_temp* will have no
 effect on this fix.

-Like other fixes that perform thermostatting, these fixes can be used
-with :doc:`compute commands <compute>` that calculate a temperature
-after removing a "bias" from the atom velocities.  E.g. removing the
-center-of-mass velocity from a group of atoms or only calculating
-temperature on the x-component of velocity or only calculating
-temperature for atoms in a geometric region.  This is not done by
-default, but only if the :doc:`fix_modify <fix_modify>` command is used
-to assign a temperature compute to this fix that includes such a bias
-term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
-this case, the thermostat works in the following manner: the current
-temperature is calculated taking the bias into account, bias is
-removed from each atom, thermostatting is performed on the remaining
-thermal degrees of freedom, and the bias is added back in.
+Like other fixes that perform thermostatting, this fix can be used
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 An important feature of these thermostats is that they have an
-associated effective energy that is a constant of motion.
-The effective energy is the total energy (kinetic + potential) plus
-the accumulated kinetic energy changes due to the thermostat. The
-latter quantity is the global scalar computed by these fixes. This
-feature is useful to check the integration of the equations of motion
-against discretization errors. In other words, the conservation of
-the effective energy can be used to choose an appropriate integration
+associated effective energy that is a constant of motion.  The
+effective energy is the total energy (kinetic + potential) plus the
+accumulated kinetic energy changes due to the thermostat. The latter
+quantity is the global scalar computed by these fixes. This feature is
+useful to check the integration of the equations of motion against
+discretization errors. In other words, the conservation of the
+effective energy can be used to choose an appropriate integration
 :doc:`timestep <timestep>`. This is similar to the usual paradigm of
 checking the conservation of the total energy in the microcanonical
 ensemble.
--- a/doc/src/fix_temp_rescale.rst
+++ b/doc/src/fix_temp_rescale.rst
@ -109,19 +109,19 @@ command using the appropriate compute-ID.  It also means that changing
 attributes of *thermo_temp* will have no effect on this fix.

 Like other fixes that perform thermostatting, this fix can be used
-with :doc:`compute commands <compute>` that calculate a temperature
-after removing a "bias" from the atom velocities.  E.g. removing the
-center-of-mass velocity from a group of atoms or only calculating
-temperature on the x-component of velocity or only calculating
-temperature for atoms in a geometric region.  This is not done by
-default, but only if the :doc:`fix_modify <fix_modify>` command is
-used to assign a temperature compute to this fix that includes such a
-bias term.  See the doc pages for individual :doc:`compute commands
-<compute>` to determine which ones include a bias.  In this case, the
-thermostat works in the following manner: the current temperature is
-calculated taking the bias into account, bias is removed from each
-atom, thermostatting is performed on the remaining thermal degrees of
-freedom, and the bias is added back in.
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 ----------

--- a/doc/src/fix_tgnh_drude.rst
+++ b/doc/src/fix_tgnh_drude.rst
@ -187,26 +187,32 @@ barostatting.

 ----------

-Like other fixes that perform thermostatting, these fixes can
-be used with :doc:`compute commands <compute>` that calculate a
-temperature after removing a "bias" from the atom velocities.
-This is not done by default, but only if the :doc:`fix_modify <fix_modify>` command
-is used to assign a temperature compute to this fix that includes such
-a bias term.  See the doc pages for individual :doc:`compute commands <compute>` to determine which ones include a bias.  In
-this case, the thermostat works in the following manner: the current
-temperature is calculated taking the bias into account, bias is
-removed from each atom, thermostatting is performed on the remaining
-thermal DOF, and the bias is added back in.
+Like other fixes that perform thermostatting, this fix can be used
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.  In this case, the thermostat works in the following manner:
+bias is removed from each atom, thermostatting is performed on the
+remaining thermal degrees of freedom, and the bias is added back in.

 .. note::

-   However, not all temperature compute commands are valid to be used with these fixes.
-   Precisely, only temperature compute that does not modify the DOF of the group can be used.
-   E.g. :doc:`compute temp/ramp <compute_temp_ramp>` and :doc:`compute viscosity/cos <compute_viscosity_cos>`
-   compute the kinetic energy after remove a velocity gradient without affecting the DOF of the group,
-   then they can be invoked in this way.
-   In contrast, :doc:`compute temp/partial <compute_temp_partial>` may remove the DOF at one or more dimensions,
-   therefore it cannot be used with these fixes.
+   However, not all temperature compute commands are valid to be used
+   with these fixes.  Specifically, only temperature computes that do
+   not modify the DOF of the group can be used.  E.g. :doc:`compute
+   temp/ramp <compute_temp_ramp>` and :doc:`compute viscosity/cos
+   <compute_viscosity_cos>` compute the kinetic energy after removing
+   a velocity gradient without affecting the DOF of the group, so they
+   can be invoked in this way.  In contrast, :doc:`compute
+   temp/partial <compute_temp_partial>` may remove the DOF in one or
+   more dimensions, and therefore cannot be used with these fixes.

 ----------

--- a/doc/src/group.rst
+++ b/doc/src/group.rst
@ -38,7 +38,7 @@ Syntax
       *intersect* args = two or more group IDs
       *dynamic* args = parent-ID keyword value ...
         one or more keyword/value pairs may be appended
-         keyword = *region* or *var* or *every*
+         keyword = *region* or *var* or *property* or *every*
           *region* value = region-ID
           *var* value = name of variable
           *property* value = name of custom integer or floating point vector
--- a/doc/src/img/decomp-balance.png
+++ b/doc/src/img/decomp-balance.png
--- a/doc/src/img/decomp-processors.png
+++ b/doc/src/img/decomp-processors.png
--- a/doc/src/img/decomp-rcb.png
+++ b/doc/src/img/decomp-rcb.png
--- a/doc/src/img/decomp-regular.png
+++ b/doc/src/img/decomp-regular.png
--- a/doc/src/img/domain-decomp.png
+++ b/doc/src/img/domain-decomp.png
--- a/doc/src/img/fft-decomp-parallel.png
+++ b/doc/src/img/fft-decomp-parallel.png
--- a/doc/src/img/ghost-comm.png
+++ b/doc/src/img/ghost-comm.png
--- a/doc/src/img/neigh-stencil.png
+++ b/doc/src/img/neigh-stencil.png
--- a/doc/utils/requirements.txt
+++ b/doc/utils/requirements.txt
@ -1,4 +1,4 @@
-Sphinx==4.0.3
+Sphinx
 sphinxcontrib-spelling
 git+git://github.com/akohlmey/sphinx-fortran@parallel-read
 sphinx_tabs
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@ -1135,6 +1135,7 @@ Germann
 Germano
 gerolf
 Gerolf
+getrusage
 Gershgorin
 getter
 gettimeofday
@ -1222,6 +1223,7 @@ Guo
 gw
 gyromagnetic
 gz
+gzip
 gzipped
 Haak
 Hafskjold
@ -1809,6 +1811,7 @@ lyon
 Lysogorskiy
 Lyulin
 lz
+lzma
 Maaravi
 MACHDYN
 machdyn
@ -2262,6 +2265,7 @@ Nmols
 nn
 nnodes
 Nocedal
+nO
 nocite
 nocoeff
 nodeless
@ -2761,6 +2765,7 @@ REAXFF
 ReaxFF
 reaxff
 rebo
+recurse
 recursing
 Ree
 refactored
@ -3440,6 +3445,7 @@ usec
 uSemiParallel
 userguide
 username
+usleep
 usr
 util
 utils
@ -3657,6 +3663,7 @@ Yc
 ycm
 Yeh
 yellowgreen
+yEs
 Yethiraj
 yflag
 yhi
--- a/examples/PACKAGES/charge_regulation/in.chreg-polymer
+++ b/examples/PACKAGES/charge_regulation/in.chreg-polymer
@ -8,7 +8,7 @@ bond_style      harmonic
 bond_coeff      1 100 1.122462 # K R0
 velocity        all create 1.0 8008 loop geom

-pair_style      lj/cut/coul/long 1.122462 20
+pair_style      lj/cut/coul/long/soft 2 0.5 10.0  1.122462 20
 pair_coeff      * *  1.0 1.0 1.122462 # charges
 kspace_style    pppm 1.0e-3
 pair_modify     shift yes
--- a/examples/plugins/CMakeLists.txt
+++ b/examples/plugins/CMakeLists.txt
@ -31,6 +31,15 @@ else()
  endif()
 endif()

+# ugly hacks for MSVC which by default always reports an old C++ standard in the __cplusplus macro
+# and prints lots of pointless warnings about "unsafe" functions
+#if(MSVC)
+#  add_compile_options(/Zc:__cplusplus)
+#  add_compile_options(/wd4244)
+#  add_compile_options(/wd4267)
+#  add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
+#endif()
+
 # C++11 is required
 set(CMAKE_CXX_STANDARD 11)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
@ -40,11 +49,6 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "Intel") OR (CMAKE_CXX_COMPILER_ID STREQUAL "
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -restrict")
 endif()

-# bail out on windows
-if(CMAKE_SYSTEM_NAME STREQUAL Windows)
-  message(FATAL_ERROR "LAMMPS plugins are currently not supported on Windows")
-endif()
-
 set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR})
 include(CheckIncludeFileCXX)
 if(NOT LAMMPS_DIR)
@ -68,14 +72,23 @@ add_library(zero2plugin MODULE zero2plugin.cpp pair_zero2.cpp bond_zero2.cpp
                               angle_zero2.cpp dihedral_zero2.cpp improper_zero2.cpp)
 target_link_libraries(zero2plugin PRIVATE lammps)

-set_target_properties(morse2plugin nve2plugin helloplugin zero2plugin PROPERTIES
-                      PREFIX ""
-                      LINK_FLAGS "-rdynamic")
+set_target_properties(morse2plugin nve2plugin helloplugin zero2plugin PROPERTIES PREFIX "")

 # MacOS seems to need this
 if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
+  set_target_properties(morse2plugin nve2plugin helloplugin zero2plugin
+    PROPERTIES LINK_FLAGS "-Wl,-undefined,dynamic_lookup")
+elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
+  # On Windows, have CMake export all symbols from the plugin .dll; MinGW cross-compilers also need the explicit linker flag below
+  set_target_properties(morse2plugin nve2plugin helloplugin zero2plugin
+    PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
+  if(CMAKE_CROSSCOMPILING)
+    set_target_properties(morse2plugin nve2plugin helloplugin zero2plugin
+      PROPERTIES LINK_FLAGS "-Wl,--export-all-symbols")
+  endif()
+else()
  set_target_properties(morse2plugin nve2plugin helloplugin zero2plugin PROPERTIES
-                        LINK_FLAGS "-Wl,-undefined,dynamic_lookup")
+    LINK_FLAGS "-rdynamic")
 endif()

 add_custom_target(plugins ALL ${CMAKE_COMMAND} -E echo "Building Plugins"
--- a/examples/plugins/LAMMPSInterfaceCXX.cmake
+++ b/examples/plugins/LAMMPSInterfaceCXX.cmake
@ -23,7 +23,9 @@ endfunction(validate_option)
 # LAMMPS C++ interface. We only need the header related parts.
 add_library(lammps INTERFACE)
 target_include_directories(lammps INTERFACE ${LAMMPS_HEADER_DIR})
-
+if((CMAKE_SYSTEM_NAME STREQUAL "Windows") AND CMAKE_CROSSCOMPILING)
+  target_link_libraries(lammps INTERFACE ${CMAKE_BINARY_DIR}/../liblammps.dll.a)
+endif()
 ################################################################################
 # MPI configuration
 if(NOT CMAKE_CROSSCOMPILING)
--- a/lib/colvars/colvarmodule.cpp
+++ b/lib/colvars/colvarmodule.cpp
@ -1476,7 +1476,9 @@ int colvarmodule::write_output_files()
       bi != biases.end();
       bi++) {
    // Only write output files if they have not already been written this time step
-    if ((*bi)->output_freq == 0 || (cvm::step_absolute() % (*bi)->output_freq) != 0) {
+    if ((*bi)->output_freq == 0    ||
+        cvm::step_relative() == 0  ||
+        (cvm::step_absolute() % (*bi)->output_freq) != 0) {
      error_code |= (*bi)->write_output_files();
    }
    error_code |= (*bi)->write_state_to_replicas();
--- a/lib/colvars/colvars_version.h
+++ b/lib/colvars/colvars_version.h
@ -1,3 +1,3 @@
 #ifndef COLVARS_VERSION
-#define COLVARS_VERSION "2021-08-06"
+#define COLVARS_VERSION "2021-09-21"
 #endif
--- a/lib/gpu/geryon/ocl_device.h
+++ b/lib/gpu/geryon/ocl_device.h
@ -481,7 +481,7 @@ int UCL_Device::set_platform(int pid) {
      cl_device_id *subdevice_list = new cl_device_id[num_subdevices];
      CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, num_subdevices,
                                      subdevice_list, &num_subdevices));
-      for (int j=0; j<num_subdevices; j++) {
+      for (cl_uint j=0; j<num_subdevices; j++) {
        _cl_devices.push_back(device_list[i]);
        add_properties(device_list[i]);
        _num_devices++;
@ -556,16 +556,22 @@ void UCL_Device::add_properties(cl_device_id device_list) {
                               sizeof(float_width),&float_width,nullptr));
  op.preferred_vector_width32=float_width;

-  // Determine if double precision is supported
  cl_uint double_width;
  CL_SAFE_CALL(clGetDeviceInfo(device_list,
                               CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
                               sizeof(double_width),&double_width,nullptr));
  op.preferred_vector_width64=double_width;
-  if (double_width==0)
-    op.double_precision=false;
-  else
+
+  // Determine if double precision is supported: All bits in the mask must be set.
+  cl_device_fp_config double_mask = (CL_FP_FMA|CL_FP_ROUND_TO_NEAREST|CL_FP_ROUND_TO_ZERO|
+                                     CL_FP_ROUND_TO_INF|CL_FP_INF_NAN|CL_FP_DENORM);
+  cl_device_fp_config double_avail;
+  CL_SAFE_CALL(clGetDeviceInfo(device_list,CL_DEVICE_DOUBLE_FP_CONFIG,
+                               sizeof(double_avail),&double_avail,nullptr));
+  if ((double_avail & double_mask) == double_mask)
    op.double_precision=true;
+  else
+    op.double_precision=false;

  CL_SAFE_CALL(clGetDeviceInfo(device_list,
                               CL_DEVICE_PROFILING_TIMER_RESOLUTION,
@ -629,7 +635,7 @@ void UCL_Device::add_properties(cl_device_id device_list) {
  size_t ext_str_size_ret;
  CL_SAFE_CALL(clGetDeviceInfo(device_list, CL_DEVICE_EXTENSIONS, 0, nullptr,
                               &ext_str_size_ret));
-  char buffer2[ext_str_size_ret];
+  char *buffer2 = new char[ext_str_size_ret];
  CL_SAFE_CALL(clGetDeviceInfo(device_list, CL_DEVICE_EXTENSIONS,
                               ext_str_size_ret, buffer2, nullptr));
  #if defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0)
@ -660,6 +666,7 @@ void UCL_Device::add_properties(cl_device_id device_list) {
    if (arch >= 3.0)
      op.has_shuffle_support=true;
  }
+  delete[] buffer2;
  #endif

  _properties.push_back(op);
@ -830,7 +837,7 @@ int UCL_Device::auto_set_platform(const enum UCL_DEVICE_TYPE type,

  bool vendor_match=false;
  bool type_match=false;
-  int max_cus=0;
+  unsigned int max_cus=0;
  int best_platform=0;

  std::string vendor_upper=vendor;
--- a/lib/gpu/lal_born_coul_long.cpp
+++ b/lib/gpu/lal_born_coul_long.cpp
@ -34,7 +34,7 @@ BornCoulLongT::BornCoulLong() : BaseCharge<numtyp,acctyp>(),
 }

 template <class numtyp, class acctyp>
-BornCoulLongT::~BornCoulLongT() {
+BornCoulLongT::~BornCoulLong() {
  clear();
 }

--- a/lib/gpu/lal_born_coul_wolf.cpp
+++ b/lib/gpu/lal_born_coul_wolf.cpp
@ -34,7 +34,7 @@ BornCoulWolfT::BornCoulWolf() : BaseCharge<numtyp,acctyp>(),
 }

 template <class numtyp, class acctyp>
-BornCoulWolfT::~BornCoulWolfT() {
+BornCoulWolfT::~BornCoulWolf() {
  clear();
 }

--- a/lib/gpu/lal_buck_coul_long.cpp
+++ b/lib/gpu/lal_buck_coul_long.cpp
@ -34,7 +34,7 @@ BuckCoulLongT::BuckCoulLong() : BaseCharge<numtyp,acctyp>(),
 }

 template <class numtyp, class acctyp>
-BuckCoulLongT::~BuckCoulLongT() {
+BuckCoulLongT::~BuckCoulLong() {
  clear();
 }

--- a/lib/gpu/lal_device.cpp
+++ b/lib/gpu/lal_device.cpp
@ -333,6 +333,12 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
    gpu_barrier();
  }

+  // check if double precision support is available
+  #if defined(_SINGLE_DOUBLE) || defined(_DOUBLE_DOUBLE)
+  if (!gpu->double_precision())
+    return -16;
+  #endif
+
  // Setup auto bin size calculation for calls from atom::sort
  // - This is repeated in neighbor init with additional info
  if (_user_cell_size<0.0) {
@ -546,14 +552,9 @@ int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
    return -3;

  if (_user_cell_size<0.0) {
-    #ifndef LAL_USE_OLD_NEIGHBOR
-    _neighbor_shared.setup_auto_cell_size(true,cutoff,nbor->simd_size());
-    #else
    _neighbor_shared.setup_auto_cell_size(false,cutoff,nbor->simd_size());
-    #endif
  } else
-    _neighbor_shared.setup_auto_cell_size(false,_user_cell_size,
-                                          nbor->simd_size());
+    _neighbor_shared.setup_auto_cell_size(false,_user_cell_size,nbor->simd_size());
  nbor->set_cutoff(cutoff);

  return 0;
--- a/lib/machdyn/Install.py
+++ b/lib/machdyn/Install.py
@ -17,11 +17,12 @@ parser = ArgumentParser(prog='Install.py',

 # settings

-version = '3.3.9'
+version = '3.4.0'
 tarball = "eigen.tar.gz"

 # known checksums for different Eigen versions. used to validate the download.
 checksums = { \
+              '3.4.0' : '4c527a9171d71a72a9d4186e65bea559', \
              '3.3.9' : '609286804b0f79be622ccf7f9ff2b660', \
              '3.3.7' : '9e30f67e8531477de4117506fe44669b' \
 }
@ -35,7 +36,7 @@ Syntax from src dir: make lib-smd args="-b"

 Syntax from lib dir: python Install.py -b
                 or: python Install.py -p /usr/include/eigen3"
-                 or: python Install.py -v 3.3.7 -b
+                 or: python Install.py -v 3.4.0 -b

 Example:

@ -77,7 +78,7 @@ if pathflag:
 if buildflag:
  print("Downloading Eigen ...")
  eigentar = os.path.join(homepath, tarball)
-  url = "https://gitlab.com/libeigen/eigen/-/archive/%s/eigen-%s.tar.gz" %  (version,version)
+  url = "https://download.lammps.org/thirdparty/eigen-%s.tar.gz" %  version
  geturl(url, eigentar)

  # verify downloaded archive integrity via md5 checksum, if known.
--- a/lib/pace/Makefile
+++ b/lib/pace/Makefile
@ -2,8 +2,8 @@ SHELL = /bin/sh

 # ------ FILES ------

-SRC_FILES = $(wildcard src/ML-PACE/*.cpp)
-SRC = $(filter-out src/ML-PACE/pair_pace.cpp, $(SRC_FILES))
+SRC_FILES = $(wildcard src/USER-PACE/*.cpp)
+SRC = $(filter-out src/USER-PACE/pair_pace.cpp, $(SRC_FILES))

 # ------ DEFINITIONS ------

@ -12,7 +12,7 @@ OBJ =   $(SRC:.cpp=.o)


 # ------ SETTINGS ------
-CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE
+CXXFLAGS = -O3 -fPIC -Isrc/USER-PACE

 ARCHIVE =	ar
 ARCHFLAG =	-rc
--- a/lib/pace/Makefile.lammps
+++ b/lib/pace/Makefile.lammps
@ -1,3 +1,3 @@
-pace_SYSINC =-I../../lib/pace/src/ML-PACE
+pace_SYSINC =-I../../lib/pace/src/USER-PACE
 pace_SYSLIB = -L../../lib/pace/ -lpace
 pace_SYSPATH =
--- a/src/.gitignore
+++ b/src/.gitignore
@ -27,6 +27,9 @@
 /*_ssa.h
 /*_ssa.cpp

+!accelerator_kokkos.h
+!accelerator_omp.h
+
 /fix_mdi_engine.cpp
 /fix_mdi_engine.h
 /library_mdi.cpp
@ -202,7 +205,6 @@

 /plugin.cpp
 /plugin.h
-/lammpsplugin.h

 /atom_vec_spin.cpp
 /atom_vec_spin.h
@ -265,8 +267,6 @@
 /fix_drag.h
 /fix_numdiff.cpp
 /fix_numdiff.h
-/fix_nve_noforce.cpp
-/fix_nve_noforce.h
 /fix_spring_rg.cpp
 /fix_spring_rg.h
 /fix_temp_csld.cpp
@ -367,8 +367,6 @@
 /atom_vec_dpd.h
 /atom_vec_electron.cpp
 /atom_vec_electron.h
-/atom_vec_ellipsoid.cpp
-/atom_vec_ellipsoid.h
 /atom_vec_full.cpp
 /atom_vec_full.h
 /atom_vec_full_hars.cpp
@ -535,8 +533,6 @@
 /dihedral_harmonic.h
 /dihedral_helix.cpp
 /dihedral_helix.h
-/dihedral_hybrid.cpp
-/dihedral_hybrid.h
 /dihedral_multi_harmonic.cpp
 /dihedral_multi_harmonic.h
 /dihedral_nharmonic.cpp
@ -858,8 +854,6 @@
 /fix_ti_rs.h
 /fix_ti_spring.cpp
 /fix_ti_spring.h
-/fix_ttm.cpp
-/fix_ttm.h
 /fix_tune_kspace.cpp
 /fix_tune_kspace.h
 /fix_wall_body_polygon.cpp
@ -885,8 +879,6 @@
 /fix_widom.cpp
 /fix_widom.h
 /gpu_extra.h
-/gridcomm.cpp
-/gridcomm.h
 /group_ndx.cpp
 /group_ndx.h
 /gz_file_writer.cpp
@ -911,14 +903,13 @@
 /improper_fourier.h
 /improper_harmonic.cpp
 /improper_harmonic.h
-/improper_hybrid.cpp
-/improper_hybrid.h
 /improper_inversion_harmonic.cpp
 /improper_inversion_harmonic.h
 /improper_ring.cpp
 /improper_ring.h
 /improper_umbrella.cpp
 /improper_umbrella.h
+/interlayer_taper.h
 /kissfft.h
 /lj_sdk_common.h
 /math_complex.h
@ -933,7 +924,6 @@
 /msm_cg.h
 /neb.cpp
 /neb.h
-
 /pair_adp.cpp
 /pair_adp.h
 /pair_agni.cpp
@ -994,6 +984,8 @@
 /pair_cosine_squared.h
 /pair_coul_diel.cpp
 /pair_coul_diel.h
+/pair_coul_exclude.cpp
+/pair_coul_exclude.h
 /pair_coul_long.cpp
 /pair_coul_long.h
 /pair_coul_msm.cpp
@ -1332,8 +1324,6 @@
 /thr_data.h
 /verlet_split.cpp
 /verlet_split.h
-/write_dump.cpp
-/write_dump.h
 /xdr_compat.cpp
 /xdr_compat.h
 /zstd_file_writer.cpp
@ -1431,6 +1421,10 @@
 /fix_srp.h
 /fix_tfmc.cpp
 /fix_tfmc.h
+/fix_ttm.cpp
+/fix_ttm.h
+/fix_ttm_grid.cpp
+/fix_ttm_grid.h
 /fix_ttm_mod.cpp
 /fix_ttm_mod.h
 /pair_born_coul_long_cs.cpp
--- a/src/BOCS/fix_bocs.cpp
+++ b/src/BOCS/fix_bocs.cpp
@ -233,9 +233,7 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
      iarg += 2;
    } else if (strcmp(arg[iarg],"mtk") == 0) {
      if (iarg+2 > narg) error->all(FLERR,"Illegal fix bocs command");
-      if (strcmp(arg[iarg+1],"yes") == 0) mtk_flag = 1;
-      else if (strcmp(arg[iarg+1],"no") == 0) mtk_flag = 0;
-      else error->all(FLERR,"Illegal fix bocs command");
+      mtk_flag = utils::logical(FLERR,arg[iarg+1],false,lmp);
      iarg += 2;
    } else if (strcmp(arg[iarg],"tloop") == 0) {
      if (iarg+2 > narg) error->all(FLERR,"Illegal fix bocs command");
--- a/src/COLVARS/fix_colvars.cpp
+++ b/src/COLVARS/fix_colvars.cpp
@ -303,7 +303,7 @@ FixColvars::FixColvars(LAMMPS *lmp, int narg, char **arg) :
  me = comm->me;
  root2root = MPI_COMM_NULL;

-  conf_file = strdup(arg[3]);
+  conf_file = utils::strdup(arg[3]);
  rng_seed = 1966;
  unwrap_flag = 1;

@ -312,35 +312,29 @@ FixColvars::FixColvars(LAMMPS *lmp, int narg, char **arg) :
  tmp_name = nullptr;

  /* parse optional arguments */
-  int argsdone = 4;
-  while (argsdone < narg) {
+  int iarg = 4;
+  while (iarg < narg) {
    // we have keyword/value pairs. check if value is missing
-    if (argsdone+1 == narg)
+    if (iarg+1 == narg)
      error->all(FLERR,"Missing argument to keyword");

-    if (0 == strcmp(arg[argsdone], "input")) {
-      inp_name = strdup(arg[argsdone+1]);
-    } else if (0 == strcmp(arg[argsdone], "output")) {
-      out_name = strdup(arg[argsdone+1]);
-    } else if (0 == strcmp(arg[argsdone], "seed")) {
-      rng_seed = utils::inumeric(FLERR,arg[argsdone+1],false,lmp);
-    } else if (0 == strcmp(arg[argsdone], "unwrap")) {
-      if (0 == strcmp(arg[argsdone+1], "yes")) {
-        unwrap_flag = 1;
-      } else if (0 == strcmp(arg[argsdone+1], "no")) {
-        unwrap_flag = 0;
-      } else {
-        error->all(FLERR,"Incorrect fix colvars unwrap flag");
-      }
-    } else if (0 == strcmp(arg[argsdone], "tstat")) {
-      tmp_name = strdup(arg[argsdone+1]);
+    if (0 == strcmp(arg[iarg], "input")) {
+      inp_name = utils::strdup(arg[iarg+1]);
+    } else if (0 == strcmp(arg[iarg], "output")) {
+      out_name = utils::strdup(arg[iarg+1]);
+    } else if (0 == strcmp(arg[iarg], "seed")) {
+      rng_seed = utils::inumeric(FLERR,arg[iarg+1],false,lmp);
+    } else if (0 == strcmp(arg[iarg], "unwrap")) {
+      unwrap_flag = utils::logical(FLERR,arg[iarg+1],false,lmp);
+    } else if (0 == strcmp(arg[iarg], "tstat")) {
+      tmp_name = utils::strdup(arg[iarg+1]);
    } else {
      error->all(FLERR,"Unknown fix colvars parameter");
    }
-    ++argsdone; ++argsdone;
+    ++iarg; ++iarg;
  }

-  if (!out_name) out_name = strdup("out");
+  if (!out_name) out_name = utils::strdup("out");

  /* initialize various state variables. */
  tstat_id = -1;
@ -365,10 +359,10 @@ FixColvars::FixColvars(LAMMPS *lmp, int narg, char **arg) :

 FixColvars::~FixColvars()
 {
-  memory->sfree(conf_file);
-  memory->sfree(inp_name);
-  memory->sfree(out_name);
-  memory->sfree(tmp_name);
+  delete[] conf_file;
+  delete[] inp_name;
+  delete[] out_name;
+  delete[] tmp_name;
  memory->sfree(comm_buf);

  if (proxy) {
@ -436,17 +430,15 @@ void FixColvars::one_time_init()
  // create and initialize the colvars proxy

  if (me == 0) {
-    if (screen) fputs("colvars: Creating proxy instance\n",screen);
-    if (logfile) fputs("colvars: Creating proxy instance\n",logfile);
+    utils::logmesg(lmp,"colvars: Creating proxy instance\n");

 #ifdef LAMMPS_BIGBIG
-    if (screen) fputs("colvars: cannot handle atom ids > 2147483647\n",screen);
-    if (logfile) fputs("colvars: cannot handle atom ids > 2147483647\n",logfile);
+    utils::logmesg(lmp,"colvars: cannot handle atom ids > 2147483647\n");
 #endif

    if (inp_name) {
      if (strcmp(inp_name,"NULL") == 0) {
-        memory->sfree(inp_name);
+        delete[] inp_name;
        inp_name = nullptr;
      }
    }
@ -464,8 +456,7 @@ void FixColvars::one_time_init()
      }
    }

-    proxy = new colvarproxy_lammps(lmp,inp_name,out_name,
-                                   rng_seed,t_target,root2root);
+    proxy = new colvarproxy_lammps(lmp,inp_name,out_name,rng_seed,t_target,root2root);
    proxy->init(conf_file);

    num_coords = (proxy->modify_atom_positions()->size());
--- a/src/COMPRESS/dump_atom_gz.cpp
+++ b/src/COMPRESS/dump_atom_gz.cpp
@ -33,7 +33,7 @@ DumpAtomGZ::~DumpAtomGZ() {}

 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */

--- a/src/COMPRESS/dump_atom_zstd.cpp
+++ b/src/COMPRESS/dump_atom_zstd.cpp
@ -188,17 +188,11 @@ int DumpAtomZstd::modify_param(int narg, char **arg)
    try {
      if (strcmp(arg[0], "checksum") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        if (strcmp(arg[1], "yes") == 0)
-          writer.setChecksum(true);
-        else if (strcmp(arg[1], "no") == 0)
-          writer.setChecksum(false);
-        else
-          error->all(FLERR, "Illegal dump_modify command");
+        writer.setChecksum(utils::logical(FLERR, arg[1], false, lmp) == 1);
        return 2;
      } else if (strcmp(arg[0], "compression_level") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        int compression_level = utils::inumeric(FLERR, arg[1], false, lmp);
-        writer.setCompressionLevel(compression_level);
+        writer.setCompressionLevel(utils::inumeric(FLERR, arg[1], false, lmp));
        return 2;
      }
    } catch (FileWriterException &e) {
--- a/src/COMPRESS/dump_cfg_gz.cpp
+++ b/src/COMPRESS/dump_cfg_gz.cpp
@ -35,7 +35,7 @@ DumpCFGGZ::~DumpCFGGZ() {}

 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */

--- a/src/COMPRESS/dump_cfg_zstd.cpp
+++ b/src/COMPRESS/dump_cfg_zstd.cpp
@ -233,17 +233,11 @@ int DumpCFGZstd::modify_param(int narg, char **arg)
    try {
      if (strcmp(arg[0], "checksum") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        if (strcmp(arg[1], "yes") == 0)
-          writer.setChecksum(true);
-        else if (strcmp(arg[1], "no") == 0)
-          writer.setChecksum(false);
-        else
-          error->all(FLERR, "Illegal dump_modify command");
+        writer.setChecksum(utils::logical(FLERR, arg[1], false, lmp) == 1);
        return 2;
      } else if (strcmp(arg[0], "compression_level") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        int compression_level = utils::inumeric(FLERR, arg[1], false, lmp);
-        writer.setCompressionLevel(compression_level);
+        writer.setCompressionLevel(utils::inumeric(FLERR, arg[1], false, lmp));
        return 2;
      }
    } catch (FileWriterException &e) {
--- a/src/COMPRESS/dump_custom_gz.cpp
+++ b/src/COMPRESS/dump_custom_gz.cpp
@ -33,7 +33,7 @@ DumpCustomGZ::~DumpCustomGZ() {}

 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */

--- a/src/COMPRESS/dump_custom_zstd.cpp
+++ b/src/COMPRESS/dump_custom_zstd.cpp
@ -45,7 +45,7 @@ DumpCustomZstd::~DumpCustomZstd()

 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */

@ -205,14 +205,11 @@ int DumpCustomZstd::modify_param(int narg, char **arg)
    try {
      if (strcmp(arg[0], "checksum") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        if (strcmp(arg[1],"yes") == 0) writer.setChecksum(true);
-        else if (strcmp(arg[1],"no") == 0) writer.setChecksum(false);
-        else error->all(FLERR,"Illegal dump_modify command");
+        writer.setChecksum(utils::logical(FLERR, arg[1], false, lmp) == 1);
        return 2;
      } else if (strcmp(arg[0], "compression_level") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        int compression_level = utils::inumeric(FLERR, arg[1], false, lmp);
-        writer.setCompressionLevel(compression_level);
+        writer.setCompressionLevel(utils::inumeric(FLERR, arg[1], false, lmp));
        return 2;
      }
    } catch (FileWriterException &e) {
--- a/src/COMPRESS/dump_local_gz.cpp
+++ b/src/COMPRESS/dump_local_gz.cpp
@ -33,7 +33,7 @@ DumpLocalGZ::~DumpLocalGZ() {}

 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */

--- a/src/COMPRESS/dump_local_zstd.cpp
+++ b/src/COMPRESS/dump_local_zstd.cpp
@ -39,7 +39,7 @@ DumpLocalZstd::~DumpLocalZstd() {}

 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */

@ -190,17 +190,11 @@ int DumpLocalZstd::modify_param(int narg, char **arg)
    try {
      if (strcmp(arg[0], "checksum") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        if (strcmp(arg[1], "yes") == 0)
-          writer.setChecksum(true);
-        else if (strcmp(arg[1], "no") == 0)
-          writer.setChecksum(false);
-        else
-          error->all(FLERR, "Illegal dump_modify command");
+        writer.setChecksum(utils::logical(FLERR, arg[1], false, lmp) == 1);
        return 2;
      } else if (strcmp(arg[0], "compression_level") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        int compression_level = utils::inumeric(FLERR, arg[1], false, lmp);
-        writer.setCompressionLevel(compression_level);
+        writer.setCompressionLevel(utils::inumeric(FLERR, arg[1], false, lmp));
        return 2;
      }
    } catch (FileWriterException &e) {
--- a/src/COMPRESS/dump_xyz_gz.cpp
+++ b/src/COMPRESS/dump_xyz_gz.cpp
@ -32,7 +32,7 @@ DumpXYZGZ::~DumpXYZGZ() {}

 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */

--- a/src/COMPRESS/dump_xyz_zstd.cpp
+++ b/src/COMPRESS/dump_xyz_zstd.cpp
@ -38,7 +38,7 @@ DumpXYZZstd::~DumpXYZZstd() {}

 /* ----------------------------------------------------------------------
   generic opening of a dump file
-   ASCII or binary or gzipped
+   ASCII or binary or compressed
   some derived classes override this function
 ------------------------------------------------------------------------- */

@ -156,17 +156,11 @@ int DumpXYZZstd::modify_param(int narg, char **arg)
    try {
      if (strcmp(arg[0], "checksum") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        if (strcmp(arg[1], "yes") == 0)
-          writer.setChecksum(true);
-        else if (strcmp(arg[1], "no") == 0)
-          writer.setChecksum(false);
-        else
-          error->all(FLERR, "Illegal dump_modify command");
+        writer.setChecksum(utils::logical(FLERR, arg[1], false, lmp) == 1);
        return 2;
      } else if (strcmp(arg[0], "compression_level") == 0) {
        if (narg < 2) error->all(FLERR, "Illegal dump_modify command");
-        int compression_level = utils::inumeric(FLERR, arg[1], false, lmp);
-        writer.setCompressionLevel(compression_level);
+        writer.setCompressionLevel(utils::inumeric(FLERR, arg[1], false, lmp));
        return 2;
      }
    } catch (FileWriterException &e) {
--- a/src/DIELECTRIC/fix_polarize_bem_gmres.cpp
+++ b/src/DIELECTRIC/fix_polarize_bem_gmres.cpp
@ -796,12 +796,7 @@ int FixPolarizeBEMGMRES::modify_param(int narg, char **arg)
      iarg += 2;
    } else if (strcmp(arg[iarg], "kspace") == 0) {
      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix_modify command");
-      if (strcmp(arg[iarg + 1], "yes") == 0)
-        kspaceflag = 1;
-      else if (strcmp(arg[iarg + 1], "no") == 0)
-        kspaceflag = 0;
-      else
-        error->all(FLERR, "Illegal fix_modify command for fix polarize");
+      kspaceflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
      iarg += 2;
    } else if (strcmp(arg[iarg], "dielectrics") == 0) {
      if (iarg + 6 > narg) error->all(FLERR, "Illegal fix_modify command");
--- a/src/DIELECTRIC/fix_polarize_bem_icc.cpp
+++ b/src/DIELECTRIC/fix_polarize_bem_icc.cpp
@ -355,12 +355,7 @@ int FixPolarizeBEMICC::modify_param(int narg, char **arg)
      iarg += 2;
    } else if (strcmp(arg[iarg], "kspace") == 0) {
      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix_modify command");
-      if (strcmp(arg[iarg + 1], "yes") == 0)
-        kspaceflag = 1;
-      else if (strcmp(arg[iarg + 1], "no") == 0)
-        kspaceflag = 0;
-      else
-        error->all(FLERR, "Illegal fix_modify command for fix polarize");
+      kspaceflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
      iarg += 2;
    } else if (strcmp(arg[iarg], "dielectrics") == 0) {
      if (iarg + 6 > narg) error->all(FLERR, "Illegal fix_modify command");
--- a/src/DIELECTRIC/fix_polarize_functional.cpp
+++ b/src/DIELECTRIC/fix_polarize_functional.cpp
@ -478,12 +478,7 @@ int FixPolarizeFunctional::modify_param(int narg, char **arg)
  while (iarg < narg) {
    if (strcmp(arg[iarg], "kspace") == 0) {
      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix_modify command");
-      if (strcmp(arg[iarg + 1], "yes") == 0)
-        kspaceflag = 1;
-      else if (strcmp(arg[iarg + 1], "no") == 0)
-        kspaceflag = 0;
-      else
-        error->all(FLERR, "Illegal fix_modify command for fix polarize/functional");
+      kspaceflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
      iarg += 2;
    } else if (strcmp(arg[iarg], "dielectrics") == 0) {
      if (iarg + 6 > narg) error->all(FLERR, "Illegal fix_modify command");
--- a/src/DIFFRACTION/compute_saed.cpp
+++ b/src/DIFFRACTION/compute_saed.cpp
@ -31,7 +31,6 @@

 #include <cmath>
 #include <cstring>
-#include <strings.h>    // for strcasecmp()

 #include "omp_compat.h"
 using namespace LAMMPS_NS;
@ -86,7 +85,7 @@ ComputeSAED::ComputeSAED(LAMMPS *lmp, int narg, char **arg) :
  }
  for (int i=0; i<ntypes; i++) {
     for (int j = 0; j < SAEDmaxType; j++) {
-         if (strcasecmp(arg[iarg],SAEDtypeList[j]) == 0) {
+       if (utils::lowercase(arg[iarg]) == utils::lowercase(SAEDtypeList[j])) {
         ztype[i] = j;
       }
     }
@ -348,7 +347,7 @@ void ComputeSAED::compute_vector()
  if (me == 0 && echo)
    utils::logmesg(lmp,"-----\nComputing SAED intensities");

-  double t0 = MPI_Wtime();
+  double t0 = platform::walltime();
  double *Fvec = new double[2*nRows]; // Strct factor (real & imaginary)
  // -- Note, vector entries correspond to different RELP

@ -491,7 +490,7 @@ void ComputeSAED::compute_vector()
    vector[i] = (scratch[2*i] * scratch[2*i] + scratch[2*i+1] * scratch[2*i+1]) / natoms;
  }

-  double t2 = MPI_Wtime();
+  double t2 = platform::walltime();

  // compute memory usage per processor
  double bytes = memory_usage();
--- a/src/DIFFRACTION/compute_xrd.cpp
+++ b/src/DIFFRACTION/compute_xrd.cpp
@ -32,7 +32,6 @@

 #include <cmath>
 #include <cstring>
-#include <strings.h>    // for strcasecmp()

 #include "omp_compat.h"
 using namespace LAMMPS_NS;
@ -87,7 +86,7 @@ ComputeXRD::ComputeXRD(LAMMPS *lmp, int narg, char **arg) :
  }
  for (int i = 0; i < ntypes; i++) {
    for (int j = 0; j < XRDmaxType; j++) {
-      if (strcasecmp(arg[iarg],XRDtypeList[j]) == 0) {
+      if (utils::lowercase(arg[iarg]) == utils::lowercase(XRDtypeList[j])) {
        ztype[i] = j;
       }
     }
@ -300,7 +299,7 @@ void ComputeXRD::compute_array()

  if (me == 0 && echo) utils::logmesg(lmp, "-----\nComputing XRD intensities");

-  double t0 = MPI_Wtime();
+  double t0 = platform::walltime();

  double *Fvec = new double[2*size_array_rows]; // Strct factor (real & imaginary)
  // -- Note: array rows correspond to different RELP
@ -496,7 +495,7 @@ void ComputeXRD::compute_array()
    array[i][1] = (scratch[2*i] * scratch[2*i] + scratch[2*i+1] * scratch[2*i+1]) / natoms;
  }

-  double t2 = MPI_Wtime();
+  double t2 = platform::walltime();

  // compute memory usage per processor
  double bytes = memory_usage();
--- a/src/DIFFRACTION/fix_saed_vtk.cpp
+++ b/src/DIFFRACTION/fix_saed_vtk.cpp
@ -31,6 +31,7 @@

 #include <cstring>
 #include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace FixConst;

@ -100,8 +101,6 @@ FixSAEDVTK::FixSAEDVTK(LAMMPS *lmp, int narg, char **arg) :
    error->all(FLERR,"Illegal fix saed/vtk command");
  if (nfreq % nevery || nrepeat*nevery > nfreq)
    error->all(FLERR,"Illegal fix saed/vtk command");
-  if (ave != RUNNING && overwrite)
-    error->all(FLERR,"Illegal fix saed/vtk command");

  // allocate memory for averaging

@ -391,8 +390,7 @@ void FixSAEDVTK::invoke_vector(bigint ntimestep)
      fp = fopen(nName.c_str(),"w");

      if (fp == nullptr)
-        error->one(FLERR,"Cannot open fix saed/vtk file {}: {}",
-                                     nName,utils::getsyserror());
+        error->one(FLERR,"Cannot open fix saed/vtk file {}: {}", nName,utils::getsyserror());
    }

    fprintf(fp,"# vtk DataFile Version 3.0 c_%s\n",ids);
@ -406,9 +404,6 @@ void FixSAEDVTK::invoke_vector(bigint ntimestep)
    fprintf(fp,"SCALARS intensity float\n");
    fprintf(fp,"LOOKUP_TABLE default\n");

-    filepos = ftell(fp);
-
-    if (overwrite) fseek(fp,filepos,SEEK_SET);

    // Finding the intersection of the reciprical space and Ewald sphere
    int NROW1 = 0;
@ -497,7 +492,6 @@ void FixSAEDVTK::options(int narg, char **arg)
  fp = nullptr;
  ave = ONE;
  startstep = 0;
-  overwrite = 0;

  // optional args
  int iarg = 7;
@ -534,9 +528,6 @@ void FixSAEDVTK::options(int narg, char **arg)
      if (iarg+2 > narg) error->all(FLERR,"Illegal fix saed/vtk command");
      startstep = utils::inumeric(FLERR,arg[iarg+1],false,lmp);
      iarg += 2;
-    } else if (strcmp(arg[iarg],"overwrite") == 0) {
-      overwrite = 1;
-      iarg += 1;
    } else error->all(FLERR,"Illegal fix saed/vtk command");
  }
 }
--- a/src/DIFFRACTION/fix_saed_vtk.h
+++ b/src/DIFFRACTION/fix_saed_vtk.h
@ -43,8 +43,6 @@ class FixSAEDVTK : public Fix {
  int nrows;

  int ave, nwindow, nsum, startstep;
-  int overwrite;
-  long filepos;

  int norm, iwindow, window_limit;
  double *vector;
--- a/src/DPD-MESO/pair_mdpd.cpp
+++ b/src/DPD-MESO/pair_mdpd.cpp
@ -19,19 +19,19 @@

 #include "pair_mdpd.h"

+#include "atom.h"
+#include "citeme.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "random_mars.h"
+#include "update.h"
+
 #include <cmath>
 #include <ctime>
-#include "atom.h"
-#include "comm.h"
-#include "update.h"
-#include "force.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "random_mars.h"
-#include "citeme.h"
-#include "memory.h"
-#include "error.h"
-

 using namespace LAMMPS_NS;

@ -217,12 +217,13 @@ void PairMDPD::settings(int narg, char **arg)
  seed = utils::inumeric(FLERR,arg[2],false,lmp);

  // initialize Marsaglia RNG with processor-unique seed
+  // create a positive seed based on the system clock, if requested.

  if (seed <= 0) {
-    struct timespec time;
-    clock_gettime( CLOCK_REALTIME, &time );
-    seed = time.tv_nsec;  // if seed is non-positive, get the current time as the seed
+    constexpr double LARGE_NUM = 2<<30;
+    seed = int(fmod(platform::walltime() * LARGE_NUM, LARGE_NUM)) + 1;
  }
+
  delete random;
  random = new RanMars(lmp,(seed + comm->me) % 900000000);

--- a/src/DPD-MESO/pair_tdpd.cpp
+++ b/src/DPD-MESO/pair_tdpd.cpp
@ -18,19 +18,19 @@
 ------------------------------------------------------------------------- */

 #include "pair_tdpd.h"
-#include <cmath>
-#include <ctime>
-#include "atom.h"
-#include "comm.h"
-#include "update.h"
-#include "force.h"
-#include "neighbor.h"
-#include "neigh_list.h"
-#include "random_mars.h"
-#include "citeme.h"
-#include "memory.h"
-#include "error.h"

+#include "atom.h"
+#include "citeme.h"
+#include "comm.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "random_mars.h"
+#include "update.h"
+
+#include <cmath>

 using namespace LAMMPS_NS;

@ -239,12 +239,13 @@ void PairTDPD::settings(int narg, char **arg)
  seed = utils::inumeric(FLERR,arg[2],false,lmp);

  // initialize Marsaglia RNG with processor-unique seed
+  // create a positive seed based on the system clock, if requested.

  if (seed <= 0) {
-    struct timespec time;
-    clock_gettime( CLOCK_REALTIME, &time );
-    seed = time.tv_nsec;  // if seed is non-positive, get the current time as the seed
+    constexpr double LARGE_NUM = 2<<30;
+    seed = int(fmod(platform::walltime() * LARGE_NUM, LARGE_NUM)) + 1;
  }
+
  delete random;
  random = new RanMars(lmp,(seed + comm->me) % 900000000);

--- a/src/DPD-REACT/fix_rx.cpp
+++ b/src/DPD-REACT/fix_rx.cpp
@ -58,7 +58,7 @@ namespace /* anonymous */
 {

 typedef double TimerType;
-TimerType getTimeStamp() { return MPI_Wtime(); }
+TimerType getTimeStamp() { return platform::walltime(); }
 double getElapsedTime( const TimerType &t0, const TimerType &t1) { return t1-t0; }

 } // end namespace
@ -126,7 +126,7 @@ FixRX::FixRX(LAMMPS *lmp, int narg, char **arg) :
      error->all(FLERR, errmsg);
    }

-    if (comm->me == 0 and Verbosity > 1) {
+    if (comm->me == 0 && Verbosity > 1) {
      std::string msg = "FixRX: matrix format is ";
      if (useSparseKinetics)
         msg += std::string("sparse");
@ -172,7 +172,7 @@ FixRX::FixRX(LAMMPS *lmp, int narg, char **arg) :
    char *word = arg[iarg++];
    minSteps = atoi( word );

-    if (comm->me == 0 and Verbosity > 1) {
+    if (comm->me == 0 && Verbosity > 1) {
      char msg[128];
      sprintf(msg, "FixRX: RK4 numSteps= %d", minSteps);
      error->message(FLERR, msg);
@ -197,7 +197,7 @@ FixRX::FixRX(LAMMPS *lmp, int narg, char **arg) :
    // maxIters must be at least minSteps.
    maxIters = std::max( minSteps, maxIters );

-    if (comm->me == 0 and Verbosity > 1) {
+    if (comm->me == 0 && Verbosity > 1) {
      //printf("FixRX: RKF45 minSteps= %d maxIters= %d absTol= %e relTol= %e\n", minSteps, maxIters, absTol, relTol);
      char msg[128];
      sprintf(msg, "FixRX: RKF45 minSteps= %d maxIters= %d relTol= %.1e absTol= %.1e diagnosticFrequency= %d", minSteps, maxIters, relTol, absTol, diagnosticFrequency);
@ -371,7 +371,7 @@ void FixRX::initSparse()
 {
  const int Verbosity = 1;

-  if (comm->me == 0 and Verbosity > 1) {
+  if (comm->me == 0 && Verbosity > 1) {
    for (int k = 0; k < nspecies; ++k)
      printf("atom->dvname[%d]= %s\n", k, atom->dvname[k]);

@ -421,7 +421,7 @@ void FixRX::initSparse()
    std::string pstr, rstr;
    bool allAreIntegral = true;
    for (int k = 0; k < nspecies; ++k) {
-      if (stoichReactants[i][k] == 0 and stoichProducts[i][k] == 0)
+      if (stoichReactants[i][k] == 0 && stoichProducts[i][k] == 0)
        nzeros++;

      if (stoichReactants[i][k] > 0.0) {
@ -448,7 +448,7 @@ void FixRX::initSparse()
        pstr += atom->dvname[k];
      }
    }
-    if (comm->me == 0 and Verbosity > 1)
+    if (comm->me == 0 && Verbosity > 1)
      printf("rx%3d: %d %d %d ... %s %s %s\n", i, nreac_i, nprod_i, allAreIntegral, rstr.c_str(), /*reversible[i]*/ (false) ? "<=>" : "=", pstr.c_str());

    mxreac = std::max( mxreac, nreac_i );
@ -457,7 +457,7 @@ void FixRX::initSparse()
    if (allAreIntegral) nIntegral++;
  }

-  if (comm->me == 0 and Verbosity > 1) {
+  if (comm->me == 0 && Verbosity > 1) {
    char msg[256];
    sprintf(msg, "FixRX: Sparsity of Stoichiometric Matrix= %.1f%% non-zeros= %d nspecies= %d nreactions= %d maxReactants= %d maxProducts= %d maxSpecies= %d integralReactions= %d", 100*(double(nzeros) / (nspecies * nreactions)), nzeros, nspecies, nreactions, mxreac, mxprod, (mxreac + mxprod), SparseKinetics_enableIntegralReactions);
    error->message(FLERR, msg);
@ -539,7 +539,7 @@ void FixRX::initSparse()
       sparseKinetics_isIntegralReaction[i] = isIntegral_i;
  }

-  if (comm->me == 0 and Verbosity > 1) {
+  if (comm->me == 0 && Verbosity > 1) {
    for (int i = 1; i < nu_bin.size(); ++i)
      if (nu_bin[i] > 0)
        printf("nu_bin[%d] = %d\n", i, nu_bin[i]);
@ -554,7 +554,7 @@ void FixRX::initSparse()
            rstr += " + ";

          char digit[6];
-          if (SparseKinetics_enableIntegralReactions and sparseKinetics_isIntegralReaction[i])
+          if (SparseKinetics_enableIntegralReactions && sparseKinetics_isIntegralReaction[i])
            sprintf(digit,"%d ", sparseKinetics_inu[i][kk]);
          else
            sprintf(digit,"%4.1f ", sparseKinetics_nu[i][kk]);
@ -570,7 +570,7 @@ void FixRX::initSparse()
            pstr += " + ";

          char digit[6];
-          if (SparseKinetics_enableIntegralReactions and sparseKinetics_isIntegralReaction[i])
+          if (SparseKinetics_enableIntegralReactions && sparseKinetics_isIntegralReaction[i])
            sprintf(digit,"%d ", sparseKinetics_inu[i][kk]);
          else
            sprintf(digit,"%4.1f ", sparseKinetics_nu[i][kk]);
@ -578,7 +578,7 @@ void FixRX::initSparse()
          pstr += atom->dvname[k];
        }
      }
-      if (comm->me == 0 and Verbosity > 1)
+      if (comm->me == 0 && Verbosity > 1)
        printf("rx%3d: %s %s %s\n", i, rstr.c_str(), /*reversible[i]*/ (false) ? "<=>" : "=", pstr.c_str());
    }
    // end for nreactions
--- a/src/DPD-REACT/random_external_state.h
+++ b/src/DPD-REACT/random_external_state.h
@ -78,8 +78,8 @@
 namespace random_external_state {
 typedef uint64_t es_RNG_t;

-enum { MAX_URAND = 0xffffffffU };
-enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 };
+constexpr uint32_t MAX_URAND = 0xffffffffU;
+constexpr uint64_t MAX_URAND64 = 0xffffffffffffffffULL - 1;

 LAMMPS_INLINE
 uint32_t es_urand(es_RNG_t &state_)
--- a/src/DRUDE/fix_drude_transform.cpp
+++ b/src/DRUDE/fix_drude_transform.cpp
@ -13,16 +13,18 @@
 ------------------------------------------------------------------------- */

 /** Fix Drude Transform ******************************************************/
+
 #include "fix_drude_transform.h"

+#include "atom.h"
+#include "comm.h"
+#include "domain.h"
+#include "error.h"
+#include "fix_drude.h"
+#include "modify.h"
+
 #include <cmath>
 #include <cstring>
-#include "fix_drude.h"
-#include "atom.h"
-#include "domain.h"
-#include "comm.h"
-#include "error.h"
-#include "modify.h"

 using namespace LAMMPS_NS;
 using namespace FixConst;
--- a/src/DRUDE/fix_drude_transform.h
+++ b/src/DRUDE/fix_drude_transform.h
@ -27,8 +27,8 @@ namespace LAMMPS_NS {

 template <bool inverse> class FixDrudeTransform: public Fix {
 public:
-  FixDrudeTransform<inverse>(class LAMMPS *, int, char **);
-  ~FixDrudeTransform<inverse>();
+  FixDrudeTransform(class LAMMPS *, int, char **);
+  ~FixDrudeTransform();
  int setmask();
  void init();
  void setup(int vflag);
--- a/src/DRUDE/fix_langevin_drude.cpp
+++ b/src/DRUDE/fix_langevin_drude.cpp
@ -91,9 +91,7 @@ FixLangevinDrude::FixLangevinDrude(LAMMPS *lmp, int narg, char **arg) :
  while (iarg < narg) {
    if (strcmp(arg[iarg],"zero") == 0) {
      if (iarg+2 > narg) error->all(FLERR,"Illegal fix langevin/drude command");
-      if (strcmp(arg[iarg+1],"no") == 0) zero = 0;
-      else if (strcmp(arg[iarg+1],"yes") == 0) zero = 1;
-      else error->all(FLERR,"Illegal fix langevin/drude command");
+      zero = utils::logical(FLERR, arg[iarg + 1], false, lmp);
      iarg += 2;
    } else error->all(FLERR,"Illegal fix langevin/drude command");
  }
--- a/Show More
+++ b/Show More