diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 5b0af6c0d9..9e0d748140 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -318,7 +318,7 @@ if(WITH_JPEG)
   find_package(JPEG REQUIRED)
   target_compile_definitions(lammps PRIVATE -DLAMMPS_JPEG)
   if(CMAKE_VERSION VERSION_LESS 3.12)
-    target_include_directories(lammps PRIVATE ${JPEG_INCLUDE_DIR})
+    target_include_directories(lammps PRIVATE ${JPEG_INCLUDE_DIRS})
     target_link_libraries(lammps PRIVATE ${JPEG_LIBRARIES})
   else()
     target_link_libraries(lammps PRIVATE JPEG::JPEG)
diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake
index 5e79813ac0..de7e7e5b20 100644
--- a/cmake/Modules/Packages/KSPACE.cmake
+++ b/cmake/Modules/Packages/KSPACE.cmake
@@ -19,16 +19,16 @@ if(FFT STREQUAL "FFTW3")
   find_package(${FFTW} REQUIRED)
   target_compile_definitions(lammps PRIVATE -DFFT_FFTW3)
   target_link_libraries(lammps PRIVATE ${FFTW}::${FFTW})
-  if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY)
+  if(FFTW3_OMP_LIBRARIES OR FFTW3F_OMP_LIBRARIES)
     option(FFT_FFTW_THREADS "Use threaded FFTW library" ON)
   else()
     option(FFT_FFTW_THREADS "Use threaded FFT library" OFF)
   endif()
 
   if(FFT_FFTW_THREADS)
-    if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY)
-	target_compile_definitions(lammps PRIVATE -DFFT_FFTW_THREADS)
-	target_link_libraries(lammps PRIVATE ${FFTW}::${FFTW}_OMP)
+    if(FFTW3_OMP_LIBRARIES OR FFTW3F_OMP_LIBRARIES)
+      target_compile_definitions(lammps PRIVATE -DFFT_FFTW_THREADS)
+      target_link_libraries(lammps PRIVATE ${FFTW}::${FFTW}_OMP)
     else()
       message(FATAL_ERROR "Need OpenMP enabled FFTW3 library for FFT_THREADS")
     endif()
diff --git a/cmake/Modules/Packages/MSCG.cmake b/cmake/Modules/Packages/MSCG.cmake
index 55f71588fe..6cb389fb13 100644
--- a/cmake/Modules/Packages/MSCG.cmake
+++ b/cmake/Modules/Packages/MSCG.cmake
@@ -38,7 +38,7 @@ if(DOWNLOAD_MSCG)
 else()
   find_package(MSCG)
   if(NOT MSCG_FOUND)
-    message(FATAL_ERROR "MSCG not found, help CMake to find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIRS, or set DOWNLOAD_MSCG=ON to download it")
+    message(FATAL_ERROR "MSCG not found, help CMake to find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIR, or set DOWNLOAD_MSCG=ON to download it")
   endif()
   target_link_libraries(lammps PRIVATE MSCG::MSCG)
 endif()
diff --git a/cmake/Modules/Packages/PYTHON.cmake b/cmake/Modules/Packages/PYTHON.cmake
index a577f824fe..7be25a6b05 100644
--- a/cmake/Modules/Packages/PYTHON.cmake
+++ b/cmake/Modules/Packages/PYTHON.cmake
@@ -1,7 +1,7 @@
 if(CMAKE_VERSION VERSION_LESS 3.12)
   find_package(PythonLibs REQUIRED) # Deprecated since version 3.12
-  target_include_directories(lammps PRIVATE ${PYTHON_INCLUDE_DIR})
-  target_link_libraries(lammps PRIVATE ${PYTHON_LIBRARY})
+  target_include_directories(lammps PRIVATE ${PYTHON_INCLUDE_DIRS})
+  target_link_libraries(lammps PRIVATE ${PYTHON_LIBRARIES})
 else()
   find_package(Python REQUIRED COMPONENTS Development)
   target_link_libraries(lammps PRIVATE Python::Python)
diff --git a/cmake/Modules/Packages/USER-MOLFILE.cmake b/cmake/Modules/Packages/USER-MOLFILE.cmake
index 1a2df2202b..4d414acead 100644
--- a/cmake/Modules/Packages/USER-MOLFILE.cmake
+++ b/cmake/Modules/Packages/USER-MOLFILE.cmake
@@ -1,4 +1,5 @@
-set(MOLFILE_INCLUDE_DIRS "${LAMMPS_LIB_SOURCE_DIR}/molfile" CACHE STRING "Path to VMD molfile plugin headers")
+set(MOLFILE_INCLUDE_DIR "${LAMMPS_LIB_SOURCE_DIR}/molfile" CACHE PATH "Path to VMD molfile plugin headers")
+set(MOLFILE_INCLUDE_DIRS "${MOLFILE_INCLUDE_DIR}") # plural, non-cache variable consumed by the molfile target
 add_library(molfile INTERFACE)
 target_include_directories(molfile INTERFACE ${MOLFILE_INCLUDE_DIRS})
 # no need to link with -ldl on windows
diff --git a/doc/doxygen/Doxyfile.in b/doc/doxygen/Doxyfile.in
index ee34b77355..22fe2a0b43 100644
--- a/doc/doxygen/Doxyfile.in
+++ b/doc/doxygen/Doxyfile.in
@@ -410,25 +410,28 @@ WARN_LOGFILE           = "../doxygen-warn.log"
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.
 
-INPUT                  = @LAMMPS_SOURCE_DIR@/utils.cpp      \
-                         @LAMMPS_SOURCE_DIR@/utils.h        \
-                         @LAMMPS_SOURCE_DIR@/library.cpp    \
-                         @LAMMPS_SOURCE_DIR@/library.h      \
-                         @LAMMPS_SOURCE_DIR@/lammps.cpp     \
-                         @LAMMPS_SOURCE_DIR@/lammps.h       \
-                         @LAMMPS_SOURCE_DIR@/lmptype.h      \
-                         @LAMMPS_SOURCE_DIR@/pointers.h     \
-                         @LAMMPS_SOURCE_DIR@/atom.cpp       \
-                         @LAMMPS_SOURCE_DIR@/atom.h         \
-                         @LAMMPS_SOURCE_DIR@/input.cpp      \
-                         @LAMMPS_SOURCE_DIR@/input.h        \
-                         @LAMMPS_SOURCE_DIR@/tokenizer.cpp  \
-                         @LAMMPS_SOURCE_DIR@/tokenizer.h    \
-                         @LAMMPS_SOURCE_DIR@/math_eigen.h    \
-                         @LAMMPS_SOURCE_DIR@/text_file_reader.cpp  \
-                         @LAMMPS_SOURCE_DIR@/text_file_reader.h    \
-                         @LAMMPS_SOURCE_DIR@/potential_file_reader.cpp  \
-                         @LAMMPS_SOURCE_DIR@/potential_file_reader.h    \
+INPUT                  = @LAMMPS_SOURCE_DIR@/utils.cpp                 \
+                         @LAMMPS_SOURCE_DIR@/utils.h                   \
+                         @LAMMPS_SOURCE_DIR@/library.cpp               \
+                         @LAMMPS_SOURCE_DIR@/library.h                 \
+                         @LAMMPS_SOURCE_DIR@/lammps.cpp                \
+                         @LAMMPS_SOURCE_DIR@/lammps.h                  \
+                         @LAMMPS_SOURCE_DIR@/lmptype.h                 \
+                         @LAMMPS_SOURCE_DIR@/atom.cpp                  \
+                         @LAMMPS_SOURCE_DIR@/atom.h                    \
+                         @LAMMPS_SOURCE_DIR@/input.cpp                 \
+                         @LAMMPS_SOURCE_DIR@/input.h                   \
+                         @LAMMPS_SOURCE_DIR@/tokenizer.cpp             \
+                         @LAMMPS_SOURCE_DIR@/tokenizer.h               \
+                         @LAMMPS_SOURCE_DIR@/text_file_reader.cpp      \
+                         @LAMMPS_SOURCE_DIR@/text_file_reader.h        \
+                         @LAMMPS_SOURCE_DIR@/potential_file_reader.cpp \
+                         @LAMMPS_SOURCE_DIR@/potential_file_reader.h   \
+                         @LAMMPS_SOURCE_DIR@/my_page.cpp               \
+                         @LAMMPS_SOURCE_DIR@/my_page.h                 \
+                         @LAMMPS_SOURCE_DIR@/my_pool_chunk.cpp         \
+                         @LAMMPS_SOURCE_DIR@/my_pool_chunk.h           \
+                         @LAMMPS_SOURCE_DIR@/math_eigen.h              \
 
 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
diff --git a/doc/src/Build_basics.rst b/doc/src/Build_basics.rst
index 7548a8a6e9..3b20209e5c 100644
--- a/doc/src/Build_basics.rst
+++ b/doc/src/Build_basics.rst
@@ -32,74 +32,80 @@ LAMMPS are also written with support for shared memory parallelization
 using the `OpenMP <https://en.wikipedia.org/wiki/OpenMP>`_ threading
 standard. A more detailed discussion of that is below.
 
-**CMake build**\ :
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D BUILD_MPI=value        # yes or no, default is yes if CMake finds MPI, else no
-   -D BUILD_OMP=value        # yes or no, default is yes if a compatible compiler is detected
-   -D LAMMPS_MACHINE=name    # name = mpi, serial, mybox, titan, laptop, etc
-                             # no default value
+      .. code-block:: bash
 
-The executable created by CMake (after running make) is named ``lmp`` unless
-the ``LAMMPS_MACHINE`` option is set.  When setting ``LAMMPS_MACHINE=name``
-the executable will be called ``lmp_name``.  Using ``BUILD_MPI=no`` will
-enforce building a serial executable using the MPI STUBS library.
+         -D BUILD_MPI=value        # yes or no, default is yes if CMake finds MPI, else no
+         -D BUILD_OMP=value        # yes or no, default is yes if a compatible compiler is detected
+         -D LAMMPS_MACHINE=name    # name = mpi, serial, mybox, titan, laptop, etc
+                                   # no default value
 
-**Traditional make**\ :
+      The executable created by CMake (after running make) is named
+      ``lmp`` unless the ``LAMMPS_MACHINE`` option is set.  When setting
+      ``LAMMPS_MACHINE=name`` the executable will be called
+      ``lmp_name``.  Using ``BUILD_MPI=no`` will enforce building a
+      serial executable using the MPI STUBS library.
 
-The build with traditional makefiles has to be done inside the source folder ``src``.
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      The build with traditional makefiles has to be done inside the source folder ``src``.
 
-   make mpi                # parallel build, produces lmp_mpi using Makefile.mpi
-   make serial             # serial build, produces lmp_serial using Makefile/serial
-   make mybox              # uses Makefile.mybox to produce lmp_mybox
+      .. code-block:: bash
 
-Any ``make machine`` command will look up the make settings from a file
-``Makefile.machine`` in the folder ``src/MAKE`` or one of its
-sub-directories ``MINE``, ``MACHINES``, or ``OPTIONS``, create a folder
-``Obj_machine`` with all objects and generated files and an executable
-called ``lmp_machine``\ .  The standard parallel build with ``make mpi``
-assumes a standard MPI installation with MPI compiler wrappers where all
-necessary compiler and linker flags to get access and link with the
-suitable MPI headers and libraries are set by the wrapper programs.  For
-other cases or the serial build, you have to adjust the make file
-variables ``MPI_INC``, ``MPI_PATH``, ``MPI_LIB`` as well as ``CC`` and
-``LINK``\ .  To enable OpenMP threading usually a compiler specific flag
-needs to be added to the compile and link commands.  For the GNU
-compilers, this is ``-fopenmp``\ , which can be added to the ``CC`` and
-``LINK`` makefile variables.
+         make mpi                # parallel build, produces lmp_mpi using Makefile.mpi
+         make serial             # serial build, produces lmp_serial using Makefile.serial
+         make mybox              # uses Makefile.mybox to produce lmp_mybox
 
-For the serial build the following make variables are set (see src/MAKE/Makefile.serial):
+      Any ``make machine`` command will look up the make settings from a
+      file ``Makefile.machine`` in the folder ``src/MAKE`` or one of its
+      sub-directories ``MINE``, ``MACHINES``, or ``OPTIONS``, create a
+      folder ``Obj_machine`` with all objects and generated files and an
+      executable called ``lmp_machine``\ .  The standard parallel build
+      with ``make mpi`` assumes a standard MPI installation with MPI
+      compiler wrappers where all necessary compiler and linker flags to
+      get access and link with the suitable MPI headers and libraries
+      are set by the wrapper programs.  For other cases or the serial
+      build, you have to adjust the make file variables ``MPI_INC``,
+      ``MPI_PATH``, ``MPI_LIB`` as well as ``CC`` and ``LINK``\ .  To
+      enable OpenMP threading usually a compiler specific flag needs to
+      be added to the compile and link commands.  For the GNU compilers,
+      this is ``-fopenmp``\ , which can be added to the ``CC`` and
+      ``LINK`` makefile variables.
 
-.. code-block:: make
+      For the serial build the following make variables are set (see src/MAKE/Makefile.serial):
 
-   CC =            g++
-   LINK =          g++
-   MPI_INC =       -I../STUBS
-   MPI_PATH =      -L../STUBS
-   MPI_LIB =       -lmpi_stubs
+      .. code-block:: make
 
-You also need to build the STUBS library for your platform before making
-LAMMPS itself.  A ``make serial`` build does this for you automatically,
-otherwise, type ``make mpi-stubs`` from the src directory, or ``make``
-from the ``src/STUBS`` dir.  If the build fails, you may need to edit
-the ``STUBS/Makefile`` for your platform.  The stubs library does not
-provide MPI/IO functions required by some LAMMPS packages,
-e.g. ``MPIIO`` or ``USER-LB``, and thus is not compatible with those
-packages.
+         CC =            g++
+         LINK =          g++
+         MPI_INC =       -I../STUBS
+         MPI_PATH =      -L../STUBS
+         MPI_LIB =       -lmpi_stubs
 
-.. note::
+      You also need to build the STUBS library for your platform before
+      making LAMMPS itself.  A ``make serial`` build does this for you
+      automatically, otherwise, type ``make mpi-stubs`` from the src
+      directory, or ``make`` from the ``src/STUBS`` dir.  If the build
+      fails, you may need to edit the ``STUBS/Makefile`` for your
+      platform.  The stubs library does not provide MPI/IO functions
+      required by some LAMMPS packages, e.g. ``MPIIO`` or ``USER-LB``,
+      and thus is not compatible with those packages.
 
-   The file ``src/STUBS/mpi.c`` provides a CPU timer function called
-   ``MPI_Wtime()`` that calls ``gettimeofday()``.  If your operating system
-   does not support ``gettimeofday()``, you will need to insert code to
-   call another timer.  Note that the ANSI-standard function ``clock()``
-   rolls over after an hour or so, and is therefore insufficient for
-   timing long LAMMPS simulations.
+      .. note::
 
-**MPI and OpenMP support info**\ :
+         The file ``src/STUBS/mpi.c`` provides a CPU timer function
+         called ``MPI_Wtime()`` that calls ``gettimeofday()``.  If your
+         operating system does not support ``gettimeofday()``, you will
+         need to insert code to call another timer.  Note that the
+         ANSI-standard function ``clock()`` rolls over after an hour or
+         so, and is therefore insufficient for timing long LAMMPS
+         simulations.
+
+MPI and OpenMP support in LAMMPS
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 If you are installing MPI yourself to build a parallel LAMMPS
 executable, we recommend either MPICH or OpenMPI which are regularly
@@ -145,18 +151,19 @@ please refer to its documentation.
 
 .. _default-none-issues:
 
-**OpenMP Compiler compatibility info**\ :
+OpenMP Compiler compatibility
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Some compilers do not fully support the ``default(none)`` directive
-and others (e.g. GCC version 9 and beyond, Clang version 10 and later)
-may implement strict OpenMP 4.0 and later semantics, which are incompatible
+Some compilers do not fully support the ``default(none)`` directive and
+others (e.g. GCC version 9 and beyond, Clang version 10 and later) may
+implement strict OpenMP 4.0 and later semantics, which are incompatible
 with the OpenMP 3.1 semantics used in LAMMPS for maximal compatibility
 with compiler versions in use.  If compilation with OpenMP enabled fails
 because of your compiler requiring strict OpenMP 4.0 semantic, you can
-change the behavior by adding ``-D LAMMPS_OMP_COMPAT=4`` to the ``LMP_INC``
-variable in your makefile, or add it to the command line while configuring
-with CMake. CMake will detect the suitable setting for the GNU, Clang,
-and Intel compilers.
+change the behavior by adding ``-D LAMMPS_OMP_COMPAT=4`` to the
+``LMP_INC`` variable in your makefile, or add it to the command line
+while configuring with CMake. CMake will detect the suitable setting for
+the GNU, Clang, and Intel compilers.
 
 ----------
 
@@ -185,131 +192,139 @@ for their compile/link environments, you can often access different
 compilers by simply loading the appropriate module before building
 LAMMPS.
 
-**CMake build**\ :
+.. tabs::
 
-By default CMake will use a compiler it finds according to internal
-preferences and it will add optimization flags appropriate to that
-compiler and any :doc:`accelerator packages <Speed_packages>` you have
-included in the build.
+   .. tab:: CMake build
 
-You can tell CMake to look for a specific compiler with setting CMake
-variables (listed below) during configuration.  For a few common
-choices, there are also presets in the ``cmake/presets`` folder.  For
-convenience, there is a ``CMAKE_TUNE_FLAGS`` variable that can be set to
-apply global compiler options (applied to compilation only), to be used
-for adding compiler or host specific optimization flags in addition to
-the "flags" variables listed below. You may also specify the
-corresponding ``CMAKE_*_FLAGS`` variables individually, if you want to
-experiment with alternate optimization flags.  You should specify all 3
-compilers, so that the (few) LAMMPS source files written in C or Fortran
-are built with a compiler consistent with the one used for the C++
-files:
+      By default CMake will use the compiler it finds according to
+      internal preferences and it will add optimization flags
+      appropriate to that compiler and any :doc:`accelerator packages
+      <Speed_packages>` you have included in the build.  CMake will
+      check if the detected or selected compiler is compatible with the
+      C++ support requirements of LAMMPS and stop with an error, if this
+      is not the case.
 
-.. code-block:: bash
+      You can tell CMake to look for a specific compiler by setting
+      CMake variables (listed below) during configuration.  For a few
+      common choices, there are also presets in the ``cmake/presets``
+      folder.  For convenience, there is a ``CMAKE_TUNE_FLAGS`` variable
+      that can be set to apply global compiler options (applied to
+      compilation only), to be used for adding compiler or host specific
+      optimization flags in addition to the "flags" variables listed
+      below. You may also specify the corresponding ``CMAKE_*_FLAGS``
+      variables individually, if you want to experiment with alternate
+      optimization flags.  You should specify all 3 compilers, so that
+      the (few) LAMMPS source files written in C or Fortran are built
+      with a compiler consistent with the one used for the C++ files:
 
-   -D CMAKE_CXX_COMPILER=name            # name of C++ compiler
-   -D CMAKE_C_COMPILER=name              # name of C compiler
-   -D CMAKE_Fortran_COMPILER=name        # name of Fortran compiler
+      .. code-block:: bash
 
-   -D CMAKE_CXX_FLAGS=string             # flags to use with C++ compiler
-   -D CMAKE_C_FLAGS=string               # flags to use with C compiler
-   -D CMAKE_Fortran_FLAGS=string         # flags to use with Fortran compiler
+         -D CMAKE_CXX_COMPILER=name            # name of C++ compiler
+         -D CMAKE_C_COMPILER=name              # name of C compiler
+         -D CMAKE_Fortran_COMPILER=name        # name of Fortran compiler
 
-A few example command lines are:
+         -D CMAKE_CXX_FLAGS=string             # flags to use with C++ compiler
+         -D CMAKE_C_FLAGS=string               # flags to use with C compiler
+         -D CMAKE_Fortran_FLAGS=string         # flags to use with Fortran compiler
 
-.. code-block:: bash
+      A few example command lines are:
 
-   # Building with GNU Compilers:
-   cmake ../cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_Fortran_COMPILER=gfortran
-   # Building with Intel Compilers:
-   cmake ../cmake -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc -DCMAKE_Fortran_COMPILER=ifort
-   # Building with LLVM/Clang Compilers:
-   cmake ../cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_Fortran_COMPILER=flang
+      .. code-block:: bash
 
-For compiling with the Clang/LLVM compilers a CMake preset is provided that
-can be loaded with `-C ../cmake/presets/clang.cmake`.  Similarly,
-`-C ../cmake/presets/intel.cmake` should switch the
+         # Building with GNU Compilers:
+         cmake ../cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_Fortran_COMPILER=gfortran
+         # Building with Intel Compilers:
+         cmake ../cmake -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc -DCMAKE_Fortran_COMPILER=ifort
+         # Building with LLVM/Clang Compilers:
+         cmake ../cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_Fortran_COMPILER=flang
 
-In addition you can set ``CMAKE_TUNE_FLAGS`` to specifically add
-compiler flags to tune for optimal performance on given hosts. By
-default this variable is empty.
+      For compiling with the Clang/LLVM compilers a CMake preset is
+      provided that can be loaded with
+      ``-C ../cmake/presets/clang.cmake``.  Similarly,
+      ``-C ../cmake/presets/intel.cmake`` should switch the compiler
+      toolchain to the Intel compilers.
 
-.. note::
+      In addition you can set ``CMAKE_TUNE_FLAGS`` to specifically add
+      compiler flags to tune for optimal performance on given hosts. By
+      default this variable is empty.
 
-   When the cmake command completes, it prints a summary to the screen
-   which compilers it is using and what flags and settings will be used
-   for the  compilation.  Note that if the top-level compiler is mpicxx,
-   it is simply a wrapper on a real compiler.  The underlying compiler
-   info is what CMake will try to determine and report.  You should check
-   to confirm you are using the compiler and optimization flags you want.
+      .. note::
 
-**Makefile.machine settings for traditional make**\ :
+         When the cmake command completes, it prints a summary to the
+         screen which compilers it is using and what flags and settings
+         will be used for the compilation.  Note that if the top-level
+         compiler is mpicxx, it is simply a wrapper on a real compiler.
+         The underlying compiler info is what CMake will try to
+         determine and report.  You should check to confirm you are
+         using the compiler and optimization flags you want.
 
-The "compiler/linker settings" section of a Makefile.machine lists
-compiler and linker settings for your C++ compiler, including
-optimization flags.  For a parallel build it is recommended to use
-``mpicxx`` or ``mpiCC``, since these compiler wrappers will include a
-variety of settings appropriate for your MPI installation and thus
-avoiding the guesswork of finding the right flags.
+   .. tab:: Makefile.machine settings for traditional make
 
-Parallel build (see ``src/MAKE/Makefile.mpi``):
+      The "compiler/linker settings" section of a Makefile.machine lists
+      compiler and linker settings for your C++ compiler, including
+      optimization flags.  For a parallel build it is recommended to use
+      ``mpicxx`` or ``mpiCC``, since these compiler wrappers will
+      include a variety of settings appropriate for your MPI
+      installation and thus avoid the guesswork of finding the right
+      flags.
 
-.. code-block:: bash
+      Parallel build (see ``src/MAKE/Makefile.mpi``):
 
-   CC =            mpicxx
-   CCFLAGS =       -g -O3
-   LINK =          mpicxx
-   LINKFLAGS =     -g -O
+      .. code-block:: bash
 
-Serial build with GNU gcc (see ``src/MAKE/Makefile.serial``):
+         CC =            mpicxx
+         CCFLAGS =       -g -O3
+         LINK =          mpicxx
+         LINKFLAGS =     -g -O
 
-.. code-block:: make
+      Serial build with GNU gcc (see ``src/MAKE/Makefile.serial``):
 
-   CC =            g++
-   CCFLAGS =       -g -O3
-   LINK =          g++
-   LINKFLAGS =     -g -O
+      .. code-block:: make
 
+         CC =            g++
+         CCFLAGS =       -g -O3
+         LINK =          g++
+         LINKFLAGS =     -g -O
 
-.. note::
+      .. note::
 
-   If compilation stops with a message like the following:
+         If compilation stops with a message like the following:
 
-   .. code-block::
+         .. code-block::
 
-      g++ -g -O3  -DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64    -I../STUBS     -c ../main.cpp
-      In file included from ../pointers.h:24:0,
-                 from ../input.h:17,
-                 from ../main.cpp:16:
-      ../lmptype.h:34:2: error: #error LAMMPS requires a C++11 (or later) compliant compiler. Enable C++11 compatibility or upgrade the compiler.
+            g++ -g -O3  -DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64    -I../STUBS     -c ../main.cpp
+            In file included from ../pointers.h:24:0,
+                       from ../input.h:17,
+                       from ../main.cpp:16:
+            ../lmptype.h:34:2: error: #error LAMMPS requires a C++11 (or later) compliant compiler. Enable C++11 compatibility or upgrade the compiler.
 
-   then you have either an unsupported (old) compiler or you have to
-   turn on C++11 mode.  The latter applies to GCC 4.8.x shipped with
-   RHEL 7.x and CentOS 7.x.  For those compilers, you need to add the
-   ``-std=c++11`` flag.  Otherwise, you would have to install a newer
-   compiler that supports C++11; either as a binary package or through
-   compiling from source.
+         then you have either an unsupported (old) compiler or you have
+         to turn on C++11 mode.  The latter applies to GCC 4.8.x shipped
+         with RHEL 7.x and CentOS 7.x.  For those compilers, you need to
+         add the ``-std=c++11`` flag.  Otherwise, you would have to
+         install a newer compiler that supports C++11; either as a
+         binary package or through compiling from source.
 
-If you build LAMMPS with any :doc:`Speed_packages` included, there may
-be specific compiler or linker flags
-that are either required or recommended to enable required features and
-to achieve optimal performance.  You need to include these in the
-CCFLAGS and LINKFLAGS settings above.  For details, see the individual
-package doc pages listed on the :doc:`Speed_packages`
-page.  Or examine these files in the src/MAKE/OPTIONS directory.
-They correspond to each of the 5 accelerator packages and their hardware
-variants:
+         If you build LAMMPS with any :doc:`Speed_packages` included,
+         there may be specific compiler or linker flags that are either
+         required or recommended to enable required features and to
+         achieve optimal performance.  You need to include these in the
+         CCFLAGS and LINKFLAGS settings above.  For details, see the
+         individual package doc pages listed on the
+         :doc:`Speed_packages` page.  Or examine these files in the
+         src/MAKE/OPTIONS directory.  They correspond to each of the 5
+         accelerator packages and their hardware variants:
 
-.. code-block:: bash
+         .. code-block:: bash
 
-   Makefile.opt                   # OPT package
-   Makefile.omp                   # USER-OMP package
-   Makefile.intel_cpu             # USER-INTEL package for CPUs
-   Makefile.intel_coprocessor     # USER-INTEL package for KNLs
-   Makefile.gpu                   # GPU package
-   Makefile.kokkos_cuda_mpi       # KOKKOS package for GPUs
-   Makefile.kokkos_omp            # KOKKOS package for CPUs (OpenMP)
-   Makefile.kokkos_phi            # KOKKOS package for KNLs (OpenMP)
+            Makefile.opt                   # OPT package
+            Makefile.omp                   # USER-OMP package
+            Makefile.intel_cpu             # USER-INTEL package for CPUs
+            Makefile.intel_coprocessor     # USER-INTEL package for KNLs
+            Makefile.gpu                   # GPU package
+            Makefile.kokkos_cuda_mpi       # KOKKOS package for GPUs
+            Makefile.kokkos_omp            # KOKKOS package for CPUs (OpenMP)
+            Makefile.kokkos_phi            # KOKKOS package for KNLs (OpenMP)
 
 ----------
 
@@ -328,51 +343,56 @@ page for more info on coupling LAMMPS to other codes.  See the
 :doc:`Python <Python_head>` doc page for more info on wrapping and
 running LAMMPS from Python via its library interface.
 
-**CMake build**\ :
+.. tabs::
 
-For CMake builds, you can select through setting CMake variables between
-building a shared or a static LAMMPS library and what kind of suffix is
-added to them (in case you want to concurrently install multiple variants
-of binaries with different settings). If none are set, defaults are applied.
+   .. tab:: CMake build
 
-.. code-block:: bash
+      For CMake builds, you can select through setting CMake variables
+      between building a shared or a static LAMMPS library and what kind
+      of suffix is added to them (in case you want to concurrently
+      install multiple variants of binaries with different settings). If
+      none are set, defaults are applied.
 
-   -D BUILD_SHARED_LIBS=value   # yes or no (default)
-   -D LAMMPS_MACHINE=name       # name = mpi, serial, mybox, titan, laptop, etc
-                                # no default value
+      .. code-block:: bash
 
-The compilation will always produce a LAMMPS library and an executable
-linked to it.  By default this will be a static library named
-``liblammps.a`` and an executable named ``lmp`` Setting
-``BUILD_SHARED_LIBS=yes`` will instead produce a shared library called
-``liblammps.so`` (or ``liblammps.dylib`` or ``liblammps.dll`` depending
-on the platform) If ``LAMMPS_MACHINE=name`` is set in addition, the name
-of the generated libraries will be changed to either
-``liblammps_name.a`` or ``liblammps_name.so``\ , respectively and the
-executable will be called ``lmp_name``.
+         -D BUILD_SHARED_LIBS=value   # yes or no (default)
+         -D LAMMPS_MACHINE=name       # name = mpi, serial, mybox, titan, laptop, etc
+                                      # no default value
 
-**Traditional make**\ :
+      The compilation will always produce a LAMMPS library and an
+      executable linked to it.  By default this will be a static library
+      named ``liblammps.a`` and an executable named ``lmp``.  Setting
+      ``BUILD_SHARED_LIBS=yes`` will instead produce a shared library
+      called ``liblammps.so`` (or ``liblammps.dylib`` or
+      ``liblammps.dll`` depending on the platform).  If
+      ``LAMMPS_MACHINE=name`` is set in addition, the name of the
+      generated libraries will be changed to either ``liblammps_name.a``
+      or ``liblammps_name.so``\ , respectively and the executable will
+      be called ``lmp_name``.
 
-With the traditional makefile based build process, the choice of
-the generated executable or library depends on the "mode" setting.
-Several options are available and ``mode=static`` is the default.
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      With the traditional makefile based build process, the choice of
+      the generated executable or library depends on the "mode" setting.
+      Several options are available and ``mode=static`` is the default.
 
-   make machine               # build LAMMPS executable lmp_machine
-   make mode=static machine   # same as "make machine"
-   make mode=shared machine   # build LAMMPS shared lib liblammps_machine.so instead
+      .. code-block:: bash
 
-The "static" build will generate a static library called
-``liblammps_machine.a`` and an executable named ``lmp_machine``\ , while
-the "shared" build will generate a shared library
-``liblammps_machine.so`` instead and ``lmp_machine`` will be linked to
-it.  The build step will also create generic soft links, named
-``liblammps.a`` and ``liblammps.so``\ , which point to the specific
-``liblammps_machine.a/so`` files.
+         make machine               # build LAMMPS executable lmp_machine
+         make mode=static machine   # same as "make machine"
+         make mode=shared machine   # build LAMMPS shared lib liblammps_machine.so instead
 
-CMake and make info
-^^^^^^^^^^^^^^^^^^^
+      The "static" build will generate a static library called
+      ``liblammps_machine.a`` and an executable named ``lmp_machine``\ ,
+      while the "shared" build will generate a shared library
+      ``liblammps_machine.so`` instead and ``lmp_machine`` will be
+      linked to it.  The build step will also create generic soft links,
+      named ``liblammps.a`` and ``liblammps.so``\ , which point to the
+      specific ``liblammps_machine.a/so`` files.
+
+
+Additional information
+^^^^^^^^^^^^^^^^^^^^^^
 
 Note that for creating a shared library, all the libraries it depends on
 must be compiled to be compatible with shared libraries.  This should be
@@ -462,8 +482,8 @@ tool.  The actual translation is then done via make commands.
 .. _rst: https://docutils.readthedocs.io/en/sphinx-docs/user/rst/quickstart.html
 .. _sphinx: https://www.sphinx-doc.org
 
-Documentation make option
-^^^^^^^^^^^^^^^^^^^^^^^^^
+Documentation makefile options
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The following make commands can be issued in the doc folder of the
 LAMMPS source distribution.
@@ -490,15 +510,16 @@ your system.
    current LAMMPS version (HTML and PDF files), from the website
    `download page <https://lammps.sandia.gov/download.html>`_.
 
-CMake build option
-^^^^^^^^^^^^^^^^^^
+CMake build options
+^^^^^^^^^^^^^^^^^^^
 
-It is also possible to create the HTML version of the manual within
-the :doc:`CMake build directory <Build_cmake>`.  The reason for this
-option is to include the installation of the HTML manual pages into
-the "install" step when installing LAMMPS after the CMake build via
-``make install``.  The documentation build is included in the default
-build target, but can also be requested independently with ``make doc``.
+It is also possible to create the HTML version (and only the HTML
+version) of the manual within the :doc:`CMake build directory
+<Build_cmake>`.  The reason for this option is to include the
+installation of the HTML manual pages into the "install" step when
+installing LAMMPS after the CMake build via ``make install``.  The
+documentation build is included in the default build target, but can
+also be requested independently with ``make doc``.
 
 .. code-block:: bash
 
@@ -514,27 +535,27 @@ Build LAMMPS tools
 Some tools described in :doc:`Auxiliary tools <Tools>` can be built directly
 using CMake or Make.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D BUILD_TOOLS=value       # yes or no (default)
+      .. code-block:: bash
 
-The generated binaries will also become part of the LAMMPS installation
-(see below).
+         -D BUILD_TOOLS=value       # yes or no (default)
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      The generated binaries will also become part of the LAMMPS installation
+      (see below).
 
-.. code-block:: bash
+   .. tab:: Traditional make
 
-   cd lammps/tools
-   make all              # build all binaries of tools
-   make binary2txt       # build only binary2txt tool
-   make chain            # build only chain tool
-   make micelle2d        # build only micelle2d tool
-   make thermo_extract   # build only thermo_extract tool
+      .. code-block:: bash
+
+         cd lammps/tools
+         make all              # build all binaries of tools
+         make binary2txt       # build only binary2txt tool
+         make chain            # build only chain tool
+         make micelle2d        # build only micelle2d tool
+         make thermo_extract   # build only thermo_extract tool
 
 ----------
 
@@ -549,18 +570,19 @@ a globally visible place on your system, for others to access.  Note
 that you may need super-user privileges (e.g. sudo) if the directory
 you want to copy files to is protected.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   cmake -D CMAKE_INSTALL_PREFIX=path [options ...] ../cmake
-   make                        # perform make after CMake command
-   make install                # perform the installation into prefix
+      .. code-block:: bash
 
-Traditional make
-^^^^^^^^^^^^^^^^
+         cmake -D CMAKE_INSTALL_PREFIX=path [options ...] ../cmake
+         make                        # perform make after CMake command
+         make install                # perform the installation into prefix
 
-There is no "install" option in the ``src/Makefile`` for LAMMPS.  If
-you wish to do this you will need to first build LAMMPS, then manually
-copy the desired LAMMPS files to the appropriate system directories.
+   .. tab:: Traditional make
+
+      There is no "install" option in the ``src/Makefile`` for LAMMPS.
+      If you wish to do this you will need to first build LAMMPS, then
+      manually copy the desired LAMMPS files to the appropriate system
+      directories.
diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst
index 9595849b58..79e38ab2b4 100644
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@@ -2,17 +2,21 @@ Packages with extra build options
 =================================
 
 When building with some packages, additional steps may be required,
-in addition to:
+in addition to
 
-.. code-block:: bash
+.. list-table::
+   :align: center
+   :header-rows: 1
 
-   $ cmake -D PKG_NAME=yes
+   * - CMake build
+     - Traditional make
+   * - .. code-block:: bash
 
-or
+          $ cmake -D PKG_NAME=yes
 
-.. code-block:: bash
+     - .. code-block:: bash
 
-   $ make yes-name
+          $ make yes-name
 
 as described on the :doc:`Build_package <Build_package>` doc page.
 
@@ -62,25 +66,39 @@ This is the list of packages that may require additional steps.
 COMPRESS package
 ----------------
 
-To build with this package you must have the zlib compression library
-available on your system.
+To build with this package you must have the `zlib compression library
+<https://zlib.net>`_ available on your system to build dump styles with
+a '/gz' suffix.  There are also styles using the
+`Zstandard <https://facebook.github.io/zstd/>`_ library which have a
+'/zstd' suffix.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-If CMake cannot find the library, you can set these variables:
+   .. tab:: CMake build
 
-.. code-block:: bash
+      If CMake cannot find the zlib library or include files, you can set
+      these variables:
 
-   -D ZLIB_INCLUDE_DIR=path    # path to zlib.h header file
-   -D ZLIB_LIBRARIES=path      # path to libz.a (.so) file
+      .. code-block:: bash
 
-Traditional make
-^^^^^^^^^^^^^^^^
+         -D ZLIB_INCLUDE_DIR=path    # path to zlib.h header file
+         -D ZLIB_LIBRARY=path        # path to libz.a (.so) file
 
-If make cannot find the library, you can edit the file
-``lib/compress/Makefile.lammps`` to specify the paths and library
-name.
+      Support for Zstandard compression is auto-detected and for that
+      CMake depends on the `pkg-config
+      <https://www.freedesktop.org/wiki/Software/pkg-config/>`_ tool to
+      identify the necessary flags to compile with this library, so the
+      corresponding ``libzstd.pc`` file must be in a folder where
+      pkg-config can find it, which may require adding it to the
+      ``PKG_CONFIG_PATH`` environment variable.
+
+   .. tab:: Traditional make
+
+      To include support for Zstandard compression, ``-DLAMMPS_ZSTD``
+      must be added to the compiler flags.  If make cannot find the
+      libraries, you can edit the file ``lib/compress/Makefile.lammps``
+      to specify the paths and library names.  This must be done
+      **before** the package is installed.
 
 ----------
 
@@ -121,17 +139,19 @@ CMake build
 
 * sm_12 or sm_13 for GT200 (supported by CUDA 3.2 until CUDA 6.5)
 * sm_20 or sm_21 for Fermi (supported by CUDA 3.2 until CUDA 7.5)
-* sm_30 or sm_35 or sm_37 for Kepler (supported since CUDA 5)
+* sm_30 for Kepler (supported since CUDA 5 and until CUDA 10.x)
+* sm_35 or sm_37 for Kepler (supported since CUDA 5 and until CUDA 11.x)
 * sm_50 or sm_52 for Maxwell (supported since CUDA 6)
 * sm_60 or sm_61 for Pascal (supported since CUDA 8)
 * sm_70 for Volta (supported since CUDA 9)
 * sm_75 for Turing (supported since CUDA 10)
+* sm_80 for Ampere (supported since CUDA 11)
 
 A more detailed list can be found, for example,
 at `Wikipedia's CUDA article <https://en.wikipedia.org/wiki/CUDA#GPUs_supported>`_
 
-CMake can detect which version of the CUDA toolkit is used and thus can
-include support for **all** major GPU architectures supported by this toolkit.
+CMake can detect which version of the CUDA toolkit is used and thus will try
+to include support for **all** major GPU architectures supported by this toolkit.
 Thus the GPU_ARCH setting is merely an optimization, to have code for
 the preferred GPU architecture directly included rather than having to wait
 for the JIT compiler of the CUDA driver to translate it.
@@ -155,7 +175,7 @@ and the linker to work correctly.
 
 .. code:: bash
 
-   # CUDA target
+   # CUDA target (not recommended, use GPU_ARCH=cuda)
    # !!! DO NOT set CMAKE_CXX_COMPILER !!!
    export HIP_PLATFORM=nvcc
    export CUDA_PATH=/usr/local/cuda
@@ -253,64 +273,70 @@ See the list of all KIM models here: https://openkim.org/browse/models
 the KIM API library with all its models, may take a long time (tens of
 minutes to hours) to build.  Of course you only need to do that once.)
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D DOWNLOAD_KIM=value           # download OpenKIM API v2 for build, value = no (default) or yes
-   -D LMP_DEBUG_CURL=value         # set libcurl verbose mode on/off, value = off (default) or on
-   -D LMP_NO_SSL_CHECK=value       # tell libcurl to not verify the peer, value = no (default) or yes
+      .. code-block:: bash
 
-If ``DOWNLOAD_KIM`` is set, the KIM library will be downloaded and built
-inside the CMake build directory.  If the KIM library is already on
-your system (in a location CMake cannot find it), set the ``PKG_CONFIG_PATH``
-environment variable so that libkim-api can be found.
+         -D DOWNLOAD_KIM=value           # download OpenKIM API v2 for build, value = no (default) or yes
+         -D LMP_DEBUG_CURL=value         # set libcurl verbose mode on/off, value = off (default) or on
+         -D LMP_NO_SSL_CHECK=value       # tell libcurl to not verify the peer, value = no (default) or yes
 
-*For using OpenKIM web queries in LAMMPS*\ :
+      If ``DOWNLOAD_KIM`` is set to *yes* (or *on*), the KIM API library
+      will be downloaded and built inside the CMake build directory.  If
+      the KIM library is already installed on your system (in a location
+      where CMake cannot find it), you may need to set the
+      ``PKG_CONFIG_PATH`` environment variable so that libkim-api can be
+      found, or run the command ``source kim-api-activate``.
 
-If the ``LMP_DEBUG_CURL`` environment variable is set, the libcurl verbose
-mode will be on, and any libcurl calls within the KIM web query display a
-lot of information about libcurl operations.  You hardly ever want this
-set in production use, you will almost always want this when you debug or
+   .. tab:: Traditional make
+
+      You can download and build the KIM library manually if you prefer;
+      follow the instructions in ``lib/kim/README``.  You can also do
+      this in one step from the lammps/src dir, using a command like
+      these, which simply invoke the ``lib/kim/Install.py`` script with
+      the specified args.
+
+      .. code-block:: bash
+
+         $ make lib-kim              # print help message
+         $ make lib-kim args="-b "   # (re-)install KIM API lib with only example models
+         $ make lib-kim args="-b -a Glue_Ercolessi_Adams_Al__MO_324507536345_001"  # ditto plus one model
+         $ make lib-kim args="-b -a everything"     # install KIM API lib with all models
+         $ make lib-kim args="-n -a EAM_Dynamo_Ackland_W__MO_141627196590_002"       # add one model or model driver
+         $ make lib-kim args="-p /usr/local" # use an existing KIM API installation at the provided location
+         $ make lib-kim args="-p /usr/local -a EAM_Dynamo_Ackland_W__MO_141627196590_002" # ditto but add one model or driver
+
+      Settings for debugging OpenKIM web queries discussed below need to
+      be applied by adding them to the ``LMP_INC`` variable through
+      editing the ``Makefile.machine`` you are using.  For example:
+
+      .. code-block:: make
+
+         LMP_INC =       -DLMP_NO_SSL_CHECK
+
+Debugging OpenKIM web queries in LAMMPS
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If ``LMP_DEBUG_CURL`` is set, the libcurl verbose mode will be turned
+on, and any libcurl calls within the KIM web query display a lot of
+information about libcurl operations.  You hardly ever want this set in
+production use, but you will almost always want it when you debug or
 report problems.
 
-The libcurl performs peer SSL certificate verification by default. This
-verification is done using a CA certificate store that the SSL library can
-use to make sure the peer's server certificate is valid. If SSL reports an
-error ("certificate verify failed") during the handshake and thus refuses
-further communication with that server, you can set ``LMP_NO_SSL_CHECK``\ .
-If ``LMP_NO_SSL_CHECK`` is set, libcurl does not verify the peer and connection
-succeeds regardless of the names in the certificate. This option is insecure.
-As an alternative, you can specify your own CA cert path by setting the
-environment variable ``CURL_CA_BUNDLE`` to the path of your choice. A call
-to the KIM web query would get this value from the environmental variable.
-
-Traditional make
-^^^^^^^^^^^^^^^^
-
-You can download and build the KIM library manually if you prefer;
-follow the instructions in ``lib/kim/README``\ .  You can also do it in one
-step from the lammps/src dir, using a command like these, which simply
-invoke the ``lib/kim/Install.py`` script with the specified args.
-
-.. code-block:: bash
-
-  $ make lib-kim              # print help message
-  $ make lib-kim args="-b "   # (re-)install KIM API lib with only example models
-  $ make lib-kim args="-b -a Glue_Ercolessi_Adams_Al__MO_324507536345_001"  # ditto plus one model
-  $ make lib-kim args="-b -a everything"     # install KIM API lib with all models
-  $ make lib-kim args="-n -a EAM_Dynamo_Ackland_W__MO_141627196590_002"       # add one model or model driver
-  $ make lib-kim args="-p /usr/local" # use an existing KIM API installation at the provided location
-  $ make lib-kim args="-p /usr/local -a EAM_Dynamo_Ackland_W__MO_141627196590_002" # ditto but add one model or driver
-
-Settings for OpenKIM web queries discussed above need to be applied by adding
-them to the ``LMP_INC`` variable through editing the ``Makefile.machine`` you are
-using.  For example:
-
-.. code-block:: make
-
-   LMP_INC =       -DLMP_NO_SSL_CHECK
+The libcurl library performs peer SSL certificate verification by
+default.  This verification is done using a CA certificate store that
+the SSL library can use to make sure the peer's server certificate is
+valid.  If SSL reports an error ("certificate verify failed") during the
+handshake and thus refuses further communication with that server, you can
+set ``LMP_NO_SSL_CHECK`` to override that behavior.  When LAMMPS is
+compiled with ``LMP_NO_SSL_CHECK`` set, libcurl does not verify the peer
+and connection attempts will succeed regardless of the names in the
+certificate. This option is insecure.  As an alternative, you can
+specify your own CA cert path by setting the environment variable
+``CURL_CA_BUNDLE`` to the path of your choice.  A call to the KIM web
+query would get this value from the environment variable.
 
 ----------
 
@@ -347,8 +373,8 @@ More information on Kokkos can be found on the
 Available Architecture settings
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-These are the possible choices for the Kokkos architecture ID. They must
-be specified in uppercase.
+These are the possible choices for the Kokkos architecture ID as of
+version 3.2 of the Kokkos library. They must be specified in uppercase.
 
 .. list-table::
    :header-rows: 0
@@ -460,111 +486,115 @@ be specified in uppercase.
       - GPU
       - Intel GPUs Gen9+
 
-Basic CMake build settings:
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
-For multicore CPUs using OpenMP, set these 2 variables.
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: Basic CMake build settings:
 
-   -D Kokkos_ARCH_HOSTARCH=yes  # HOSTARCH = HOST from list above
-   -D Kokkos_ENABLE_OPENMP=yes
-   -D BUILD_OMP=yes
+      For multicore CPUs using OpenMP, set these variables.
 
-Please note that enabling OpenMP for KOKKOS requires that OpenMP is
-also :ref:`enabled for the rest of LAMMPS <serial>`.
+      .. code-block:: bash
 
-For Intel KNLs using OpenMP, set these variables:
+         -D Kokkos_ARCH_HOSTARCH=yes  # HOSTARCH = HOST from list above
+         -D Kokkos_ENABLE_OPENMP=yes
+         -D BUILD_OMP=yes
 
-.. code-block:: bash
+      Please note that enabling OpenMP for KOKKOS requires that OpenMP is
+      also :ref:`enabled for the rest of LAMMPS <serial>`.
 
-   -D Kokkos_ARCH_KNL=yes
-   -D Kokkos_ENABLE_OPENMP=yes
+      For Intel KNLs using OpenMP, set these variables:
 
-For NVIDIA GPUs using CUDA, set these variables:
+      .. code-block:: bash
 
-.. code-block:: bash
+         -D Kokkos_ARCH_KNL=yes
+         -D Kokkos_ENABLE_OPENMP=yes
 
-   -D Kokkos_ARCH_HOSTARCH=yes   # HOSTARCH = HOST from list above
-   -D Kokkos_ARCH_GPUARCH=yes    # GPUARCH = GPU from list above
-   -D Kokkos_ENABLE_CUDA=yes
-   -D Kokkos_ENABLE_OPENMP=yes
-   -D CMAKE_CXX_COMPILER=wrapper # wrapper = full path to Cuda nvcc wrapper
+      For NVIDIA GPUs using CUDA, set these variables:
 
-This will also enable executing FFTs on the GPU, either via the internal
-KISSFFT library, or - by preference - with the cuFFT library bundled
-with the CUDA toolkit, depending on whether CMake can identify its
-location.  The *wrapper* value for ``CMAKE_CXX_COMPILER`` variable is
-the path to the CUDA nvcc compiler wrapper provided in the Kokkos
-library: ``lib/kokkos/bin/nvcc_wrapper``\ .  The setting should include
-the full path name to the wrapper, e.g.
+      .. code-block:: bash
 
-.. code-block:: bash
+         -D Kokkos_ARCH_HOSTARCH=yes   # HOSTARCH = HOST from list above
+         -D Kokkos_ARCH_GPUARCH=yes    # GPUARCH = GPU from list above
+         -D Kokkos_ENABLE_CUDA=yes
+         -D Kokkos_ENABLE_OPENMP=yes
+         -D CMAKE_CXX_COMPILER=wrapper # wrapper = full path to Cuda nvcc wrapper
 
-   -D CMAKE_CXX_COMPILER=${HOME}/lammps/lib/kokkos/bin/nvcc_wrapper
+      This will also enable executing FFTs on the GPU, either via the
+      internal KISSFFT library, or - by preference - with the cuFFT
+      library bundled with the CUDA toolkit, depending on whether CMake
+      can identify its location.  The *wrapper* value for
+      ``CMAKE_CXX_COMPILER`` variable is the path to the CUDA nvcc
+      compiler wrapper provided in the Kokkos library:
+      ``lib/kokkos/bin/nvcc_wrapper``\ .  The setting should include the
+      full path name to the wrapper, e.g.
 
-To simplify the compilation, three preset files are included in the
-``cmake/presets`` folder, ``kokkos-serial.cmake``, ``kokkos-openmp.cmake``,
-and ``kokkos-cuda.cmake``. They will enable the KOKKOS package and
-enable some hardware choice.  So to compile with OpenMP host parallelization,
-CUDA device parallelization (for GPUs with CC 5.0 and up) with some
-common packages enabled, you can do the following:
+      .. code-block:: bash
 
-.. code-block:: bash
+         -D CMAKE_CXX_COMPILER=${HOME}/lammps/lib/kokkos/bin/nvcc_wrapper
 
-   mkdir build-kokkos-cuda
-   cd build-kokkos-cuda
-   cmake -C ../cmake/presets/minimal.cmake -C ../cmake/presets/kokkos-cuda.cmake ../cmake
-   cmake --build .
+      To simplify compilation, three preset files are included in the
+      ``cmake/presets`` folder, ``kokkos-serial.cmake``,
+      ``kokkos-openmp.cmake``, and ``kokkos-cuda.cmake``. They will
+      enable the KOKKOS package and enable some hardware choice.  So to
+      compile with OpenMP host parallelization, CUDA device
+      parallelization (for GPUs with CC 5.0 and up) with some common
+      packages enabled, you can do the following:
 
-Basic traditional make settings:
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+      .. code-block:: bash
 
-Choose which hardware to support in ``Makefile.machine`` via
-``KOKKOS_DEVICES`` and ``KOKKOS_ARCH`` settings.  See the
-``src/MAKE/OPTIONS/Makefile.kokkos*`` files for examples.
+         mkdir build-kokkos-cuda
+         cd build-kokkos-cuda
+         cmake -C ../cmake/presets/minimal.cmake -C ../cmake/presets/kokkos-cuda.cmake ../cmake
+         cmake --build .
 
-For multicore CPUs using OpenMP:
+   .. tab:: Basic traditional make settings:
 
-.. code-block:: make
+      Choose which hardware to support in ``Makefile.machine`` via
+      ``KOKKOS_DEVICES`` and ``KOKKOS_ARCH`` settings.  See the
+      ``src/MAKE/OPTIONS/Makefile.kokkos*`` files for examples.
 
-   KOKKOS_DEVICES = OpenMP
-   KOKKOS_ARCH = HOSTARCH          # HOSTARCH = HOST from list above
+      For multicore CPUs using OpenMP:
 
-For Intel KNLs using OpenMP:
+      .. code-block:: make
 
-.. code-block:: make
+         KOKKOS_DEVICES = OpenMP
+         KOKKOS_ARCH = HOSTARCH          # HOSTARCH = HOST from list above
 
-   KOKKOS_DEVICES = OpenMP
-   KOKKOS_ARCH = KNL
+      For Intel KNLs using OpenMP:
 
-For NVIDIA GPUs using CUDA:
+      .. code-block:: make
 
-.. code-block:: make
+         KOKKOS_DEVICES = OpenMP
+         KOKKOS_ARCH = KNL
 
-   KOKKOS_DEVICES = Cuda
-   KOKKOS_ARCH = HOSTARCH,GPUARCH  # HOSTARCH = HOST from list above that is hosting the GPU
-   KOKKOS_CUDA_OPTIONS = "enable_lambda"
-                                  # GPUARCH = GPU from list above
-   FFT_INC = -DFFT_CUFFT          # enable use of cuFFT (optional)
-   FFT_LIB = -lcufft              # link to cuFFT library
+      For NVIDIA GPUs using CUDA:
 
-For GPUs, you also need the following lines in your ``Makefile.machine``
-before the CC line is defined.  They tell ``mpicxx`` to use an ``nvcc``
-compiler wrapper, which will use ``nvcc`` for compiling CUDA files and a
-C++ compiler for non-Kokkos, non-CUDA files.
+      .. code-block:: make
 
-.. code-block:: make
+         KOKKOS_DEVICES = Cuda
+         KOKKOS_ARCH = HOSTARCH,GPUARCH  # HOSTARCH = HOST from list above that is hosting the GPU
+         KOKKOS_CUDA_OPTIONS = "enable_lambda"
+                                         # GPUARCH = GPU from list above
+         FFT_INC = -DFFT_CUFFT           # enable use of cuFFT (optional)
+         FFT_LIB = -lcufft               # link to cuFFT library
 
-   # For OpenMPI
-   KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
-   export OMPI_CXX = $(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
-   CC = mpicxx
+      For GPUs, you also need the following lines in your
+      ``Makefile.machine`` before the CC line is defined.  They tell
+      ``mpicxx`` to use an ``nvcc`` compiler wrapper, which will use
+      ``nvcc`` for compiling CUDA files and a C++ compiler for
+      non-Kokkos, non-CUDA files.
 
-.. code-block:: make
+      .. code-block:: make
 
-   # For MPICH and derivatives
-   KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
-   CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
+         # For OpenMPI
+         KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
+         export OMPI_CXX = $(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
+         CC = mpicxx
+
+      .. code-block:: make
+
+         # For MPICH and derivatives
+         KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
+         CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
 
 
 Advanced KOKKOS compilation settings
@@ -619,43 +649,43 @@ LATTE package
 To build with this package, you must download and build the LATTE
 library.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D DOWNLOAD_LATTE=value    # download LATTE for build, value = no (default) or yes
-   -D LATTE_LIBRARY=path      # LATTE library file (only needed if a custom location)
+      .. code-block:: bash
 
-If ``DOWNLOAD_LATTE`` is set, the LATTE library will be downloaded and
-built inside the CMake build directory.  If the LATTE library is
-already on your system (in a location CMake cannot find it),
-``LATTE_LIBRARY`` is the filename (plus path) of the LATTE library file,
-not the directory the library file is in.
+         -D DOWNLOAD_LATTE=value    # download LATTE for build, value = no (default) or yes
+         -D LATTE_LIBRARY=path      # LATTE library file (only needed if a custom location)
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      If ``DOWNLOAD_LATTE`` is set, the LATTE library will be downloaded
+      and built inside the CMake build directory.  If the LATTE library
+      is already on your system (in a location where CMake cannot find it),
+      ``LATTE_LIBRARY`` is the filename (plus path) of the LATTE library
+      file, not the directory the library file is in.
 
-You can download and build the LATTE library manually if you prefer;
-follow the instructions in ``lib/latte/README``\ .  You can also do it in
-one step from the ``lammps/src`` dir, using a command like these, which
-simply invokes the ``lib/latte/Install.py`` script with the specified
-args:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      You can download and build the LATTE library manually if you
+      prefer; follow the instructions in ``lib/latte/README``\ .  You
+      can also do it in one step from the ``lammps/src`` dir, using
+      commands like these, which simply invoke the
+      ``lib/latte/Install.py`` script with the specified args:
 
-  $ make lib-latte                          # print help message
-  $ make lib-latte args="-b"                # download and build in lib/latte/LATTE-master
-  $ make lib-latte args="-p $HOME/latte"    # use existing LATTE installation in $HOME/latte
-  $ make lib-latte args="-b -m gfortran"    # download and build in lib/latte and
-                                            #   copy Makefile.lammps.gfortran to Makefile.lammps
+      .. code-block:: bash
 
-Note that 3 symbolic (soft) links, ``includelink`` and ``liblink`` and
-``filelink.o``, are created in ``lib/latte`` to point to required
-folders and files in the LATTE home directory.  When LAMMPS itself is
-built it will use these links.  You should also check that the
-``Makefile.lammps`` file you create is appropriate for the compiler you
-use on your system to build LATTE.
+         $ make lib-latte                          # print help message
+         $ make lib-latte args="-b"                # download and build in lib/latte/LATTE-master
+         $ make lib-latte args="-p $HOME/latte"    # use existing LATTE installation in $HOME/latte
+         $ make lib-latte args="-b -m gfortran"    # download and build in lib/latte and
+                                                   #   copy Makefile.lammps.gfortran to Makefile.lammps
+
+      Note that 3 symbolic (soft) links, ``includelink`` and ``liblink``
+      and ``filelink.o``, are created in ``lib/latte`` to point to
+      required folders and files in the LATTE home directory.  When
+      LAMMPS itself is built it will use these links.  You should also
+      check that the ``Makefile.lammps`` file you create is appropriate
+      for the compiler you use on your system to build LATTE.
 
 ----------
 
@@ -668,34 +698,35 @@ This package can optionally include support for messaging via sockets,
 using the open-source `ZeroMQ library <http://zeromq.org>`_, which must
 be installed on your system.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D MESSAGE_ZMQ=value    # build with ZeroMQ support, value = no (default) or yes
-   -D ZMQ_LIBRARY=path     # ZMQ library file (only needed if a custom location)
-   -D ZMQ_INCLUDE_DIR=path # ZMQ include directory (only needed if a custom location)
+      .. code-block:: bash
 
-Traditional make
-^^^^^^^^^^^^^^^^
+         -D MESSAGE_ZMQ=value    # build with ZeroMQ support, value = no (default) or yes
+         -D ZMQ_LIBRARY=path     # ZMQ library file (only needed if a custom location)
+         -D ZMQ_INCLUDE_DIR=path # ZMQ include directory (only needed if a custom location)
 
-Before building LAMMPS, you must build the CSlib library in
-``lib/message``\ .  You can build the CSlib library manually if you prefer;
-follow the instructions in ``lib/message/README``\ .  You can also do it in
-one step from the ``lammps/src`` dir, using a command like these, which
-simply invoke the ``lib/message/Install.py`` script with the specified args:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      Before building LAMMPS, you must build the CSlib library in
+      ``lib/message``\ .  You can build the CSlib library manually if
+      you prefer; follow the instructions in ``lib/message/README``\ .
+      You can also do it in one step from the ``lammps/src`` dir, using
+      a command like these, which simply invoke the
+      ``lib/message/Install.py`` script with the specified args:
 
-  $ make lib-message               # print help message
-  $ make lib-message args="-m -z"  # build with MPI and socket (ZMQ) support
-  $ make lib-message args="-s"     # build as serial lib with no ZMQ support
+      .. code-block:: bash
 
-The build should produce two files: ``lib/message/cslib/src/libmessage.a``
-and ``lib/message/Makefile.lammps``.  The latter is copied from an
-existing ``Makefile.lammps.*`` and has settings to link with the ZeroMQ
-library if requested in the build.
+         $ make lib-message               # print help message
+         $ make lib-message args="-m -z"  # build with MPI and socket (ZMQ) support
+         $ make lib-message args="-s"     # build as serial lib with no ZMQ support
+
+      The build should produce two files: ``lib/message/cslib/src/libmessage.a``
+      and ``lib/message/Makefile.lammps``.  The latter is copied from an
+      existing ``Makefile.lammps.*`` and has settings to link with the ZeroMQ
+      library if requested in the build.
 
 ----------
 
@@ -710,43 +741,45 @@ library.  Building the MS-CG library requires that the GSL
 machine.  See the ``lib/mscg/README`` and ``MSCG/Install`` files for
 more details.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D DOWNLOAD_MSCG=value    # download MSCG for build, value = no (default) or yes
-   -D MSCG_LIBRARY=path      # MSCG library file (only needed if a custom location)
-   -D MSCG_INCLUDE_DIR=path  # MSCG include directory (only needed if a custom location)
+      .. code-block:: bash
 
-If ``DOWNLOAD_MSCG`` is set, the MSCG library will be downloaded and built
-inside the CMake build directory.  If the MSCG library is already on
-your system (in a location CMake cannot find it), ``MSCG_LIBRARY`` is the
-filename (plus path) of the MSCG library file, not the directory the
-library file is in.  ``MSCG_INCLUDE_DIR`` is the directory the MSCG
-include file is in.
+         -D DOWNLOAD_MSCG=value    # download MSCG for build, value = no (default) or yes
+         -D MSCG_LIBRARY=path      # MSCG library file (only needed if a custom location)
+         -D MSCG_INCLUDE_DIR=path  # MSCG include directory (only needed if a custom location)
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      If ``DOWNLOAD_MSCG`` is set, the MSCG library will be downloaded
+      and built inside the CMake build directory.  If the MSCG library
+      is already on your system (in a location where CMake cannot find it),
+      ``MSCG_LIBRARY`` is the filename (plus path) of the MSCG library
+      file, not the directory the library file is in.
+      ``MSCG_INCLUDE_DIR`` is the directory the MSCG include file is in.
 
-You can download and build the MS-CG library manually if you prefer;
-follow the instructions in ``lib/mscg/README``\ .  You can also do it in one
-step from the ``lammps/src`` dir, using a command like these, which simply
-invoke the ``lib/mscg/Install.py`` script with the specified args:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      You can download and build the MS-CG library manually if you
+      prefer; follow the instructions in ``lib/mscg/README``\ .  You can
+      also do it in one step from the ``lammps/src`` dir, using a
+      command like these, which simply invoke the
+      ``lib/mscg/Install.py`` script with the specified args:
 
-  $ make lib-mscg             # print help message
-  $ make lib-mscg args="-b -m serial"   # download and build in lib/mscg/MSCG-release-master
-                                       # with the settings compatible with "make serial"
-  $ make lib-mscg args="-b -m mpi"      # download and build in lib/mscg/MSCG-release-master
-                                       # with the settings compatible with "make mpi"
-  $ make lib-mscg args="-p /usr/local/mscg-release" # use the existing MS-CG installation in /usr/local/mscg-release
+      .. code-block:: bash
 
-Note that 2 symbolic (soft) links, "includelink" and "liblink", will
-be created in ``lib/mscg`` to point to the MS-CG ``src/installation``
-dir.  When LAMMPS is built in src it will use these links.  You should
-not need to edit the ``lib/mscg/Makefile.lammps`` file.
+         $ make lib-mscg             # print help message
+         $ make lib-mscg args="-b -m serial"   # download and build in lib/mscg/MSCG-release-master
+                                               # with the settings compatible with "make serial"
+         $ make lib-mscg args="-b -m mpi"      # download and build in lib/mscg/MSCG-release-master
+                                               # with the settings compatible with "make mpi"
+         $ make lib-mscg args="-p /usr/local/mscg-release" # use the existing MS-CG installation in /usr/local/mscg-release
+
+      Note that 2 symbolic (soft) links, ``includelink`` and ``liblink``,
+      will be created in ``lib/mscg`` to point to the MS-CG
+      ``src/installation`` dir.  When LAMMPS is built in src it will use
+      these links.  You should not need to edit the
+      ``lib/mscg/Makefile.lammps`` file.
 
 ----------
 
@@ -755,18 +788,18 @@ not need to edit the ``lib/mscg/Makefile.lammps`` file.
 OPT package
 ---------------------
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-No additional settings are needed besides ``-D PKG_OPT=yes``
+   .. tab:: CMake build
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      No additional settings are needed besides ``-D PKG_OPT=yes``
 
-The compile flag ``-restrict`` must be used to build LAMMPS with the OPT
-package when using Intel compilers.  It should be added to the CCFLAGS
-line of your ``Makefile.machine``.  See
-``src/MAKE/OPTIONS/Makefile.opt`` for an example.
+   .. tab:: Traditional make
+
+      The compiler flag ``-restrict`` must be used to build LAMMPS with
+      the OPT package when using Intel compilers.  It should be added to
+      the :code:`CCFLAGS` line of your ``Makefile.machine``.  See
+      ``src/MAKE/OPTIONS/Makefile.opt`` for an example.
 
 ----------
 
@@ -775,34 +808,36 @@ line of your ``Makefile.machine``.  See
 POEMS package
 -------------------------
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-No additional settings are needed besides ``-D PKG_OPT=yes``
+   .. tab:: CMake build
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      No additional settings are needed besides ``-D PKG_POEMS=yes``
 
-Before building LAMMPS, you must build the POEMS library in ``lib/poems``\ .
-You can do this manually if you prefer; follow the instructions in
-``lib/poems/README``\ .  You can also do it in one step from the ``lammps/src``
-dir, using a command like these, which simply invoke the
-``lib/poems/Install.py`` script with the specified args:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      Before building LAMMPS, you must build the POEMS library in
+      ``lib/poems``\ .  You can do this manually if you prefer; follow
+      the instructions in ``lib/poems/README``\ .  You can also do it in
+      one step from the ``lammps/src`` dir, using a command like these,
+      which simply invoke the ``lib/poems/Install.py`` script with the
+      specified args:
 
-  $ make lib-poems                   # print help message
-  $ make lib-poems args="-m serial"  # build with GNU g++ compiler (settings as with "make serial")
-  $ make lib-poems args="-m mpi"     # build with default MPI C++ compiler (settings as with "make mpi")
-  $ make lib-poems args="-m icc"     # build with Intel icc compiler
+      .. code-block:: bash
 
-The build should produce two files: ``lib/poems/libpoems.a`` and
-``lib/poems/Makefile.lammps``.  The latter is copied from an existing
-``Makefile.lammps.*`` and has settings needed to build LAMMPS with the
-POEMS library (though typically the settings are just blank).  If
-necessary, you can edit/create a new ``lib/poems/Makefile.machine`` file
-for your system, which should define an ``EXTRAMAKE`` variable to specify
-a corresponding ``Makefile.lammps.machine`` file.
+         $ make lib-poems                   # print help message
+         $ make lib-poems args="-m serial"  # build with GNU g++ compiler (settings as with "make serial")
+         $ make lib-poems args="-m mpi"     # build with default MPI C++ compiler (settings as with "make mpi")
+         $ make lib-poems args="-m icc"     # build with Intel icc compiler
+
+      The build should produce two files: ``lib/poems/libpoems.a`` and
+      ``lib/poems/Makefile.lammps``.  The latter is copied from an
+      existing ``Makefile.lammps.*`` and has settings needed to build
+      LAMMPS with the POEMS library (though typically the settings are
+      just blank).  If necessary, you can edit/create a new
+      ``lib/poems/Makefile.machine`` file for your system, which should
+      define an ``EXTRAMAKE`` variable to specify a corresponding
+      ``Makefile.lammps.machine`` file.
 
 ----------
 
@@ -817,27 +852,28 @@ version or a Python 3.x version.  Since support for Python 2.x has ended,
 using Python 3.x is strongly recommended. See ``lib/python/README`` for
 additional details.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D PYTHON_EXECUTABLE=path   # path to Python executable to use
+      .. code-block:: bash
 
-Without this setting, CMake will guess the default Python on your
-system.  To use a different Python version, you can either create a
-virtualenv, activate it and then run cmake.  Or you can set the
-PYTHON_EXECUTABLE variable to specify which Python interpreter should
-be used.  Note note that you will also need to have the development
-headers installed for this version, e.g. python2-devel.
+         -D PYTHON_EXECUTABLE=path   # path to Python executable to use
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      Without this setting, CMake will guess the default Python version
+      on your system.  To use a different Python version, you can either
+      create a virtualenv, activate it and then run cmake.  Or you can
+      set the PYTHON_EXECUTABLE variable to specify which Python
+      interpreter should be used.  Note that you will also need to
+      have the development headers installed for this version,
+      e.g. python2-devel.
 
-The build uses the ``lib/python/Makefile.lammps`` file in the compile/link
-process to find Python.  You should only need to create a new
-``Makefile.lammps.*`` file (and copy it to ``Makefile.lammps``) if
-the LAMMPS build fails.
+   .. tab:: Traditional make
+
+      The build uses the ``lib/python/Makefile.lammps`` file in the
+      compile/link process to find Python.  You should only need to
+      create a new ``Makefile.lammps.*`` file (and copy it to
+      ``Makefile.lammps``) if the LAMMPS build fails.
 
 ----------
 
@@ -846,46 +882,48 @@ the LAMMPS build fails.
 VORONOI package
 -----------------------------
 
-To build with this package, you must download and build the `Voro++ library <voro-home_>`_.
+To build with this package, you must download and build the
+`Voro++ library <http://math.lbl.gov/voro++>`_ or install a
+binary package provided by your operating system.
 
-.. _voro-home: http://math.lbl.gov/voro++
+.. tabs::
 
-CMake build
-^^^^^^^^^^^
+   .. tab:: CMake build
 
-.. code-block:: bash
+      .. code-block:: bash
 
-   -D DOWNLOAD_VORO=value    # download Voro++ for build, value = no (default) or yes
-   -D VORO_LIBRARY=path      # Voro++ library file (only needed if at custom location)
-   -D VORO_INCLUDE_DIR=path  # Voro++ include directory (only needed if at custom location)
+         -D DOWNLOAD_VORO=value    # download Voro++ for build, value = no (default) or yes
+         -D VORO_LIBRARY=path      # Voro++ library file (only needed if at custom location)
+         -D VORO_INCLUDE_DIR=path  # Voro++ include directory (only needed if at custom location)
 
-If ``DOWNLOAD_VORO`` is set, the Voro++ library will be downloaded and
-built inside the CMake build directory.  If the Voro++ library is
-already on your system (in a location CMake cannot find it),
-``VORO_LIBRARY`` is the filename (plus path) of the Voro++ library file,
-not the directory the library file is in.  ``VORO_INCLUDE_DIR`` is the
-directory the Voro++ include file is in.
+      If ``DOWNLOAD_VORO`` is set, the Voro++ library will be downloaded
+      and built inside the CMake build directory.  If the Voro++ library
+      is already on your system (in a location where CMake cannot find it),
+      ``VORO_LIBRARY`` is the filename (plus path) of the Voro++ library
+      file, not the directory the library file is in.
+      ``VORO_INCLUDE_DIR`` is the directory the Voro++ include file is
+      in.
 
-Traditional make
-^^^^^^^^^^^^^^^^
+   .. tab:: Traditional make
 
-You can download and build the Voro++ library manually if you prefer;
-follow the instructions in ``lib/voronoi/README``.  You can also do it in
-one step from the ``lammps/src`` dir, using a command like these, which
-simply invoke the ``lib/voronoi/Install.py`` script with the specified
-args:
+      You can download and build the Voro++ library manually if you
+      prefer; follow the instructions in ``lib/voronoi/README``.  You
+      can also do it in one step from the ``lammps/src`` dir, using a
+      command like these, which simply invoke the
+      ``lib/voronoi/Install.py`` script with the specified args:
 
-.. code-block:: bash
+      .. code-block:: bash
 
-  $ make lib-voronoi                          # print help message
-  $ make lib-voronoi args="-b"                # download and build the default version in lib/voronoi/voro++-<version>
-  $ make lib-voronoi args="-p $HOME/voro++"   # use existing Voro++ installation in $HOME/voro++
-  $ make lib-voronoi args="-b -v voro++0.4.6" # download and build the 0.4.6 version in lib/voronoi/voro++-0.4.6
+         $ make lib-voronoi                          # print help message
+         $ make lib-voronoi args="-b"                # download and build the default version in lib/voronoi/voro++-<version>
+         $ make lib-voronoi args="-p $HOME/voro++"   # use existing Voro++ installation in $HOME/voro++
+         $ make lib-voronoi args="-b -v voro++0.4.6" # download and build the 0.4.6 version in lib/voronoi/voro++-0.4.6
 
-Note that 2 symbolic (soft) links, ``includelink`` and ``liblink``, are
-created in lib/voronoi to point to the Voro++ source dir.  When LAMMPS
-builds in ``src`` it will use these links.  You should not need to edit
-the ``lib/voronoi/Makefile.lammps`` file.
+      Note that 2 symbolic (soft) links, ``includelink`` and
+      ``liblink``, are created in lib/voronoi to point to the Voro++
+      source dir.  When LAMMPS builds in ``src`` it will use these
+      links.  You should not need to edit the
+      ``lib/voronoi/Makefile.lammps`` file.
 
 ----------
 
@@ -903,29 +941,30 @@ environment variables have been updated for the local ADIOS installation
 and the instructions below are followed for the respective build
 systems.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D ADIOS2_DIR=path        # path is where ADIOS 2.x is installed
-   -D PKG_USER-ADIOS=yes
+      .. code-block:: bash
 
-Traditional make
-^^^^^^^^^^^^^^^^
+         -D ADIOS2_DIR=path        # path is where ADIOS 2.x is installed
+         -D PKG_USER-ADIOS=yes
 
-Turn on the USER-ADIOS package before building LAMMPS. If the ADIOS 2.x
-software is installed in PATH, there is nothing else to do:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      Turn on the USER-ADIOS package before building LAMMPS. If the
+      ADIOS 2.x software is installed in PATH, there is nothing else to
+      do:
 
-  $ make yes-user-adios
+      .. code-block:: bash
 
-otherwise, set ADIOS2_DIR environment variable when turning on the package:
+         $ make yes-user-adios
 
-.. code-block:: bash
+      otherwise, set the ADIOS2_DIR environment variable when turning on the package:
 
-  $ ADIOS2_DIR=path make yes-user-adios   # path is where ADIOS 2.x is installed
+      .. code-block:: bash
+
+         $ ADIOS2_DIR=path make yes-user-adios   # path is where ADIOS 2.x is installed
 
 ----------
 
@@ -936,48 +975,50 @@ USER-ATC package
 
 The USER-ATC package requires the MANYBODY package also be installed.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-No additional settings are needed besides "-D PKG_USER-ATC=yes"
-and "-D PKG_MANYBODY=yes".
+   .. tab:: CMake build
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      No additional settings are needed besides ``-D PKG_USER-ATC=yes``
+      and ``-D PKG_MANYBODY=yes``.
 
-Before building LAMMPS, you must build the ATC library in ``lib/atc``.
-You can do this manually if you prefer; follow the instructions in
-``lib/atc/README``.  You can also do it in one step from the
-``lammps/src`` dir, using a command like these, which simply invoke the
-``lib/atc/Install.py`` script with the specified args:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      Before building LAMMPS, you must build the ATC library in
+      ``lib/atc``.  You can do this manually if you prefer; follow the
+      instructions in ``lib/atc/README``.  You can also do it in one
+      step from the ``lammps/src`` dir, using a command like these,
+      which simply invoke the ``lib/atc/Install.py`` script with the
+      specified args:
 
-  $ make lib-atc                      # print help message
-  $ make lib-atc args="-m serial"     # build with GNU g++ compiler and MPI STUBS (settings as with "make serial")
-  $ make lib-atc args="-m mpi"        # build with default MPI compiler (settings as with "make mpi")
-  $ make lib-atc args="-m icc"        # build with Intel icc compiler
+      .. code-block:: bash
 
-The build should produce two files: ``lib/atc/libatc.a`` and
-``lib/atc/Makefile.lammps``.  The latter is copied from an existing
-``Makefile.lammps.*`` and has settings needed to build LAMMPS with the
-ATC library.  If necessary, you can edit/create a new
-``lib/atc/Makefile.machine`` file for your system, which should define
-an ``EXTRAMAKE`` variable to specify a corresponding
-``Makefile.lammps.<machine>`` file.
+         $ make lib-atc                      # print help message
+         $ make lib-atc args="-m serial"     # build with GNU g++ compiler and MPI STUBS (settings as with "make serial")
+         $ make lib-atc args="-m mpi"        # build with default MPI compiler (settings as with "make mpi")
+         $ make lib-atc args="-m icc"        # build with Intel icc compiler
 
-Note that the Makefile.lammps file has settings for the BLAS and
-LAPACK linear algebra libraries.  As explained in ``lib/atc/README`` these
-can either exist on your system, or you can use the files provided in
-``lib/linalg``.  In the latter case you also need to build the library in
-``lib/linalg`` with a command like these:
+      The build should produce two files: ``lib/atc/libatc.a`` and
+      ``lib/atc/Makefile.lammps``.  The latter is copied from an
+      existing ``Makefile.lammps.*`` and has settings needed to build
+      LAMMPS with the ATC library.  If necessary, you can edit/create a
+      new ``lib/atc/Makefile.machine`` file for your system, which
+      should define an ``EXTRAMAKE`` variable to specify a corresponding
+      ``Makefile.lammps.<machine>`` file.
 
-.. code-block:: bash
+      Note that the Makefile.lammps file has settings for the BLAS and
+      LAPACK linear algebra libraries.  As explained in
+      ``lib/atc/README`` these can either exist on your system, or you
+      can use the files provided in ``lib/linalg``.  In the latter case
+      you also need to build the library in ``lib/linalg`` with a
+      command like these:
 
-  $ make lib-linalg                     # print help message
-  $ make lib-linalg args="-m serial"    # build with GNU Fortran compiler (settings as with "make serial")
-  $ make lib-linalg args="-m mpi"       # build with default MPI Fortran compiler (settings as with "make mpi")
-  $ make lib-linalg args="-m gfortran"  # build with GNU Fortran compiler
+      .. code-block:: bash
+
+         $ make lib-linalg                     # print help message
+         $ make lib-linalg args="-m serial"    # build with GNU Fortran compiler (settings as with "make serial")
+         $ make lib-linalg args="-m mpi"       # build with default MPI Fortran compiler (settings as with "make mpi")
+         $ make lib-linalg args="-m gfortran"  # build with GNU Fortran compiler
 
 ----------
 
@@ -986,47 +1027,49 @@ can either exist on your system, or you can use the files provided in
 USER-AWPMD package
 ------------------
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-No additional settings are needed besides ``-D PKG_USER-AQPMD=yes``.
+   .. tab:: CMake build
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      No additional settings are needed besides ``-D PKG_USER-AWPMD=yes``.
 
-Before building LAMMPS, you must build the AWPMD library in ``lib/awpmd``.
-You can do this manually if you prefer; follow the instructions in
-``lib/awpmd/README``.  You can also do it in one step from the ``lammps/src``
-dir, using a command like these, which simply invoke the
-``lib/awpmd/Install.py`` script with the specified args:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      Before building LAMMPS, you must build the AWPMD library in
+      ``lib/awpmd``.  You can do this manually if you prefer; follow the
+      instructions in ``lib/awpmd/README``.  You can also do it in one
+      step from the ``lammps/src`` dir, using a command like these,
+      which simply invoke the ``lib/awpmd/Install.py`` script with the
+      specified args:
 
-  $ make lib-awpmd                   # print help message
-  $ make lib-awpmd args="-m serial"  # build with GNU g++ compiler and MPI STUBS (settings as with "make serial")
-  $ make lib-awpmd args="-m mpi"     # build with default MPI compiler (settings as with "make mpi")
-  $ make lib-awpmd args="-m icc"     # build with Intel icc compiler
+      .. code-block:: bash
 
-The build should produce two files: ``lib/awpmd/libawpmd.a`` and
-``lib/awpmd/Makefile.lammps``.  The latter is copied from an existing
-``Makefile.lammps.*`` and has settings needed to build LAMMPS with the
-AWPMD library.  If necessary, you can edit/create a new
-``lib/awpmd/Makefile.machine`` file for your system, which should define
-an ``EXTRAMAKE`` variable to specify a corresponding
-``Makefile.lammps.<machine>`` file.
+         $ make lib-awpmd                   # print help message
+         $ make lib-awpmd args="-m serial"  # build with GNU g++ compiler and MPI STUBS (settings as with "make serial")
+         $ make lib-awpmd args="-m mpi"     # build with default MPI compiler (settings as with "make mpi")
+         $ make lib-awpmd args="-m icc"     # build with Intel icc compiler
 
-Note that the ``Makefile.lammps`` file has settings for the BLAS and
-LAPACK linear algebra libraries.  As explained in ``lib/awpmd/README``
-these can either exist on your system, or you can use the files
-provided in ``lib/linalg``.  In the latter case you also need to build the
-library in ``lib/linalg`` with a command like these:
+      The build should produce two files: ``lib/awpmd/libawpmd.a`` and
+      ``lib/awpmd/Makefile.lammps``.  The latter is copied from an
+      existing ``Makefile.lammps.*`` and has settings needed to build
+      LAMMPS with the AWPMD library.  If necessary, you can edit/create
+      a new ``lib/awpmd/Makefile.machine`` file for your system, which
+      should define an ``EXTRAMAKE`` variable to specify a corresponding
+      ``Makefile.lammps.<machine>`` file.
 
-.. code-block:: bash
+      Note that the ``Makefile.lammps`` file has settings for the BLAS
+      and LAPACK linear algebra libraries.  As explained in
+      ``lib/awpmd/README`` these can either exist on your system, or you
+      can use the files provided in ``lib/linalg``.  In the latter case
+      you also need to build the library in ``lib/linalg`` with a
+      command like these:
 
-  $ make lib-linalg                     # print help message
-  $ make lib-linalg args="-m serial"    # build with GNU Fortran compiler (settings as with "make serial")
-  $ make lib-linalg args="-m mpi"       # build with default MPI Fortran compiler (settings as with "make mpi")
-  $ make lib-linalg args="-m gfortran"  # build with GNU Fortran compiler
+      .. code-block:: bash
+
+         $ make lib-linalg                     # print help message
+         $ make lib-linalg args="-m serial"    # build with GNU Fortran compiler (settings as with "make serial")
+         $ make lib-linalg args="-m mpi"       # build with default MPI Fortran compiler (settings as with "make mpi")
+         $ make lib-linalg args="-m gfortran"  # build with GNU Fortran compiler
 
 ----------
 
@@ -1035,54 +1078,57 @@ library in ``lib/linalg`` with a command like these:
 USER-COLVARS package
 ---------------------------------------
 
-This package includes into the LAMMPS distribution the Colvars library, which
-can be built for the most part with all major versions of the C++ language.
+This package includes the `Colvars library
+<https://colvars.github.io/>`_ into the LAMMPS distribution, which can
+be built for the most part with all major versions of the C++ language.
 
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-This is the recommended build recipe: no additional settings are normally
-needed besides ``-D PKG_USER-COLVARS=yes``.
+   .. tab:: CMake build
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      This is the recommended build procedure for using Colvars in
+      LAMMPS. No additional settings are normally needed besides
+      ``-D PKG_USER-COLVARS=yes``.
 
-Before building LAMMPS, one must build the Colvars library in lib/colvars.
+   .. tab:: Traditional make
 
-This can be done manually in the same folder by using or adapting one of
-the provided Makefiles: for example, ``Makefile.g++`` for the GNU C++
-compiler.  C++11 compatibility may need to be enabled for some older
-compilers (as is done in the example makefile).
+      Before building LAMMPS, one must build the Colvars library in lib/colvars.
 
-In general, it is safer to use build setting consistent with the rest of
-LAMMPS.  This is best carried out from the LAMMPS src directory using a
-command like these, which simply invoke the ``lib/colvars/Install.py`` script with
-the specified args:
+      This can be done manually in the same folder by using or adapting
+      one of the provided Makefiles: for example, ``Makefile.g++`` for
+      the GNU C++ compiler.  C++11 compatibility may need to be enabled
+      for some older compilers (as is done in the example makefile).
 
-.. code-block:: bash
+      In general, it is safer to use build settings consistent with the
+      rest of LAMMPS.  This is best carried out from the LAMMPS src
+      directory using a command like these, which simply invoke the
+      ``lib/colvars/Install.py`` script with the specified args:
 
-  $ make lib-colvars                      # print help message
-  $ make lib-colvars args="-m serial"     # build with GNU g++ compiler (settings as with "make serial")
-  $ make lib-colvars args="-m mpi"        # build with default MPI compiler (settings as with "make mpi")
-  $ make lib-colvars args="-m g++-debug"  # build with GNU g++ compiler and colvars debugging enabled
+      .. code-block:: bash
 
-The "machine" argument of the "-m" flag is used to find a Makefile.machine to
-use as build recipe.  If it does not already exist in ``lib/colvars``, it will be
-auto-generated by using compiler flags consistent with those parsed from the
-core LAMMPS makefiles.
+         $ make lib-colvars                      # print help message
+         $ make lib-colvars args="-m serial"     # build with GNU g++ compiler (settings as with "make serial")
+         $ make lib-colvars args="-m mpi"        # build with default MPI compiler (settings as with "make mpi")
+         $ make lib-colvars args="-m g++-debug"  # build with GNU g++ compiler and colvars debugging enabled
 
-Optional flags may be specified as environment variables:
+      The "machine" argument of the "-m" flag is used to find a
+      Makefile.machine to use as build recipe.  If it does not already
+      exist in ``lib/colvars``, it will be auto-generated by using
+      compiler flags consistent with those parsed from the core LAMMPS
+      makefiles.
 
-.. code-block:: bash
+      Optional flags may be specified as environment variables:
 
-    $ COLVARS_DEBUG=yes make lib-colvars args="-m machine"  # Build with debug code (much slower)
-    $ COLVARS_LEPTON=no make lib-colvars args="-m machine"  # Build without Lepton (included otherwise)
+      .. code-block:: bash
 
-The build should produce two files: the library ``lib/colvars/libcolvars.a``
-(which also includes Lepton objects if enabled) and the specification file
-``lib/colvars/Makefile.lammps``.  The latter is auto-generated, and normally does
-not need to be edited.
+         $ COLVARS_DEBUG=yes make lib-colvars args="-m machine"  # Build with debug code (much slower)
+         $ COLVARS_LEPTON=no make lib-colvars args="-m machine"  # Build without Lepton (included otherwise)
+
+      The build should produce two files: the library ``lib/colvars/libcolvars.a``
+      (which also includes Lepton objects if enabled) and the specification file
+      ``lib/colvars/Makefile.lammps``.  The latter is auto-generated, and normally does
+      not need to be edited.
 
 ----------
 
@@ -1128,84 +1174,91 @@ try a different one, switch to a different build system, consider a
 global PLUMED installation or consider downloading PLUMED during the
 LAMMPS build.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-When the ``-D PKG_USER-PLUMED=yes`` flag is included in the cmake
-command you must ensure that GSL is installed in locations that are
-specified in your environment.  There are then two additional variables
-that control the manner in which PLUMED is obtained and linked into
-LAMMPS.
+   .. tab:: CMake build
 
-.. code-block:: bash
+      When the ``-D PKG_USER-PLUMED=yes`` flag is included in the cmake
+      command you must ensure that GSL is installed in locations that
+      are specified in your environment.  There are then two additional
+      variables that control the manner in which PLUMED is obtained and
+      linked into LAMMPS.
 
-   -D DOWNLOAD_PLUMED=value   # download PLUMED for build, value = no (default) or yes
-   -D PLUMED_MODE=value       # Linkage mode for PLUMED, value = static (default), shared, or runtime
+      .. code-block:: bash
 
-If DOWNLOAD_PLUMED is set to "yes", the PLUMED library will be
-downloaded (the version of PLUMED that will be downloaded is hard-coded
-to a vetted version of PLUMED, usually a recent stable release version)
-and built inside the CMake build directory.  If ``DOWNLOAD_PLUMED`` is
-set to "no" (the default), CMake will try to detect and link to an
-installed version of PLUMED.  For this to work, the PLUMED library has
-to be installed into a location where the ``pkg-config`` tool can find
-it or the PKG_CONFIG_PATH environment variable has to be set up
-accordingly.  PLUMED should be installed in such a location if you
-compile it using the default make; make install commands.
+         -D DOWNLOAD_PLUMED=value   # download PLUMED for build, value = no (default) or yes
+         -D PLUMED_MODE=value       # Linkage mode for PLUMED, value = static (default), shared, or runtime
 
-The ``PLUMED_MODE`` setting determines the linkage mode for the PLUMED
-library.  The allowed values for this flag are "static" (default),
-"shared", or "runtime".  For a discussion of PLUMED linkage modes,
-please see above.  When ``DOWNLOAD_PLUMED`` is enabled the static
-linkage mode is recommended.
+      If DOWNLOAD_PLUMED is set to "yes", the PLUMED library will be
+      downloaded (the version of PLUMED that will be downloaded is
+      hard-coded to a vetted version of PLUMED, usually a recent stable
+      release version) and built inside the CMake build directory.  If
+      ``DOWNLOAD_PLUMED`` is set to "no" (the default), CMake will try
+      to detect and link to an installed version of PLUMED.  For this to
+      work, the PLUMED library has to be installed into a location where
+      the ``pkg-config`` tool can find it or the PKG_CONFIG_PATH
+      environment variable has to be set up accordingly.  PLUMED should
+      be installed in such a location if you compile it using the
+      default make; make install commands.
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      The ``PLUMED_MODE`` setting determines the linkage mode for the
+      PLUMED library.  The allowed values for this flag are "static"
+      (default), "shared", or "runtime".  If you want to switch the
+      linkage mode, just re-run CMake with a different setting. For a
+      discussion of PLUMED linkage modes, please see above.  When
+      ``DOWNLOAD_PLUMED`` is enabled the static linkage mode is
+      recommended.
 
-PLUMED needs to be installed before the USER-PLUMED package is installed
-so that LAMMPS can find the right settings when compiling and linking
-the LAMMPS executable.  You can either download and build PLUMED inside
-the LAMMPS plumed library folder or use a previously installed PLUMED
-library and point LAMMPS to its location. You also have to choose the
-linkage mode: "static" (default), "shared" or "runtime".  For a
-discussion of PLUMED linkage modes, please see above.
+   .. tab:: Traditional make
 
-Download/compilation/configuration of the plumed library can be done
-from the src folder through the following make args:
+      PLUMED needs to be installed before the USER-PLUMED package is
+      installed so that LAMMPS can find the right settings when
+      compiling and linking the LAMMPS executable.  You can either
+      download and build PLUMED inside the LAMMPS plumed library folder
+      or use a previously installed PLUMED library and point LAMMPS to
+      its location. You also have to choose the linkage mode: "static"
+      (default), "shared" or "runtime".  For a discussion of PLUMED
+      linkage modes, please see above.
 
-.. code-block:: bash
+      Download/compilation/configuration of the plumed library can be done
+      from the src folder through the following make args:
 
-  $ make lib-plumed                         # print help message
-  $ make lib-plumed args="-b"               # download and build PLUMED in lib/plumed/plumed2
-  $ make lib-plumed args="-p $HOME/.local"  # use existing PLUMED installation in $HOME/.local
-  $ make lib-plumed args="-p /usr/local -m shared"  # use existing PLUMED installation in
-                                                   # /usr/local and use shared linkage mode
+      .. code-block:: bash
 
-Note that 2 symbolic (soft) links, ``includelink`` and ``liblink`` are
-created in lib/plumed that point to the location of the PLUMED build to
-use. A new file ``lib/plumed/Makefile.lammps`` is also created with settings
-suitable for LAMMPS to compile and link PLUMED using the desired linkage
-mode. After this step is completed, you can install the USER-PLUMED
-package and compile LAMMPS in the usual manner:
+         $ make lib-plumed                         # print help message
+         $ make lib-plumed args="-b"               # download and build PLUMED in lib/plumed/plumed2
+         $ make lib-plumed args="-p $HOME/.local"  # use existing PLUMED installation in $HOME/.local
+         $ make lib-plumed args="-p /usr/local -m shared"  # use existing PLUMED installation in
+                                                           # /usr/local and use shared linkage mode
 
-.. code-block:: bash
+      Note that 2 symbolic (soft) links, ``includelink`` and ``liblink``
+      are created in lib/plumed that point to the location of the PLUMED
+      build to use. A new file ``lib/plumed/Makefile.lammps`` is also
+      created with settings suitable for LAMMPS to compile and link
+      PLUMED using the desired linkage mode. After this step is
+      completed, you can install the USER-PLUMED package and compile
+      LAMMPS in the usual manner:
 
-  $ make yes-user-plumed
-  $ make machine
+      .. code-block:: bash
 
-Once this compilation completes you should be able to run LAMMPS in the
-usual way.  For shared linkage mode, libplumed.so must be found by the
-LAMMPS executable, which on many operating systems means, you have to
-set the LD_LIBRARY_PATH environment variable accordingly.
+         $ make yes-user-plumed
+         $ make machine
 
-Support for the different linkage modes in LAMMPS varies for different
-operating systems, using the static linkage is expected to be the most
-portable, and thus set to be the default.
+      Once this compilation completes you should be able to run LAMMPS
+      in the usual way.  For shared linkage mode, libplumed.so must be
+      found by the LAMMPS executable, which on many operating systems
+      means you have to set the LD_LIBRARY_PATH environment variable
+      accordingly.
 
-If you want to change the linkage mode, you have to re-run "make
-lib-plumed" with the desired settings **and** do a re-install if the
-USER-PLUMED package with "make yes-user-plumed" to update the required
-makefile settings with the changes in the lib/plumed folder.
+      Support for the different linkage modes in LAMMPS varies for
+      different operating systems; using the static linkage is expected
+      to be the most portable, and thus set to be the default.
+
+      If you want to change the linkage mode, you have to re-run "make
+      lib-plumed" with the desired settings **and** do a re-install of
+      the USER-PLUMED package with "make yes-user-plumed" to update the
+      required makefile settings with the changes in the lib/plumed
+      folder.
 
 ----------
 
@@ -1218,38 +1271,39 @@ To build with this package you must have the HDF5 software package
 installed on your system, which should include the h5cc compiler and
 the HDF5 library.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-No additional settings are needed besides ``-D PKG_USER-H5MD=yes``.
+   .. tab:: CMake build
 
-This should auto-detect the H5MD library on your system.  Several
-advanced CMake H5MD options exist if you need to specify where it is
-installed.  Use the ccmake (terminal window) or cmake-gui (graphical)
-tools to see these options and set them interactively from their user
-interfaces.
+      No additional settings are needed besides ``-D PKG_USER-H5MD=yes``.
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      This should auto-detect the H5MD library on your system.  Several
+      advanced CMake H5MD options exist if you need to specify where it
+      is installed.  Use the ccmake (terminal window) or cmake-gui
+      (graphical) tools to see these options and set them interactively
+      from their user interfaces.
 
-Before building LAMMPS, you must build the CH5MD library in
-``lib/h5md``.  You can do this manually if you prefer; follow the
-instructions in ``lib/h5md/README``.  You can also do it in one step
-from the ``lammps/src`` dir, using a command like these, which simply
-invoke the ``lib/h5md/Install.py`` script with the specified args:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      Before building LAMMPS, you must build the CH5MD library in
+      ``lib/h5md``.  You can do this manually if you prefer; follow the
+      instructions in ``lib/h5md/README``.  You can also do it in one
+      step from the ``lammps/src`` dir, using a command like these,
+      which simply invoke the ``lib/h5md/Install.py`` script with the
+      specified args:
 
-  $ make lib-h5md                     # print help message
-  $ make lib-h5md args="-m h5cc"      # build with h5cc compiler
+      .. code-block:: bash
 
-The build should produce two files: ``lib/h5md/libch5md.a`` and
-``lib/h5md/Makefile.lammps``.  The latter is copied from an existing
-``Makefile.lammps.*`` and has settings needed to build LAMMPS with the
-system HDF5 library.  If necessary, you can edit/create a new
-``lib/h5md/Makefile.machine`` file for your system, which should define
-an EXTRAMAKE variable to specify a corresponding
-``Makefile.lammps.<machine>`` file.
+         $ make lib-h5md                     # print help message
+         $ make lib-h5md args="-m h5cc"      # build with h5cc compiler
+
+      The build should produce two files: ``lib/h5md/libch5md.a`` and
+      ``lib/h5md/Makefile.lammps``.  The latter is copied from an
+      existing ``Makefile.lammps.*`` and has settings needed to build
+      LAMMPS with the system HDF5 library.  If necessary, you can
+      edit/create a new ``lib/h5md/Makefile.machine`` file for your
+      system, which should define an EXTRAMAKE variable to specify a
+      corresponding ``Makefile.lammps.<machine>`` file.
 
 ----------
 
@@ -1273,51 +1327,53 @@ incorrectly and thus can cause segmentation faults in otherwise correct
 code when using features from the USER-INTEL package.
 
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D INTEL_ARCH=value     # value = cpu (default) or knl
-   -D INTEL_LRT_MODE=value # value = threads, none, or c++11
+      .. code-block:: bash
+
+         -D INTEL_ARCH=value     # value = cpu (default) or knl
+         -D INTEL_LRT_MODE=value # value = threads, none, or c++11
+
+   .. tab:: Traditional make
+
+      Choose which hardware to compile for in Makefile.machine via the
+      following settings.  See ``src/MAKE/OPTIONS/Makefile.intel_cpu*``
+      and ``Makefile.knl`` files for examples, and
+      ``src/USER-INTEL/README`` for additional information.
+
+      For CPUs:
+
+      .. code-block:: make
+
+         OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -qopt-zmm-usage=high
+         CCFLAGS =       -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload -fno-alias -ansi-alias -restrict $(OPTFLAGS)
+         LINKFLAGS =     -g -qopenmp $(OPTFLAGS)
+         LIB =           -ltbbmalloc
+
+      For KNLs:
+
+      .. code-block:: make
+
+         OPTFLAGS =      -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
+         CCFLAGS =       -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload -fno-alias -ansi-alias -restrict $(OPTFLAGS)
+         LINKFLAGS =     -g -qopenmp $(OPTFLAGS)
+         LIB =           -ltbbmalloc
 
 In Long-range thread mode (LRT) a modified verlet style is used, that
 operates the Kspace calculation in a separate thread concurrently to
-other calculations. This has to be enabled in the :doc:`package intel <package>`
-command at runtime. With the setting "threads" it used the pthreads
-library, while c++11 will use the built-in thread support of C++11
-compilers. The option "none" skips compilation of this feature. The
-default is to use "threads" if pthreads is available and otherwise "none".
+other calculations. This has to be enabled in the :doc:`package intel
+<package>` command at runtime. With the setting "threads" it uses the
+pthreads library, while "c++11" will use the built-in thread support
+of C++11 compilers. The option "none" skips compilation of this
+feature. The default is to use "threads" if pthreads is available and
+otherwise "none".
 
-Best performance is achieved with Intel hardware, Intel compilers, as well as
-the Intel TBB and MKL libraries. However, the code also compiles, links, and
-runs with other compilers and without TBB and MKL.
-
-Traditional make
-^^^^^^^^^^^^^^^^
-
-Choose which hardware to compile for in Makefile.machine via the
-following settings.  See ``src/MAKE/OPTIONS/Makefile.intel_cpu*`` and
-``Makefile.knl`` files for examples. and ``src/USER-INTEL/README`` for
-additional information.
-
-For CPUs:
-
-.. code-block:: make
-
-   OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -qopt-zmm-usage=high
-   CCFLAGS =       -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload -fno-alias -ansi-alias -restrict $(OPTFLAGS)
-   LINKFLAGS =     -g -qopenmp $(OPTFLAGS)
-   LIB =           -ltbbmalloc
-
-For KNLs:
-
-.. code-block:: make
-
-   OPTFLAGS =      -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
-   CCFLAGS =       -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload -fno-alias -ansi-alias -restrict $(OPTFLAGS)
-   LINKFLAGS =     -g -qopenmp $(OPTFLAGS)
-   LIB =           -ltbbmalloc
+Best performance is achieved with Intel hardware, Intel compilers, as
+well as the Intel TBB and MKL libraries. However, the code also
+compiles, links, and runs with other compilers / hardware and without
+TBB and MKL.
 
 ----------
 
@@ -1332,31 +1388,34 @@ compile it.  Also, the files with the force field data for running the
 bundled examples are not included in the source distribution. Instead
 they will be downloaded the first time this package is installed.
 
-**CMake build**\ :
+.. tabs::
 
-No additional settings are needed besides ``-D PKG_USER-MESONT=yes``
+   .. tab:: CMake build
 
-**Traditional make**\ :
+      No additional settings are needed besides ``-D PKG_USER-MESONT=yes``
 
-Before building LAMMPS, you must build the *mesont* library in ``lib/mesont``\ .
-You can also do it in one step from the ``lammps/src`` dir, using a command like
-these, which simply invoke the ``lib/mesont/Install.py`` script with the specified
-args:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      Before building LAMMPS, you must build the *mesont* library in
+      ``lib/mesont``\ .  You can also do it in one step from the
+      ``lammps/src`` dir, using a command like these, which simply
+      invoke the ``lib/mesont/Install.py`` script with the specified
+      args:
 
-  $ make lib-mesont                    # print help message
-  $ make lib-mesont args="-m gfortran" # build with GNU g++ compiler (settings as with "make serial")
-  $ make lib-mesont args="-m ifort"    # build with Intel icc compiler
+      .. code-block:: bash
 
-The build should produce two files: ``lib/mesont/libmesont.a`` and
-``lib/mesont/Makefile.lammps``\ .  The latter is copied from an existing
-``Makefile.lammps.\*`` and has settings needed to build LAMMPS with the
-*mesont* library (though typically the settings contain only the Fortran
-runtime library).  If necessary, you can edit/create a new
-``lib/mesont/Makefile.machine`` file for your system, which should
-define an ``EXTRAMAKE`` variable to specify a corresponding
-``Makefile.lammps.machine`` file.
+         $ make lib-mesont                    # print help message
+         $ make lib-mesont args="-m gfortran" # build with GNU Fortran compiler (settings as with "make serial")
+         $ make lib-mesont args="-m ifort"    # build with Intel Fortran compiler
+
+      The build should produce two files: ``lib/mesont/libmesont.a`` and
+      ``lib/mesont/Makefile.lammps``\ .  The latter is copied from an
+      existing ``Makefile.lammps.*`` and has settings needed to build
+      LAMMPS with the *mesont* library (though typically the settings
+      contain only the Fortran runtime library).  If necessary, you can
+      edit/create a new ``lib/mesont/Makefile.machine`` file for your
+      system, which should define an ``EXTRAMAKE`` variable to specify a
+      corresponding ``Makefile.lammps.machine`` file.
 
 ----------
 
@@ -1365,35 +1424,36 @@ define an ``EXTRAMAKE`` variable to specify a corresponding
 USER-MOLFILE package
 ---------------------------------------
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D MOLFILE_INCLUDE_DIRS=path   # (optional) path where VMD molfile plugin headers are installed
-   -D PKG_USER-MOLFILE=yes
+      .. code-block:: bash
 
-Using "-D PKG_USER-MOLFILE=yes" enables the package, and setting
-"-D MOLFILE_INCLUDE DIRS" allows to provide a custom location for
-the molfile plugin header files. These should match the ABI of the
-plugin files used, and thus one typically sets them to include
-folder of the local VMD installation in use. LAMMPS ships with a
-couple of default header files that correspond to a popular VMD
-version, usually the latest release.
+         -D MOLFILE_INCLUDE_DIR=path   # (optional) path where VMD molfile plugin headers are installed
+         -D PKG_USER-MOLFILE=yes
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      Using ``-D PKG_USER-MOLFILE=yes`` enables the package, and setting
+      ``-D MOLFILE_INCLUDE_DIR`` allows you to provide a custom location
+      for the molfile plugin header files. These should match the ABI of
+      the plugin files used, and thus one typically sets it to the
+      include folder of the local VMD installation in use. LAMMPS ships
+      with a couple of default header files that correspond to a popular
+      VMD version, usually the latest release.
 
-The ``lib/molfile/Makefile.lammps`` file has a setting for a dynamic
-loading library libdl.a that is typically present on all systems.  It
-is required for LAMMPS to link with this package.  If the setting is
-not valid for your system, you will need to edit the Makefile.lammps
-file.  See ``lib/molfile/README`` and ``lib/molfile/Makefile.lammps`` for
-details. It is also possible to configure a different folder with
-the VMD molfile plugin header files. LAMMPS ships with a couple of
-default headers, but these are not compatible with all VMD versions,
-so it is often best to change this setting to the location of the
-same include files of the local VMD installation in use.
+   .. tab:: Traditional make
+
+      The ``lib/molfile/Makefile.lammps`` file has a setting for a
+      dynamic loading library libdl.a that is typically present on all
+      systems.  It is required for LAMMPS to link with this package.  If
+      the setting is not valid for your system, you will need to edit
+      the Makefile.lammps file.  See ``lib/molfile/README`` and
+      ``lib/molfile/Makefile.lammps`` for details. It is also possible
+      to configure a different folder with the VMD molfile plugin header
+      files. LAMMPS ships with a couple of default headers, but these
+      are not compatible with all VMD versions, so it is often best to
+      change this setting to the location of the same include files of
+      the local VMD installation in use.
 
 ----------
 
@@ -1405,24 +1465,26 @@ USER-NETCDF package
 To build with this package you must have the NetCDF library installed
 on your system.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-No additional settings are needed besides ``-D PKG_USER-NETCDF=yes``.
+   .. tab:: CMake build
 
-This should auto-detect the NETCDF library if it is installed on your
-system at standard locations.  Several advanced CMake NETCDF options
-exist if you need to specify where it was installed.  Use the ``ccmake``
-(terminal window) or ``cmake-gui`` (graphical) tools to see these
-options and set them interactively from their user interfaces.
+      No additional settings are needed besides ``-D PKG_USER-NETCDF=yes``.
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      This should auto-detect the NETCDF library if it is installed on
+      your system at standard locations.  Several advanced CMake NETCDF
+      options exist if you need to specify where it was installed.  Use
+      the ``ccmake`` (terminal window) or ``cmake-gui`` (graphical)
+      tools to see these options and set them interactively from their
+      user interfaces.
 
-The ``lib/netcdf/Makefile.lammps`` file has settings for NetCDF include
-and library files which LAMMPS needs to build with this package.  If
-the settings are not valid for your system, you will need to edit the
-``Makefile.lammps`` file.  See ``lib/netcdf/README`` for details.
+   .. tab:: Traditional make
+
+      The ``lib/netcdf/Makefile.lammps`` file has settings for NetCDF
+      include and library files which LAMMPS needs to build with this
+      package.  If the settings are not valid for your system, you will
+      need to edit the ``Makefile.lammps`` file.  See
+      ``lib/netcdf/README`` for details.
 
 ----------
 
@@ -1431,30 +1493,31 @@ the settings are not valid for your system, you will need to edit the
 USER-OMP package
 -------------------------------
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-No additional settings are required besides ``-D PKG_USER-OMP=yes``.  If
-CMake detects OpenMP support, the USER-OMP code will be compiled with
-multi-threading support enabled, otherwise as optimized serial code.
+   .. tab:: CMake build
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      No additional settings are required besides ``-D
+      PKG_USER-OMP=yes``.  If CMake detects OpenMP compiler support, the
+      USER-OMP code will be compiled with multi-threading support
+      enabled, otherwise as optimized serial code.
 
-To enable multi-threading support in the USER-OMP package (and other
-styles supporting OpenMP) the following compile and link flags must be
-added to your Makefile.machine file.  See
-``src/MAKE/OPTIONS/Makefile.omp`` for an example.
+   .. tab:: Traditional make
 
-.. parsed-literal::
+      To enable multi-threading support in the USER-OMP package (and
+      other styles supporting OpenMP) the following compile and link
+      flags must be added to your Makefile.machine file.  See
+      ``src/MAKE/OPTIONS/Makefile.omp`` for an example.
 
-   CCFLAGS: -fopenmp               # for GNU and Clang Compilers
-   CCFLAGS: -qopenmp -restrict     # for Intel compilers on Linux
-   LINKFLAGS: -fopenmp             # for GNU and Clang Compilers
-   LINKFLAGS: -qopenmp             # for Intel compilers on Linux
+      .. parsed-literal::
 
-For other platforms and compilers, please consult the documentation
-about OpenMP support for your compiler.
+         CCFLAGS: -fopenmp               # for GNU and Clang Compilers
+         CCFLAGS: -qopenmp -restrict     # for Intel compilers on Linux
+         LINKFLAGS: -fopenmp             # for GNU and Clang Compilers
+         LINKFLAGS: -qopenmp             # for Intel compilers on Linux
+
+      For other platforms and compilers, please consult the
+      documentation about OpenMP support for your compiler.
 
 ----------
 
@@ -1476,58 +1539,60 @@ and LAMMPS versions.  The current interface and makefiles have last been
 verified to work in February 2020 with Quantum Espresso versions 6.3 to
 6.5.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-When using CMake, building a LAMMPS library is required and it is
-recommended to build a shared library, since any libraries built from
-the sources in the *lib* folder (including the essential libqmmm.a)
-are not included in the static LAMMPS library and (currently) not
-installed, while their code is included in the shared LAMMPS library.
-Thus a typical command line to configure building LAMMPS for USER-QMMM
-would be:
+   .. tab:: CMake build
 
-.. code-block:: bash
+      When using CMake, building a LAMMPS library is required and it is
+      recommended to build a shared library, since any libraries built
+      from the sources in the *lib* folder (including the essential
+      libqmmm.a) are not included in the static LAMMPS library and
+      (currently) not installed, while their code is included in the
+      shared LAMMPS library.  Thus a typical command line to configure
+      building LAMMPS for USER-QMMM would be:
 
-    cmake -C ../cmake/presets/minimal.cmake -D PKG_USER-QMMM=yes \
-            -D BUILD_LIB=yes -DBUILD_SHARED_LIBS=yes ../cmake
+      .. code-block:: bash
 
-After completing the LAMMPS build and also configuring and compiling
-Quantum ESPRESSO with external library support (via "make couple"),
-go back to the ``lib/qmmm` folder and follow the instructions on the
-README file to build the combined LAMMPS/QE QM/MM executable
-(pwqmmm.x) in the ``lib/qmmm`` folder.  You need to make certain, that
+         cmake -C ../cmake/presets/minimal.cmake -D PKG_USER-QMMM=yes \
+             -D BUILD_LIB=yes -DBUILD_SHARED_LIBS=yes ../cmake
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      After completing the LAMMPS build and also configuring and
+      compiling Quantum ESPRESSO with external library support (via
+      "make couple"), go back to the ``lib/qmmm`` folder and follow the
+      instructions in the README file to build the combined LAMMPS/QE
+      QM/MM executable (pwqmmm.x) in the ``lib/qmmm`` folder.
 
-Before building LAMMPS, you must build the QMMM library in ``lib/qmmm``.
-You can do this manually if you prefer; follow the first two steps
-explained in ``lib/qmmm/README``.  You can also do it in one step from
-the ``lammps/src`` dir, using a command like these, which simply invoke
-the ``lib/qmmm/Install.py`` script with the specified args:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      Before building LAMMPS, you must build the QMMM library in
+      ``lib/qmmm``.  You can do this manually if you prefer; follow the
+      first two steps explained in ``lib/qmmm/README``.  You can also do
+      it in one step from the ``lammps/src`` dir, using a command like
+      these, which simply invoke the ``lib/qmmm/Install.py`` script with
+      the specified args:
 
-  $ make lib-qmmm                      # print help message
-  $ make lib-qmmm args="-m serial"     # build with GNU Fortran compiler (settings as in "make serial")
-  $ make lib-qmmm args="-m mpi"        # build with default MPI compiler (settings as in "make mpi")
-  $ make lib-qmmm args="-m gfortran"   # build with GNU Fortran compiler
+      .. code-block:: bash
 
-The build should produce two files: ``lib/qmmm/libqmmm.a`` and
-``lib/qmmm/Makefile.lammps``.  The latter is copied from an existing
-``Makefile.lammps.*`` and has settings needed to build LAMMPS with the
-QMMM library (though typically the settings are just blank).  If
-necessary, you can edit/create a new ``lib/qmmm/Makefile.<machine>`` file
-for your system, which should define an ``EXTRAMAKE`` variable to
-specify a corresponding ``Makefile.lammps.<machine>`` file.
+         $ make lib-qmmm                      # print help message
+         $ make lib-qmmm args="-m serial"     # build with GNU Fortran compiler (settings as in "make serial")
+         $ make lib-qmmm args="-m mpi"        # build with default MPI compiler (settings as in "make mpi")
+         $ make lib-qmmm args="-m gfortran"   # build with GNU Fortran compiler
 
-You can then install QMMM package and build LAMMPS in the usual
-manner.  After completing the LAMMPS build and compiling Quantum
-ESPRESSO with external library support (via "make couple"), go back to
-the ``lib/qmmm`` folder and follow the instructions in the README file to
-build the combined LAMMPS/QE QM/MM executable (pwqmmm.x) in the
-lib/qmmm folder.
+      The build should produce two files: ``lib/qmmm/libqmmm.a`` and
+      ``lib/qmmm/Makefile.lammps``.  The latter is copied from an
+      existing ``Makefile.lammps.*`` and has settings needed to build
+      LAMMPS with the QMMM library (though typically the settings are
+      just blank).  If necessary, you can edit/create a new
+      ``lib/qmmm/Makefile.<machine>`` file for your system, which should
+      define an ``EXTRAMAKE`` variable to specify a corresponding
+      ``Makefile.lammps.<machine>`` file.
+
+      You can then install the QMMM package and build LAMMPS in the
+      usual manner.  After completing the LAMMPS build and compiling
+      Quantum ESPRESSO with external library support (via "make
+      couple"), go back to the ``lib/qmmm`` folder and follow the
+      instructions in the README file to build the combined LAMMPS/QE
+      QM/MM executable (pwqmmm.x) in the ``lib/qmmm`` folder.
 
 ----------
 
@@ -1542,27 +1607,27 @@ potentials, additional files with specific licensing conditions need
 to be downloaded and configured.  See step 1 and step 1.1 in the
 ``lib/quip/README`` file for details on how to do this.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D QUIP_LIBRARY=path     # path to libquip.a (only needed if a custom location)
+      .. code-block:: bash
 
-CMake will not download and build the QUIP library.  But once you have
-done that, a CMake build of LAMMPS with ``-D PKG_USER-QUIP=yes`` should
-work.  Set QUIP_LIBRARY if CMake cannot find the QUIP library.
+         -D QUIP_LIBRARY=path     # path to libquip.a (only needed if a custom location)
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      CMake will **not** download and build the QUIP library.  But once you have
+      done that, a CMake build of LAMMPS with ``-D PKG_USER-QUIP=yes`` should
+      work.  Set the ``QUIP_LIBRARY`` variable if CMake cannot find the QUIP library.
 
-The download/build procedure for the QUIP library, described in
-``lib/quip/README`` file requires setting two environment variables,
-QUIP_ROOT and QUIP_ARCH.  These are accessed by the
-lib/quip/Makefile.lammps file which is used when you compile and link
-LAMMPS with this package.  You should only need to edit
-``Makefile.lammps`` if the LAMMPS build can not use its settings to
-successfully build on your system.
+   .. tab:: Traditional make
+
+      The download/build procedure for the QUIP library, described in
+      ``lib/quip/README`` file requires setting two environment
+      variables, ``QUIP_ROOT`` and ``QUIP_ARCH``.  These are accessed by
+      the ``lib/quip/Makefile.lammps`` file which is used when you
+      compile and link LAMMPS with this package.  You should only need
+      to edit ``Makefile.lammps`` if the LAMMPS build can not use its
+      settings to successfully build on your system.
 
 ----------
 
@@ -1571,44 +1636,45 @@ successfully build on your system.
 USER-SCAFACOS package
 -----------------------------------------
 
-To build with this package, you must download and build the `ScaFaCoS
-Coulomb solver library <scafacos-home_>`_
+To build with this package, you must download and build the
+`ScaFaCoS Coulomb solver library <http://www.scafacos.de>`_.
 
-.. _scafacos-home: http://www.scafacos.de
+.. tabs::
 
-CMake build
-^^^^^^^^^^^
+   .. tab:: CMake build
 
-.. code-block:: bash
+      .. code-block:: bash
 
-   -D DOWNLOAD_SCAFACOS=value    # download ScaFaCoS for build, value = no (default) or yes
-   -D SCAFACOS_LIBRARY=path      # ScaFaCos library file (only needed if at custom location)
-   -D SCAFACOS_INCLUDE_DIR=path  # ScaFaCoS include directory (only needed if at custom location)
+         -D DOWNLOAD_SCAFACOS=value    # download ScaFaCoS for build, value = no (default) or yes
+         -D SCAFACOS_LIBRARY=path      # ScaFaCoS library file (only needed if at custom location)
+         -D SCAFACOS_INCLUDE_DIR=path  # ScaFaCoS include directory (only needed if at custom location)
 
-If DOWNLOAD_SCAFACOS is set, the ScaFaCoS library will be downloaded
-and built inside the CMake build directory.  If the ScaFaCoS library
-is already on your system (in a location CMake cannot find it),
-SCAFACOS_LIBRARY is the filename (plus path) of the ScaFaCoS library
-file, not the directory the library file is in.  SCAFACOS_INCLUDE_DIR
-is the directory the ScaFaCoS include file is in.
+      If ``DOWNLOAD_SCAFACOS`` is set, the ScaFaCoS library will be
+      downloaded and built inside the CMake build directory.  If the
+      ScaFaCoS library is already on your system (in a location where CMake
+      cannot find it), ``SCAFACOS_LIBRARY`` is the filename (plus path) of
+      the ScaFaCoS library file, not the directory the library file is
+      in.  ``SCAFACOS_INCLUDE_DIR`` is the directory the ScaFaCoS include
+      file is in.
 
-Traditional make
-^^^^^^^^^^^^^^^^
+   .. tab:: Traditional make
 
-You can download and build the ScaFaCoS library manually if you
-prefer; follow the instructions in ``lib/scafacos/README``.  You can also
-do it in one step from the ``lammps/src`` dir, using a command like these,
-which simply invoke the ``lib/scafacos/Install.py`` script with the
-specified args:
+      You can download and build the ScaFaCoS library manually if you
+      prefer; follow the instructions in ``lib/scafacos/README``.  You
+      can also do it in one step from the ``lammps/src`` dir, using a
+      command like these, which simply invoke the
+      ``lib/scafacos/Install.py`` script with the specified args:
 
-make lib-scafacos                         # print help message
-make lib-scafacos args="-b"               # download and build in lib/scafacos/scafacos-<version>
-make lib-scafacos args="-p $HOME/scafacos  # use existing ScaFaCoS installation in $HOME/scafacos
+      .. code-block:: bash
 
-Note that 2 symbolic (soft) links, ``includelink`` and ``liblink``, are
-created in ``lib/scafacos`` to point to the ScaFaCoS src dir.  When LAMMPS
-builds in src it will use these links.  You should not need to edit
-the ``lib/scafacos/Makefile.lammps`` file.
+         make lib-scafacos                         # print help message
+         make lib-scafacos args="-b"               # download and build in lib/scafacos/scafacos-<version>
+         make lib-scafacos args="-p $HOME/scafacos" # use existing ScaFaCoS installation in $HOME/scafacos
+
+      Note that 2 symbolic (soft) links, ``includelink`` and ``liblink``, are
+      created in ``lib/scafacos`` to point to the ScaFaCoS src dir.  When LAMMPS
+      builds in src it will use these links.  You should not need to edit
+      the ``lib/scafacos/Makefile.lammps`` file.
 
 ----------
 
@@ -1620,37 +1686,39 @@ USER-SMD package
 To build with this package, you must download the Eigen3 library.
 Eigen3 is a template library, so you do not need to build it.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D DOWNLOAD_EIGEN3            # download Eigen3, value = no (default) or yes
-   -D EIGEN3_INCLUDE_DIR=path    # path to Eigen library (only needed if a custom location)
+      .. code-block:: bash
 
-If ``DOWNLOAD_EIGEN3`` is set, the Eigen3 library will be downloaded and
-inside the CMake build directory.  If the Eigen3 library is already on
-your system (in a location CMake cannot find it), ``EIGEN3_INCLUDE_DIR``
-is the directory the Eigen3++ include file is in.
+         -D DOWNLOAD_EIGEN3=value      # download Eigen3, value = no (default) or yes
+         -D EIGEN3_INCLUDE_DIR=path    # path to Eigen library (only needed if a custom location)
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      If ``DOWNLOAD_EIGEN3`` is set, the Eigen3 library will be
+      downloaded into the CMake build directory.  If the Eigen3
+      library is already on your system (in a location where CMake
+      cannot find it), set ``EIGEN3_INCLUDE_DIR`` to the directory the
+      ``Eigen3`` include file is in.
 
-You can download the Eigen3 library manually if you prefer; follow the
-instructions in ``lib/smd/README``.  You can also do it in one step from
-the ``lammps/src`` dir, using a command like these, which simply invoke
-the ``lib/smd/Install.py`` script with the specified args:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      You can download the Eigen3 library manually if you prefer; follow
+      the instructions in ``lib/smd/README``.  You can also do it in one
+      step from the ``lammps/src`` dir, using a command like these,
+      which simply invoke the ``lib/smd/Install.py`` script with the
+      specified args:
 
-  $ make lib-smd                         # print help message
-  $ make lib-smd args="-b"               # download to lib/smd/eigen3
-  $ make lib-smd args="-p /usr/include/eigen3"    # use existing Eigen installation in /usr/include/eigen3
+      .. code-block:: bash
 
-Note that a symbolic (soft) link named ``includelink`` is created in
-``lib/smd`` to point to the Eigen dir.  When LAMMPS builds it will use
-this link.  You should not need to edit the ``lib/smd/Makefile.lammps``
-file.
+         $ make lib-smd                         # print help message
+         $ make lib-smd args="-b"               # download to lib/smd/eigen3
+         $ make lib-smd args="-p /usr/include/eigen3"    # use existing Eigen installation in /usr/include/eigen3
+
+      Note that a symbolic (soft) link named ``includelink`` is created
+      in ``lib/smd`` to point to the Eigen dir.  When LAMMPS builds it
+      will use this link.  You should not need to edit the
+      ``lib/smd/Makefile.lammps`` file.
 
 ----------
 
@@ -1662,23 +1730,24 @@ USER-VTK package
 To build with this package you must have the VTK library installed on
 your system.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-No additional settings are needed besides ``-D PKG_USER-VTK=yes``.
+   .. tab:: CMake build
 
-This should auto-detect the VTK library if it is installed on your
-system at standard locations.  Several advanced VTK options exist if
-you need to specify where it was installed.  Use the ``ccmake`` (terminal
-window) or ``cmake-gui`` (graphical) tools to see these options and set
-them interactively from their user interfaces.
+      No additional settings are needed besides ``-D PKG_USER-VTK=yes``.
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      This should auto-detect the VTK library if it is installed on your
+      system at standard locations.  Several advanced VTK options exist
+      if you need to specify where it was installed.  Use the ``ccmake``
+      (terminal window) or ``cmake-gui`` (graphical) tools to see these
+      options and set them interactively from their user interfaces.
 
-The ``lib/vtk/Makefile.lammps`` file has settings for accessing VTK files
-and its library, which LAMMPS needs to build with this package.  If
-the settings are not valid for your system, check if one of the other
-``lib/vtk/Makefile.lammps.*`` files is compatible and copy it to
-Makefile.lammps.  If none of the provided files work, you will need to
-edit the ``Makefile.lammps`` file.  See ``lib/vtk/README`` for details.
+   .. tab:: Traditional make
+
+      The ``lib/vtk/Makefile.lammps`` file has settings for accessing
+      VTK files and its library, which LAMMPS needs to build with this
+      package.  If the settings are not valid for your system, check if
+      one of the other ``lib/vtk/Makefile.lammps.*`` files is compatible
+      and copy it to Makefile.lammps.  If none of the provided files
+      work, you will need to edit the ``Makefile.lammps`` file.  See
+      ``lib/vtk/README`` for details.
diff --git a/doc/src/Build_link.rst b/doc/src/Build_link.rst
index 87899739bc..3d66371304 100644
--- a/doc/src/Build_link.rst
+++ b/doc/src/Build_link.rst
@@ -41,42 +41,45 @@ The benefit of linking to a static library is, that the resulting
 executable is independent of that library since all required
 executable code from the library is copied into the calling executable.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-This assumes that LAMMPS has been configured without setting a
-``LAMMPS_MACHINE`` name, installed with "make install", and the
-``PKG_CONFIG_PATH`` environment variable has been updated to include the
-``liblammps.pc`` file installed into the configured destination folder.
-The commands to compile and link a coupled executable are then:
+   .. tab:: CMake build
 
-.. code-block:: bash
+      This assumes that LAMMPS has been configured without setting a
+      ``LAMMPS_MACHINE`` name, installed with "make install", and the
+      ``PKG_CONFIG_PATH`` environment variable has been updated to
+      include the ``liblammps.pc`` file installed into the configured
+      destination folder.  The commands to compile and link a coupled
+      executable are then:
 
-   mpicc -c -O $(pkgconf liblammps --cflags) caller.c
-   mpicxx -o caller caller.o -$(pkgconf liblammps --libs)
+      .. code-block:: bash
 
-Traditional make
-^^^^^^^^^^^^^^^^
+         mpicc -c -O $(pkgconf liblammps --cflags) caller.c
+         mpicxx -o caller caller.o $(pkgconf liblammps --libs)
 
-This assumes that LAMMPS has been compiled in the folder
-``${HOME}/lammps/src`` with "make mpi". The commands to compile and link
-a coupled executable are then:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      This assumes that LAMMPS has been compiled in the folder
+      ``${HOME}/lammps/src`` with "make mpi". The commands to compile
+      and link a coupled executable are then:
 
-   mpicc -c -O -I${HOME}/lammps/src caller.c
-   mpicxx -o caller caller.o -L${HOME}/lammps/src -llammps_mpi
+      .. code-block:: bash
 
-The *-I* argument is the path to the location of the ``library.h``
-header file containing the interface to the LAMMPS C-style library
-interface.  The *-L* argument is the path to where the ``liblammps_mpi.a``
-file is located.  The *-llammps_mpi* argument is shorthand for telling the
-compiler to link the file ``liblammps_mpi.a``.  If LAMMPS has been
-built as a shared library, then the linker will use ``liblammps_mpi.so``
-instead.  If both files are available, the linker will usually prefer
-the shared library.  In case of a shared library, you may need to update
-the ``LD_LIBRARY_PATH`` environment variable or running the ``caller``
-executable will fail since it cannot find the shared library at runtime.
+         mpicc -c -O -I${HOME}/lammps/src caller.c
+         mpicxx -o caller caller.o -L${HOME}/lammps/src -llammps_mpi
+
+      The *-I* argument is the path to the location of the ``library.h``
+      header file containing the interface to the LAMMPS C-style library
+      interface.  The *-L* argument is the path to where the
+      ``liblammps_mpi.a`` file is located.  The *-llammps_mpi* argument
+      is shorthand for telling the compiler to link the file
+      ``liblammps_mpi.a``.  If LAMMPS has been built as a shared
+      library, then the linker will use ``liblammps_mpi.so`` instead.
+      If both files are available, the linker will usually prefer the
+      shared library.  In case of a shared library, you may need to
+      update the ``LD_LIBRARY_PATH`` environment variable or running the
+      ``caller`` executable will fail since it cannot find the shared
+      library at runtime.
 
 However, it is only as simple as shown above for the case of a plain
 LAMMPS library without any optional packages that depend on libraries
@@ -84,61 +87,62 @@ LAMMPS library without any optional packages that depend on libraries
 need to include all flags, libraries, and paths for the coupled
 executable, that are also required to link the LAMMPS executable.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-When using CMake, additional libraries with sources in the lib folder
-are built, but not included in ``liblammps.a`` and (currently) not
-installed with ``make install`` and not included in the ``pkgconfig``
-configuration file.  They can be found in the top level build folder,
-but you have to determine the necessary link flags manually.  It is
-therefore recommended to either use the traditional make procedure to
-build and link with a static library or build and link with a shared
-library instead.
+   .. tab:: CMake build
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      When using CMake, additional libraries with sources in the lib
+      folder are built, but not included in ``liblammps.a`` and
+      (currently) not installed with ``make install`` and not included
+      in the ``pkgconfig`` configuration file.  They can be found in the
+      top level build folder, but you have to determine the necessary
+      link flags manually.  It is therefore recommended to either use
+      the traditional make procedure to build and link with a static
+      library or build and link with a shared library instead.
 
-After you have compiled a static LAMMPS library using the conventional
-build system for example with "make mode=static serial". And you also
-have installed the ``POEMS`` package after building its bundled library
-in ``lib/poems``. Then the commands to build and link the coupled executable
-change to:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      After you have compiled a static LAMMPS library using the
+      conventional build system, for example with "make mode=static
+      serial", and have also installed the ``POEMS`` package after
+      building its bundled library in ``lib/poems``, the commands
+      to build and link the coupled executable change to:
 
-   gcc -c -O -I${HOME}/lammps/src/STUBS -I${HOME}/lammps/src -caller.c
-   g++ -o caller caller.o -L${HOME}/lammps/lib/poems \
-     -L${HOME}/lammps/src/STUBS -L${HOME}/lammps/src -llammps_serial -lpoems -lmpi_stubs
+      .. code-block:: bash
 
-Note, that you need to link with ``g++`` instead of ``gcc`` even if you have
-written your code in C, since LAMMPS itself is C++ code.  You can display the
-currently applied settings for building LAMMPS for the "serial" machine target
-by using the command:
+         gcc -c -O -I${HOME}/lammps/src/STUBS -I${HOME}/lammps/src caller.c
+         g++ -o caller caller.o -L${HOME}/lammps/lib/poems \
+                      -L${HOME}/lammps/src/STUBS -L${HOME}/lammps/src \
+                      -llammps_serial -lpoems -lmpi_stubs
 
-.. code-block:: bash
+      Note, that you need to link with ``g++`` instead of ``gcc`` even
+      if you have written your code in C, since LAMMPS itself is C++
+      code.  You can display the currently applied settings for building
+      LAMMPS for the "serial" machine target by using the command:
 
-   make mode=print serial
+      .. code-block:: bash
 
-Which should output something like:
+         make mode=print serial
 
-.. code-block:: bash
+      Which should output something like:
 
-   # Compiler:
-   CXX=g++
-   # Linker:
-   LD=g++
-   # Compilation:
-   CXXFLAGS=-g -O3 -DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64 -I${HOME}/compile/lammps/lib/poems -I${HOME}/compile/lammps/src/STUBS
-   # Linking:
-   LDFLAGS=-g -O
-   # Libraries:
-   LDLIBS=-L${HOME}/compile/lammps/src -llammps_serial -L${HOME}/compile/lammps/lib/poems -L${HOME}/compile/lammps/src/STUBS -lpoems -lmpi_stubs
+      .. code-block:: bash
 
-From this you can gather the necessary paths and flags.  With
-makefiles for other *machine* configurations you need to do the
-equivalent and replace "serial" with the corresponding "machine" name
-of the makefile.
+         # Compiler:
+         CXX=g++
+         # Linker:
+         LD=g++
+         # Compilation:
+         CXXFLAGS=-g -O3 -DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64 -I${HOME}/compile/lammps/lib/poems -I${HOME}/compile/lammps/src/STUBS
+         # Linking:
+         LDFLAGS=-g -O
+         # Libraries:
+         LDLIBS=-L${HOME}/compile/lammps/src -llammps_serial -L${HOME}/compile/lammps/lib/poems -L${HOME}/compile/lammps/src/STUBS -lpoems -lmpi_stubs
+
+      From this you can gather the necessary paths and flags.  With
+      makefiles for other *machine* configurations you need to do the
+      equivalent and replace "serial" with the corresponding "machine"
+      name of the makefile.
 
 Link with LAMMPS as a shared library
 ------------------------------------
@@ -151,35 +155,36 @@ linking the calling executable.  Only the *-I* flags are needed.  So the
 example case from above of the serial version static LAMMPS library with
 the POEMS package installed becomes:
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-The commands with a shared LAMMPS library compiled with the CMake
-build process are the same as for the static library.
+   .. tab:: CMake build
 
-.. code-block:: bash
+      The commands with a shared LAMMPS library compiled with the CMake
+      build process are the same as for the static library.
 
-   mpicc -c -O $(pkgconf liblammps --cflags) caller.c
-   mpicxx -o caller caller.o -$(pkgconf --libs)
+      .. code-block:: bash
 
-Traditional make
-^^^^^^^^^^^^^^^^
+         mpicc -c -O $(pkgconf liblammps --cflags) caller.c
+         mpicxx -o caller caller.o $(pkgconf liblammps --libs)
 
-The commands with a shared LAMMPS library compiled with the
-traditional make build using ``make mode=shared serial`` becomes:
+   .. tab:: Traditional make
 
-.. code-block:: bash
+      The commands with a shared LAMMPS library compiled with the
+      traditional make build using ``make mode=shared serial`` become:
 
-   gcc -c -O -I${HOME}/lammps/src/STUBS -I${HOME}/lammps/src -caller.c
-   g++ -o caller caller.o -L${HOME}/lammps/src -llammps_serial
+      .. code-block:: bash
 
-*Locating liblammps.so at runtime*\ :
+         gcc -c -O -I${HOME}/lammps/src/STUBS -I${HOME}/lammps/src caller.c
+         g++ -o caller caller.o -L${HOME}/lammps/src -llammps_serial
 
-However, now the ``liblammps.so`` file is required at runtime and needs
-to be in a folder, where the shared linker program of the operating
-system can find it.  This would be either a folder like ``/usr/local/lib64``
-or ``${HOME}/.local/lib64`` or a folder pointed to by the ``LD_LIBRARY_PATH``
-environment variable. You can type
+Locating liblammps.so at runtime
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Unlike with a static link, now the ``liblammps.so`` file is required at
+runtime and needs to be in a folder, where the shared linker program of
+the operating system can find it.  This would be either a folder like
+``/usr/local/lib64`` or ``${HOME}/.local/lib64`` or a folder pointed to
+by the ``LD_LIBRARY_PATH`` environment variable. You can type
 
 .. code-block:: bash
 
@@ -187,9 +192,10 @@ environment variable. You can type
 
 to see what directories are in that list.
 
-Or you can add the LAMMPS src directory (or the directory you performed
-a CMake style build in) to your ``LD_LIBRARY_PATH``, so that the current
-version of the shared library is always available to programs that use it.
+Or you can add the LAMMPS src directory or the directory you performed a
+CMake style build in to your ``LD_LIBRARY_PATH`` environment variable,
+so that the current version of the shared library is always available to
+programs that use it.
 
 For the Bourne or Korn shells (/bin/sh, /bin/ksh, /bin/bash etc.), you
 would add something like this to your ``${HOME}/.profile`` file:
diff --git a/doc/src/Build_package.rst b/doc/src/Build_package.rst
index 32b72c544a..72f7ba74f3 100644
--- a/doc/src/Build_package.rst
+++ b/doc/src/Build_package.rst
@@ -45,91 +45,92 @@ packages:
 The mechanism for including packages is simple but different for CMake
 versus make.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: csh
+   .. tab:: CMake build
 
-   -D PKG_NAME=value          # yes or no (default)
+      .. code-block:: csh
 
-Examples:
+         -D PKG_NAME=value          # yes or no (default)
 
-.. code-block:: csh
+      Examples:
 
-   -D PKG_MANYBODY=yes
-   -D PKG_USER-INTEL=yes
+      .. code-block:: csh
 
-All standard and user packages are included the same way.  Note that
-USER packages have a hyphen between USER and the rest of the package
-name, not an underscore.
+         -D PKG_MANYBODY=yes
+         -D PKG_USER-INTEL=yes
 
-See the shortcut section below for how to install many packages at
-once with CMake.
+      All standard and user packages are included the same way.  Note
+      that USER packages have a hyphen between USER and the rest of the
+      package name, not an underscore.
+
+      See the shortcut section below for how to install many packages at
+      once with CMake.
+
+      .. note::
+
+         If you switch between building with CMake and make builds, no
+         packages in the src directory can be installed when you invoke
+         ``cmake``.  CMake will give an error if that is not the case,
+         indicating how you can un-install all packages in the src dir.
+
+   .. tab:: Traditional make
+
+      .. code-block:: bash
+
+         cd lammps/src
+         make ps                    # check which packages are currently installed
+         make yes-name              # install a package with name
+         make no-name               # un-install a package with name
+         make mpi                   # build LAMMPS with whatever packages are now installed
+
+      Examples:
+
+      .. code-block:: bash
+
+         make no-rigid
+         make yes-user-intel
+
+      All standard and user packages are included the same way.
+
+      See the shortcut section below for how to install many packages at
+      once with make.
+
+      .. note::
+
+         You must always re-build LAMMPS (via make) after installing or
+         un-installing a package, for the action to take effect. The
+         included dependency tracking will make certain only files that
+         are required to be rebuilt are recompiled.
+
+      .. note::
+
+         You cannot install or un-install packages and build LAMMPS in a
+         single make command with multiple targets, e.g. ``make
+         yes-colloid mpi``.  This is because the make procedure creates
+         a list of source files that will be out-of-date for the build
+         if the package configuration changes within the same command.
+         You can include or exclude multiple packages in a single make
+         command, e.g. ``make yes-colloid no-manybody``.
+
+
+Information for both build systems
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Almost all packages can be included or excluded in a LAMMPS build,
+independent of the other packages.  However, some packages include files
+derived from files in other packages.  LAMMPS checks for this and does
+the right thing.  Individual files are only included if their
+dependencies are already included.  Likewise, if a package is excluded,
+other files dependent on that package are also excluded.
 
 .. note::
 
-   If you toggle back and forth between building with CMake vs
-   make, no packages in the src directory can be installed when you
-   invoke cmake.  CMake will give an error if that is not the case,
-   indicating how you can un-install all packages in the src dir.
-
-Traditional make
-^^^^^^^^^^^^^^^^
-
-.. code-block:: bash
-
-   cd lammps/src
-   make ps                    # check which packages are currently installed
-   make yes-name              # install a package with name
-   make no-name               # un-install a package with name
-   make mpi                   # build LAMMPS with whatever packages are now installed
-
-Examples:
-
-.. code-block:: bash
-
-   make no-rigid
-   make yes-user-intel
-
-All standard and user packages are included the same way.
-
-See the shortcut section below for how to install many packages at
-once with make.
-
-.. note::
-
-   You must always re-build LAMMPS (via make) after installing or
-   un-installing a package, for the action to take effect.
-
-.. note::
-
-   You cannot install or un-install packages and build LAMMPS in a
-   single make command with multiple targets, e.g. make yes-colloid mpi.
-   This is because the make procedure creates a list of source files that
-   will be out-of-date for the build if the package configuration changes
-   within the same command.  You can include or exclude multiple packages
-   in a single make command, e.g. make yes-colloid no-manybody.
-
-CMake and make info
-^^^^^^^^^^^^^^^^^^^
-
-Any package can be included or excluded in a LAMMPS build, independent
-of all other packages.  However, some packages include files derived
-from files in other packages.  LAMMPS checks for this and does the
-right thing.  Individual files are only included if their dependencies
-are already included.  Likewise, if a package is excluded, other files
-dependent on that package are also excluded.
-
-When you download a LAMMPS tarball or download LAMMPS source files
-from the git repository, no packages are pre-installed in the
-src directory.
-
-.. note::
-
-   Prior to Aug 2018, if you downloaded a tarball, 3 packages
-   (KSPACE, MANYBODY, MOLECULE) were pre-installed in the src directory.
-   That is no longer the case, so that CMake will build as-is without the
-   need to un-install those packages.
+   By default no package is installed.  Prior to August 2018, however,
+   if you downloaded a tarball, 3 packages (KSPACE, MANYBODY, MOLECULE)
+   were pre-installed via the traditional make procedure in the ``src``
+   directory.  That is no longer the case, so that CMake will build
+   as-is without needing to un-install those packages.
 
 ----------
 
diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst
index 21107b7203..d8ac39a0c1 100644
--- a/doc/src/Build_settings.rst
+++ b/doc/src/Build_settings.rst
@@ -44,74 +44,71 @@ require use of an FFT library to compute 1d FFTs.  The KISS FFT
 library is included with LAMMPS but other libraries can be faster.
 LAMMPS can use them if they are available on your system.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D FFT=value              # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS
-   -D FFT_SINGLE=value       # yes or no (default), no = double precision
-   -D FFT_PACK=value         # array (default) or pointer or memcpy
+      .. code-block:: bash
 
-.. note::
+         -D FFT=value              # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS
+         -D FFT_SINGLE=value       # yes or no (default), no = double precision
+         -D FFT_PACK=value         # array (default) or pointer or memcpy
 
-   The values for the FFT variable must be in upper-case.  This is
-   an exception to the rule that all CMake variables can be specified
-   with lower-case values.
+      .. note::
 
-Usually these settings are all that is needed.  If FFTW3 is selected,
-then CMake will try to detect, if threaded FFTW libraries are available
-and enable them by default.  This setting is independent of whether
-OpenMP threads are enabled and a packages like KOKKOS or USER-OMP is
-used.  If CMake cannot detect the FFT library, you can set these variables
-to assist:
+         The values for the FFT variable must be in upper-case.  This is
+         an exception to the rule that all CMake variables can be specified
+         with lower-case values.
 
-.. code-block:: bash
+      Usually these settings are all that is needed.  If FFTW3 is
+      selected, then CMake will try to detect whether threaded FFTW
+      libraries are available and enable them by default.  This setting
+      is independent of whether OpenMP threads are enabled and a
+      package like KOKKOS or USER-OMP is used.  If CMake cannot detect
+      the FFT library, you can set these variables to assist:
 
-   -D FFTW3_INCLUDE_DIRS=path  # path to FFTW3 include files
-   -D FFTW3_LIBRARIES=path     # path to FFTW3 libraries
-   -D FFT_FFTW_THREADS=on      # enable using threaded FFTW3 libraries
-   -D MKL_INCLUDE_DIRS=path    # ditto for Intel MKL library
-   -D FFT_MKL_THREADS=on       # enable using threaded FFTs with MKL libraries
-   -D MKL_LIBRARIES=path
+      .. code-block:: bash
 
-Traditional make
-^^^^^^^^^^^^^^^^
+         -D FFTW3_INCLUDE_DIR=path   # path to FFTW3 include files
+         -D FFTW3_LIBRARY=path       # path to FFTW3 libraries
+         -D FFT_FFTW_THREADS=on      # enable using threaded FFTW3 libraries
+         -D MKL_INCLUDE_DIR=path     # ditto for Intel MKL library
+         -D FFT_MKL_THREADS=on       # enable using threaded FFTs with MKL libraries
+         -D MKL_LIBRARY=path         # path to MKL libraries
 
-To change the FFT library to be used and its options, you have to edit
-your machine Makefile. Below are examples how the makefile variables
-could be changed.
+   .. tab:: Traditional make
 
-.. code-block:: make
+      To change the FFT library to be used and its options, you have to edit
+      your machine Makefile. Below are examples how the makefile variables
+      could be changed.
 
-   FFT_INC = -DFFT_FFTW3         # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS
-                                 # default is KISS if not specified
-   FFT_INC = -DFFT_SINGLE        # do not specify for double precision
-   FFT_INC = -DFFT_FFTW_THREADS  # enable using threaded FFTW3 libraries
-   FFT_INC = -DFFT_MKL_THREADS   # enable using threaded FFTs with MKL libraries
-   FFT_INC = -DFFT_PACK_ARRAY    # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY
+      .. code-block:: make
 
-# default is FFT_PACK_ARRAY if not specified
+         FFT_INC = -DFFT_FFTW3         # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS
+                                       # default is KISS if not specified
+         FFT_INC = -DFFT_SINGLE        # do not specify for double precision
+         FFT_INC = -DFFT_FFTW_THREADS  # enable using threaded FFTW3 libraries
+         FFT_INC = -DFFT_MKL_THREADS   # enable using threaded FFTs with MKL libraries
+         FFT_INC = -DFFT_PACK_ARRAY    # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY
+                                       # default is FFT_PACK_ARRAY if not specified
 
-.. code-block:: make
+      .. code-block:: make
 
-   FFT_INC =       -I/usr/local/include
-   FFT_PATH =      -L/usr/local/lib
-   FFT_LIB =       -lfftw3             # FFTW3 double precision
-   FFT_LIB =       -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS)
-   FFT_LIB =       -lfftw3 -lfftw3f    # FFTW3 single precision
-   FFT_LIB =       -lmkl_intel_lp64 -lmkl_sequential -lmkl_core   # MKL with Intel compiler, serial interface
-   FFT_LIB =       -lmkl_gf_lp64 -lmkl_sequential -lmkl_core      # MKL with GNU compiler, serial interface
-   FFT_LIB =       -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core # MKL with Intel compiler, threaded interface
-   FFT_LIB =       -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core      # MKL with GNU compiler, threaded interface
-   FFT_LIB =       -lmkl_rt            # MKL with automatic runtime selection of interface libs
+         FFT_INC =       -I/usr/local/include
+         FFT_PATH =      -L/usr/local/lib
+         FFT_LIB =       -lfftw3             # FFTW3 double precision
+         FFT_LIB =       -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS)
+         FFT_LIB =       -lfftw3 -lfftw3f    # FFTW3 single precision
+         FFT_LIB =       -lmkl_intel_lp64 -lmkl_sequential -lmkl_core   # MKL with Intel compiler, serial interface
+         FFT_LIB =       -lmkl_gf_lp64 -lmkl_sequential -lmkl_core      # MKL with GNU compiler, serial interface
+         FFT_LIB =       -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core # MKL with Intel compiler, threaded interface
+         FFT_LIB =       -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core      # MKL with GNU compiler, threaded interface
+         FFT_LIB =       -lmkl_rt            # MKL with automatic runtime selection of interface libs
 
-As with CMake, you do not need to set paths in ``FFT_INC`` or ``FFT_PATH``, if
-the compiler can find the FFT header and library files in its default search path.
-You must specify ``FFT_LIB`` with the appropriate FFT libraries to include in the link.
-
-CMake build
-^^^^^^^^^^^
+      As with CMake, you do not need to set paths in ``FFT_INC`` or
+      ``FFT_PATH``, if the compiler can find the FFT header and library
+      files in its default search path.  You must specify ``FFT_LIB``
+      with the appropriate FFT libraries to include in the link.
 
 The `KISS FFT library <http://kissfft.sf.net>`_ is included in the LAMMPS
 distribution.  It is portable across all platforms.  Depending on the size
@@ -177,76 +174,104 @@ ARRAY mode.
 
 .. _size:
 
-Size of LAMMPS integer types
-------------------------------------
+Size of LAMMPS integer types and size limits
+--------------------------------------------
 
 LAMMPS has a few integer data types which can be defined as either
 4-byte (= 32-bit) or 8-byte (= 64-bit) integers at compile time.
+This has an impact on the size of a system that can be simulated
+or how large counters can become before "rolling over".
 The default setting of "smallbig" is almost always adequate.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D LAMMPS_SIZES=value   # smallbig (default) or bigbig or smallsmall
+      With CMake the choice of integer types is made via setting a
+      variable during configuration.
 
-Traditional build
-^^^^^^^^^^^^^^^^^
+      .. code-block:: bash
 
-If you want a setting different from the default, you need to edit your
-machine Makefile.
+         -D LAMMPS_SIZES=value   # smallbig (default) or bigbig or smallsmall
 
-.. code-block:: make
+      If the variable is not set explicitly, "smallbig" is used.
 
-   LMP_INC = -DLAMMPS_SMALLBIG    # or -DLAMMPS_BIGBIG or -DLAMMPS_SMALLSMALL
+   .. tab:: Traditional make
 
-The default setting is ``-DLAMMPS_SMALLBIG`` if nothing is specified
+      If you want a setting different from the default, you need to edit the
+      ``LMP_INC`` variable setting in your machine Makefile.
 
-CMake and make info
-^^^^^^^^^^^^^^^^^^^
+      .. code-block:: make
 
-The default "smallbig" setting allows for simulations with:
+         LMP_INC = -DLAMMPS_SMALLBIG    # or -DLAMMPS_BIGBIG or -DLAMMPS_SMALLSMALL
 
-* total atom count = 2\^63 atoms (about 9e18)
-* total timesteps = 2\^63 (about 9e18)
-* atom IDs = 2\^31 (about 2 billion)
-* image flags = roll over at 512
+      The default setting is ``-DLAMMPS_SMALLBIG`` if nothing is specified.
 
-The "bigbig" setting increases the latter two limits.  It allows for:
+LAMMPS system size restrictions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-* total atom count = 2\^63 atoms (about 9e18)
-* total timesteps = 2\^63 (about 9e18)
-* atom IDs = 2\^63 (about 9e18)
-* image flags = roll over at about 1 million (2\^20)
+.. list-table::
+   :header-rows: 1
+   :widths: auto
+   :align: center
 
-The "smallsmall" setting is only needed if your machine does not
-support 8-byte integers.  It allows for:
+   * -
+     - smallbig
+     - bigbig
+     - smallsmall
+   * - Total atom count
+     - :math:`2^{63}` atoms (= :math:`9.223 \cdot 10^{18}`)
+     - :math:`2^{63}` atoms (= :math:`9.223 \cdot 10^{18}`)
+     - :math:`2^{31}` atoms (= :math:`2.147 \cdot 10^9`)
+   * - Total timesteps
+     - :math:`2^{63}` steps (= :math:`9.223 \cdot 10^{18}`)
+     - :math:`2^{63}` steps (= :math:`9.223 \cdot 10^{18}`)
+     - :math:`2^{31}` steps (= :math:`2.147 \cdot 10^9`)
+   * - Atom ID values
+     - :math:`1 \le i \le 2^{31} (= 2.147 \cdot 10^9)`
+     - :math:`1 \le i \le 2^{63} (= 9.223 \cdot 10^{18})`
+     - :math:`1 \le i \le 2^{31} (= 2.147 \cdot 10^9)`
+   * - Image flag values
+     - :math:`-512 \le i \le 511`
+     - :math:`- 1\,048\,576 \le i \le 1\,048\,575`
+     - :math:`-512 \le i \le 511`
 
-* total atom count = 2\^31 atoms (about 2 billion)
-* total timesteps = 2\^31 (about 2 billion)
-* atom IDs = 2\^31 (about 2 billion)
-* image flags = roll over at 512 (2\^9)
+The "bigbig" setting increases the size of image flags and atom IDs over
+"smallbig" and the "smallsmall" setting is only needed if your machine
+does not support 64-bit integers or incurs performance penalties when
+using them.
+
+These are limits for the core of the LAMMPS code; specific features or
+some styles may impose additional limits.  The :ref:`USER-ATC
+<PKG-USER-ATC>` package cannot be compiled with the "bigbig" setting.
+Also, there are limitations when using the library interface where some
+functions with known issues have been replaced by dummy calls printing a
+corresponding error message rather than crashing randomly or corrupting
+data.
 
 Atom IDs are not required for atomic systems which do not store bond
 topology information, though IDs are enabled by default.  The
 :doc:`atom_modify id no <atom_modify>` command will turn them off.  Atom
 IDs are required for molecular systems with bond topology (bonds,
-angles, dihedrals, etc).  Thus if you model a molecular system with
-more than 2 billion atoms, you need the "bigbig" setting.
+angles, dihedrals, etc).  Similarly, some force or compute or fix styles
+require atom IDs.  Thus if you model a molecular system or use one of
+those styles with more than 2 billion atoms, you need the "bigbig"
+setting.
 
-Image flags store 3 values per atom which count the number of times an
-atom has moved through the periodic box in each dimension.  See the
-:doc:`dump <dump>` doc page for a discussion.  If an atom moves through
-the periodic box more than this limit, the value will "roll over",
-e.g. from 511 to -512, which can cause diagnostics like the
-mean-squared displacement, as calculated by the :doc:`compute msd <compute_msd>` command, to be faulty.
+Regardless of the total system size limits, the maximum number of atoms
+per MPI rank (local + ghost atoms) is limited to 2 billion for atomic
+systems and 500 million for systems with bonds (the additional
+restriction is due to using the 2 upper bits of the local atom index
+in neighbor lists for storing special bonds info).
+
+Image flags store 3 values per atom in a single integer which count the
+number of times an atom has moved through the periodic box in each
+dimension.  See the :doc:`dump <dump>` doc page for a discussion.  If an
+atom moves through the periodic box more than this limit, the value will
+"roll over", e.g. from 511 to -512, which can cause diagnostics like the
+mean-squared displacement, as calculated by the :doc:`compute msd
+<compute_msd>` command, to be faulty.
 
-Note that the USER-ATC package and the USER-INTEL package are currently
-not compatible with the "bigbig" setting. Also, there are limitations
-when using the library interface. Some functions with known issues
-have been replaced by dummy calls printing a corresponding error rather
-than crashing randomly or corrupting data.
 
 Also note that the GPU package requires its lib/gpu library to be
 compiled with the same size setting, or the link will fail.  A CMake
@@ -265,54 +290,51 @@ PNG image files.  Likewise the :doc:`dump movie <dump_image>` command
 outputs movie files in MPEG format.  Using these options requires the
 following settings:
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D WITH_JPEG=value      # yes or no
-                           # default = yes if CMake finds JPEG files, else no
-   -D WITH_PNG=value       # yes or no
-                           # default = yes if CMake finds PNG and ZLIB files, else no
-   -D WITH_FFMPEG=value    # yes or no
-                           # default = yes if CMake can find ffmpeg, else no
+      .. code-block:: bash
 
-Usually these settings are all that is needed.  If CMake cannot find
-the graphics header, library, executable files, you can set these
-variables:
+         -D WITH_JPEG=value      # yes or no
+                                 # default = yes if CMake finds JPEG files, else no
+         -D WITH_PNG=value       # yes or no
+                                 # default = yes if CMake finds PNG and ZLIB files, else no
+         -D WITH_FFMPEG=value    # yes or no
+                                 # default = yes if CMake can find ffmpeg, else no
 
-.. code-block:: bash
+      Usually these settings are all that is needed.  If CMake cannot
+      find the graphics header, library, or executable files, you can set
+      these variables:
 
-   -D JPEG_INCLUDE_DIR=path    # path to jpeglib.h header file
-   -D JPEG_LIBRARIES=path      # path to libjpeg.a (.so) file
-   -D PNG_INCLUDE_DIR=path     # path to png.h header file
-   -D PNG_LIBRARIES=path       # path to libpng.a (.so) file
-   -D ZLIB_INCLUDE_DIR=path    # path to zlib.h header file
-   -D ZLIB_LIBRARIES=path      # path to libz.a (.so) file
-   -D FFMPEG_EXECUTABLE=path   # path to ffmpeg executable
+      .. code-block:: bash
 
-Traditional make
-^^^^^^^^^^^^^^^^
+         -D JPEG_INCLUDE_DIR=path    # path to jpeglib.h header file
+         -D JPEG_LIBRARY=path        # path to libjpeg.a (.so) file
+         -D PNG_INCLUDE_DIR=path     # path to png.h header file
+         -D PNG_LIBRARY=path         # path to libpng.a (.so) file
+         -D ZLIB_INCLUDE_DIR=path    # path to zlib.h header file
+         -D ZLIB_LIBRARY=path        # path to libz.a (.so) file
+         -D FFMPEG_EXECUTABLE=path   # path to ffmpeg executable
 
-.. code-block:: make
+   .. tab:: Traditional make
 
-   LMP_INC = -DLAMMPS_JPEG
-   LMP_INC = -DLAMMPS_PNG
-   LMP_INC = -DLAMMPS_FFMPEG
+      .. code-block:: make
 
-   JPG_INC = -I/usr/local/include   # path to jpeglib.h, png.h, zlib.h header files if make cannot find them
-   JPG_PATH = -L/usr/lib            # paths to libjpeg.a, libpng.a, libz.a (.so) files if make cannot find them
-   JPG_LIB = -ljpeg -lpng -lz       # library names
+         LMP_INC = -DLAMMPS_JPEG
+         LMP_INC = -DLAMMPS_PNG
+         LMP_INC = -DLAMMPS_FFMPEG
 
-As with CMake, you do not need to set ``JPG_INC`` or ``JPG_PATH``,
-if make can find the graphics header and library files.  You must
-specify ``JPG_LIB``
-with a list of graphics libraries to include in the link.  You must
-insure ffmpeg is in a directory where LAMMPS can find it at runtime,
-that is a directory in your PATH environment variable.
+         JPG_INC = -I/usr/local/include   # path to jpeglib.h, png.h, zlib.h header files if make cannot find them
+         JPG_PATH = -L/usr/lib            # paths to libjpeg.a, libpng.a, libz.a (.so) files if make cannot find them
+         JPG_LIB = -ljpeg -lpng -lz       # library names
 
-CMake and make info
-^^^^^^^^^^^^^^^^^^^
+      As with CMake, you do not need to set ``JPG_INC`` or ``JPG_PATH``,
+      if make can find the graphics header and library files.  You must
+      specify ``JPG_LIB`` with a list of graphics libraries to include
+      in the link.  You must ensure ffmpeg is in a directory where
+      LAMMPS can find it at runtime, that is a directory in your PATH
+      environment variable.
 
 Using ``ffmpeg`` to output movie files requires that your machine
 supports the "popen" function in the standard runtime library.
@@ -335,37 +357,34 @@ If this option is enabled, large files can be read or written with
 gzip compression by several LAMMPS commands, including
 :doc:`read_data <read_data>`, :doc:`rerun <rerun>`, and :doc:`dump <dump>`.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D WITH_GZIP=value       # yes or no
-                            # default is yes if CMake can find gzip, else no
-   -D GZIP_EXECUTABLE=path  # path to gzip executable if CMake cannot find it
+      .. code-block:: bash
 
-Traditional make
-^^^^^^^^^^^^^^^^
+         -D WITH_GZIP=value       # yes or no
+                                  # default is yes if CMake can find gzip, else no
+         -D GZIP_EXECUTABLE=path  # path to gzip executable if CMake cannot find it
 
-.. code-block:: make
+   .. tab:: Traditional make
 
-   LMP_INC = -DLAMMPS_GZIP
+      .. code-block:: make
 
-CMake and make info
-^^^^^^^^^^^^^^^^^^^
+         LMP_INC = -DLAMMPS_GZIP
 
-This option requires that your machine supports the "popen()" function
-in the standard runtime library and that a gzip executable can be
+This option requires that your operating system fully supports the "popen()"
+function in the standard runtime library and that a ``gzip`` executable can be
 found by LAMMPS during a run.
 
 .. note::
 
-   On some clusters with high-speed networks, using the fork()
-   library call (required by popen()) can interfere with the fast
-   communication library and lead to simulations using compressed output
-   or input to hang or crash. For selected operations, compressed file
-   I/O is also available using a compression library instead, which is
-   what the :ref:`COMPRESS package <PKG-COMPRESS>` enables.
+   On some clusters with high-speed networks, using the "fork()" library
+   call (required by "popen()") can interfere with the fast communication
+   library and lead to simulations using compressed output or input to
+   hang or crash. For selected operations, compressed file I/O is also
+   available using a compression library instead, which is what the
+   :ref:`COMPRESS package <PKG-COMPRESS>` enables.
 
 ----------
 
@@ -374,65 +393,66 @@ found by LAMMPS during a run.
 Memory allocation alignment
 ---------------------------------------
 
-This setting enables the use of the posix_memalign() call instead of
-malloc() when LAMMPS allocates large chunks or memory.  This can make
-vector instructions on CPUs more efficient, if dynamically allocated
-memory is aligned on larger-than-default byte boundaries.
-On most current systems, the malloc() implementation returns
+This setting enables the use of the "posix_memalign()" call instead of
+"malloc()" when LAMMPS allocates large chunks of memory.  Vector
+instructions on CPUs may become more efficient, if dynamically allocated
+memory is aligned on larger-than-default byte boundaries.  On most
+current operating systems, the "malloc()" implementation returns
 pointers that are aligned to 16-byte boundaries. Using SSE vector
-instructions efficiently, however, requires memory blocks being
-aligned on 64-byte boundaries.
+instructions efficiently, however, requires memory blocks being aligned
+on 64-byte boundaries.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D LAMMPS_MEMALIGN=value            # 0, 8, 16, 32, 64 (default)
+      .. code-block:: bash
 
-Use a ``LAMMPS_MEMALIGN`` value of 0 to disable using posix_memalign()
-and revert to using the malloc() C-library function instead.  When
-compiling LAMMPS for Windows systems, malloc() will always be used
-and this setting ignored.
+         -D LAMMPS_MEMALIGN=value            # 0, 8, 16, 32, 64 (default)
 
-Traditional make
-^^^^^^^^^^^^^^^^
+      Use a ``LAMMPS_MEMALIGN`` value of 0 to disable using
+      "posix_memalign()" and revert to using the "malloc()" C-library
+      function instead.  When compiling LAMMPS for Windows systems,
+      "malloc()" will always be used and this setting is ignored.
 
-.. code-block:: make
+   .. tab:: Traditional make
 
-   LMP_INC = -DLAMMPS_MEMALIGN=value   # 8, 16, 32, 64
+      .. code-block:: make
 
-Do not set ``-DLAMMPS_MEMALIGN``, if you want to have memory allocated
-with the malloc() function call instead. ``-DLAMMPS_MEMALIGN`` **cannot**
-be used on Windows, as it does use different function calls for
-allocating aligned memory, that are not compatible with how LAMMPS
-manages its dynamical memory.
+         LMP_INC = -DLAMMPS_MEMALIGN=value   # 8, 16, 32, 64
+
+      Do not set ``-DLAMMPS_MEMALIGN``, if you want to have memory
+      allocated with the "malloc()" function call
+      instead. ``-DLAMMPS_MEMALIGN`` **cannot** be used on Windows, as
+      Windows uses different function calls with different semantics for
+      allocating aligned memory, that are not compatible with how LAMMPS
+      manages its dynamical memory.
 
 ----------
 
 .. _longlong:
 
 Workaround for long long integers
-------------------------------------------------
+---------------------------------
 
 If your system or MPI version does not recognize "long long" data
 types, the following setting will be needed.  It converts "long long"
 to a "long" data type, which should be the desired 8-byte integer on
 those systems:
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D LAMMPS_LONGLONG_TO_LONG=value     # yes or no (default)
+      .. code-block:: bash
 
-Traditional make
-^^^^^^^^^^^^^^^^
+         -D LAMMPS_LONGLONG_TO_LONG=value     # yes or no (default)
 
-.. code-block:: make
+   .. tab:: Traditional make
 
-   LMP_INC = -DLAMMPS_LONGLONG_TO_LONG
+      .. code-block:: make
+
+         LMP_INC = -DLAMMPS_LONGLONG_TO_LONG
 
 ----------
 
@@ -447,19 +467,19 @@ Instead, the call stack is unwound and control returns to the caller,
 e.g. to Python. Of course, the calling code has to be set up to
 *catch* exceptions thrown from within LAMMPS.
 
-CMake build
-^^^^^^^^^^^
+.. tabs::
 
-.. code-block:: bash
+   .. tab:: CMake build
 
-   -D LAMMPS_EXCEPTIONS=value        # yes or no (default)
+      .. code-block:: bash
 
-Traditional make
-^^^^^^^^^^^^^^^^
+         -D LAMMPS_EXCEPTIONS=value        # yes or no (default)
 
-.. code-block:: make
+   .. tab:: Traditional make
 
-   LMP_INC = -DLAMMPS_EXCEPTIONS
+      .. code-block:: make
+
+         LMP_INC = -DLAMMPS_EXCEPTIONS
 
 .. note::
 
diff --git a/doc/src/Manual.rst b/doc/src/Manual.rst
index ba6c40b255..7c715239df 100644
--- a/doc/src/Manual.rst
+++ b/doc/src/Manual.rst
@@ -72,8 +72,6 @@ every LAMMPS command.
    pg_library
    Modify
    pg_developer
-..   pg_modify
-..   pg_base
 
 .. toctree::
    :caption: Index
diff --git a/doc/src/_ext/tab_or_note.py b/doc/src/_ext/tab_or_note.py
new file mode 100644
index 0000000000..40a27deca5
--- /dev/null
+++ b/doc/src/_ext/tab_or_note.py
@@ -0,0 +1,15 @@
+
+def replace_tabs_handler(app, docname, source):
+    """ When builder is not 'html', remove 'tabs' directive
+    and replace any 'tab' directive with 'admonition'"""
+    if app.builder.name != 'html':
+        for i in range(len(source)):
+            source[i] = source[i].replace('.. tabs::','').replace('.. tab::','.. admonition::')
+
+def setup(app):
+    app.connect('source-read', replace_tabs_handler)
+    return {
+        'version': '0.1',
+        'parallel_read_safe': True,
+        'parallel_write_safe': True,
+    }
diff --git a/doc/src/pg_atom.rst b/doc/src/pg_atom.rst
new file mode 100644
index 0000000000..f9cbf63322
--- /dev/null
+++ b/doc/src/pg_atom.rst
@@ -0,0 +1,9 @@
+LAMMPS Atom and AtomVec Base Classes
+************************************
+
+.. doxygenclass:: LAMMPS_NS::Atom
+   :project: progguide
+   :members:
+
+
+
diff --git a/doc/src/pg_dev_classes.rst b/doc/src/pg_dev_classes.rst
new file mode 100644
index 0000000000..78e1dabcd1
--- /dev/null
+++ b/doc/src/pg_dev_classes.rst
@@ -0,0 +1,38 @@
+LAMMPS C++ base classes
+=======================
+
+LAMMPS is designed to be used as a C++ class library where one can set
+up and drive a simulation through creating a class instance and then
+calling some abstract operations or commands on that class or its member
+class instances.  These are interfaced to the :doc:`C library API
+<pg_library>`, which provides an additional level of abstraction and
+simplification for common operations. The C API is also the basis for
+calling LAMMPS from Python or Fortran.
+
+When used from a C++ program, most of the symbols and functions in
+LAMMPS are wrapped into the ``LAMMPS_NS`` namespace so they will not
+collide with your own classes or other libraries. This, however, does
+not extend to the additional libraries bundled with LAMMPS in the lib
+folder and some of the low-level code of some packages.
+
+Behind the scenes this is implemented through inheritance and
+polymorphism where base classes define the abstract interface and
+derived classes provide the specialized implementation for specific
+models or optimizations or ports to accelerator platforms.  This
+document will provide an outline of the fundamental class hierarchy and
+some selected examples for derived classes of specific models.
+
+.. note::
+
+   Please see the :ref:`note about thread-safety <thread-safety>`
+   in the library Howto doc page.
+
+-----------------------------------
+
+.. toctree::
+   :caption: Individual Base Classes
+   :name: lammpsbase
+
+   pg_lammps
+   pg_atom
+   pg_input
diff --git a/doc/src/pg_dev_flow.rst b/doc/src/pg_dev_flow.rst
new file mode 100644
index 0000000000..729044206e
--- /dev/null
+++ b/doc/src/pg_dev_flow.rst
@@ -0,0 +1,236 @@
+How a timestep works
+====================
+
+The first and most fundamental operation within LAMMPS to understand is
+how a timestep is structured.  Timestepping is performed by calling
+methods of the Integrate class instance within the Update class.  Since
+Integrate is a base class, it will point to an instance of a derived
+class corresponding to what is selected by the :doc:`run_style
+<run_style>` input script command.
+
+In this section, the timestep implemented by the Verlet class is
+described.  A similar timestep protocol is implemented by the Respa
+class, for the r-RESPA hierarchical timestepping method.
+
+The Min base class performs energy minimization, so does not perform a
+literal timestep.  But it has logic similar to what is described here,
+to compute forces and invoke fixes at each iteration of a minimization.
+Differences between time integration and minimization are highlighted at
+the end of this section.
+
+The Verlet class is encoded in the ``src/verlet.cpp`` and ``verlet.h``
+files.  It implements the velocity-Verlet timestepping algorithm.  The
+workhorse method is ``Verlet::run()``, but first we highlight several
+other methods in the class.
+
+- The ``init()`` method is called at the beginning of each dynamics
+  run.  It simply sets some internal flags, based on user settings in
+  other parts of the code.
+
+- The ``setup()`` or ``setup_minimal()`` methods are also called before
+  each run.  The velocity-Verlet method requires current forces be
+  calculated before the first timestep, so these routines compute
+  forces due to all atomic interactions, using the same logic that
+  appears in the timestepping described next.  A few fixes are also
+  invoked, using the mechanism described in the next section.  Various
+  counters are also initialized before the run begins.  The
+  ``setup_minimal()`` method is a variant that has a flag for performing
+  less setup.  This is used when runs are continued and information
+  from the previous run is still valid.  For example, if repeated
+  short LAMMPS runs are being invoked, interleaved by other commands,
+  via the *pre no* and *every* options of the run command, the
+  ``setup_minimal()`` method is used.
+
+- The ``force_clear()`` method initializes force and other arrays to
+  zero before each timestep, so that forces (torques, etc) can be
+  accumulated.
+
+Now for the ``Verlet::run()`` method.  Its basic structure in high-level
+pseudo-code is shown below.  In the actual code in ``src/verlet.cpp`` some of
+these operations are conditionally invoked.
+
+.. code-block:: python
+
+   loop over N timesteps:
+     if timeout condition: break
+     ev_set()
+
+     fix->initial_integrate()
+     fix->post_integrate()
+
+     nflag = neighbor->decide()
+     if nflag:
+       fix->pre_exchange()
+       domain->pbc()
+       domain->reset_box()
+       comm->setup()
+       neighbor->setup_bins()
+       comm->exchange()
+       comm->borders()
+       fix->pre_neighbor()
+       neighbor->build()
+       fix->post_neighbor()
+     else:
+       comm->forward_comm()
+
+     force_clear()
+     fix->pre_force()
+
+     pair->compute()
+     bond->compute()
+     angle->compute()
+     dihedral->compute()
+     improper->compute()
+     kspace->compute()
+
+     fix->pre_reverse()
+     comm->reverse_comm()
+
+     fix->post_force()
+     fix->final_integrate()
+     fix->end_of_step()
+
+     if any output on this step:
+       output->write()
+
+   # after loop
+   fix->post_run()
+
+
+The ``ev_set()`` method (in the parent Integrate class), sets two flags
+(*eflag* and *vflag*) for energy and virial computation.  Each flag
+encodes whether global and/or per-atom energy and virial should be
+calculated on this timestep, because some fix or variable or output will
+need it.  These flags are passed to the various methods that compute
+particle interactions, so that they either compute and tally the
+corresponding data or can skip the extra calculations if the energy and
+virial are not needed.  See the comments for the ``Integrate::ev_set()``
+method which document the flag values.
+
+At various points of the timestep, fixes are invoked,
+e.g. ``fix->initial_integrate()``.  In the code, this is actually done
+via the Modify class which stores all the Fix objects and lists of which
+should be invoked at what point in the timestep.  Fixes are the LAMMPS
+mechanism for tailoring the operations of a timestep for a particular
+simulation.  As described elsewhere, each fix has one or more methods,
+each of which is invoked at a specific stage of the timestep, as shown in
+the timestep pseudo-code.  All the active fixes defined in an input
+script, that are flagged to have an ``initial_integrate()`` method are
+invoked at the beginning of each timestep.  Examples are :doc:`fix nve
+<fix_nve>` or :doc:`fix nvt or fix npt <fix_nh>` which perform the
+start-of-timestep velocity-Verlet integration operations to update
+velocities by a half-step, and coordinates by a full step.  The
+``post_integrate()`` method is next for operations that need to happen
+immediately after those updates.  Only a few fixes use this, e.g. to
+reflect particles off box boundaries in the :doc:`FixWallReflect class
+<fix_wall_reflect>`.
+
+The ``decide()`` method in the Neighbor class determines whether
+neighbor lists need to be rebuilt on the current timestep (conditions
+can be changed using the :doc:`neigh_modify every/delay/check
+<neigh_modify>` command).  If not, coordinates of ghost atoms are
+acquired by each processor via the ``forward_comm()`` method of the Comm
+class.  If neighbor lists need to be built, several operations within
+the inner if clause of the pseudo-code are first invoked.  The
+``pre_exchange()`` method of any defined fixes is invoked first.
+Typically this inserts or deletes particles from the system.
+
+Periodic boundary conditions are then applied by the Domain class via
+its ``pbc()`` method to remap particles that have moved outside the
+simulation box back into the box.  Note that this is not done every
+timestep, but only when neighbor lists are rebuilt.  This is so that
+each processor's sub-domain will have consistent (nearby) atom
+coordinates for its owned and ghost atoms.  It is also why dumped atom
+coordinates may be slightly outside the simulation box if not dumped
+on a step where the neighbor lists are rebuilt.
+
+The box boundaries are then reset (if needed) via the ``reset_box()``
+method of the Domain class, e.g. if box boundaries are shrink-wrapped to
+current particle coordinates.  A change in the box size or shape
+requires internal information for communicating ghost atoms (Comm class)
+and neighbor list bins (Neighbor class) be updated.  The ``setup()``
+method of the Comm class and ``setup_bins()`` method of the Neighbor
+class perform the update.
+
+The code is now ready to migrate atoms that have left a processor's
+geometric sub-domain to new processors.  The ``exchange()`` method of
+the Comm class performs this operation.  The ``borders()`` method of the
+Comm class then identifies ghost atoms surrounding each processor's
+sub-domain and communicates ghost atom information to neighboring
+processors.  It does this by looping over all the atoms owned by a
+processor to make lists of those to send to each neighbor processor.  On
+subsequent timesteps, the lists are used by the ``Comm::forward_comm()``
+method.
+
+Fixes with a ``pre_neighbor()`` method are then called.  These typically
+re-build some data structure stored by the fix that depends on the
+current atoms owned by each processor.
+
+Now that each processor has a current list of its owned and ghost
+atoms, LAMMPS is ready to rebuild neighbor lists via the ``build()``
+method of the Neighbor class.  This is typically done by binning all
+owned and ghost atoms, and scanning a stencil of bins around each
+owned atom's bin to make a Verlet list of neighboring atoms within the
+force cutoff plus neighbor skin distance.
+
+In the next portion of the timestep, all interaction forces between
+particles are computed, after zeroing the per-atom force vector via the
+``force_clear()`` method.  If the newton flag is set to *on* by the
+newton command, forces are added to both owned and ghost atoms, otherwise
+only to owned (aka local) atoms.
+
+Pairwise forces are calculated first, which enables the global virial
+(if requested) to be calculated cheaply (at O(N) cost instead of O(N**2)
+at the end of the ``Pair::compute()`` method), by a dot product of atom
+coordinates and forces.  By including owned and ghost atoms in the dot
+product, the effect of periodic boundary conditions is correctly
+accounted for.  Molecular topology interactions (bonds, angles,
+dihedrals, impropers) are calculated next (if supported by the current
+atom style).  The final contribution is from long-range Coulombic
+interactions, invoked by the KSpace class.
+
+The ``pre_reverse()`` method in fixes is used for operations that have to
+be done *before* the upcoming reverse communication (e.g. to perform
+additional data transfers or reductions for data computed during the
+force computation and stored with ghost atoms).
+
+If the newton flag is on, forces on ghost atoms are communicated and
+summed back to their corresponding owned atoms.  The ``reverse_comm()``
+method of the Comm class performs this operation, which is essentially
+the inverse operation of sending copies of owned atom coordinates to
+other processors' ghost atoms.
+
+At this point in the timestep, the total force on each (local) atom is
+known.  Additional force constraints (external forces, SHAKE, etc) are
+applied by Fixes that have a ``post_force()`` method.  The second half
+of the velocity-Verlet integration, ``final_integrate()`` is then
+performed (another half-step update of the velocities) via fixes like
+nve, nvt, npt.
+
+At the end of the timestep, fixes that contain an ``end_of_step()``
+method are invoked.  These typically perform a diagnostic calculation,
+e.g. the ave/time and ave/spatial fixes.  The final operation of the
+timestep is to perform any requested output, via the ``write()`` method
+of the Output class.  There are 3 kinds of LAMMPS output: thermodynamic
+output to the screen and log file, snapshots of atom data to a dump
+file, and restart files.  See the :doc:`thermo_style <thermo_style>`,
+:doc:`dump <dump>`, and :doc:`restart <restart>` commands for more
+details.
+
+The flow of control during energy minimization iterations is
+similar to that of a molecular dynamics timestep.  Forces are computed,
+neighbor lists are built as needed, atoms migrate to new processors, and
+atom coordinates and forces are communicated to neighboring processors.
+The only difference is what Fix class operations are invoked when.  Only
+a subset of LAMMPS fixes are useful during energy minimization, as
+explained in their individual doc pages.  The relevant Fix class methods
+are ``min_pre_exchange()``, ``min_pre_force()``, and ``min_post_force()``.
+Each fix is invoked at the appropriate place within the minimization
+iteration.  For example, the ``min_post_force()`` method is analogous to
+the ``post_force()`` method for dynamics; it is used to alter or constrain
+forces on each atom, which affects the minimization procedure.
+
+After all iterations are completed there is a ``cleanup`` step which
+calls the ``post_run()`` method of fixes to perform operations only required
+at the end of a calculation (like freeing temporary storage or creating
+final outputs).
diff --git a/doc/src/pg_dev_org.rst b/doc/src/pg_dev_org.rst
new file mode 100644
index 0000000000..84432f52cd
--- /dev/null
+++ b/doc/src/pg_dev_org.rst
@@ -0,0 +1,250 @@
+LAMMPS source files
+===================
+
+The source files of the LAMMPS code are found in two
+directories of the distribution: ``src`` and ``lib``.
+Most of the code is C++ but there are small numbers of files
+in several other languages.
+
+The core of the code is located in the
+``src`` folder and its sub-directories.
+A sizable number of these files are in the ``src`` directory
+itself, but there are plenty of :doc:`packages <Packages>`, which can be
+included or excluded when LAMMPS is built.  See the :doc:`Include
+packages in build <Build_package>` section of the manual for more
+information about that part of the build process.  LAMMPS currently
+supports building with :doc:`conventional makefiles <Build_make>` and
+through :doc:`CMake <Build_cmake>` which differ in how packages are
+enabled or disabled for a LAMMPS binary.  The source files for each
+package are in all-uppercase sub-directories of the ``src`` folder, for
+example ``src/MOLECULE`` or ``src/USER-MISC``.  The ``src/STUBS``
+sub-directory is not a package but contains a dummy MPI library, that is
+used when building a serial version of the code. The ``src/MAKE``
+directory contains makefiles with settings and flags for a variety of
+configurations and machines for the build process with traditional
+makefiles.
+
+The ``lib`` directory contains the source code for several supporting
+libraries or files with configuration settings to use globally installed
+libraries, that are required by some of the optional packages.
+Each sub-directory, like ``lib/poems`` or ``lib/gpu``, contains the
+source files, some of which are in different languages such as Fortran
+or CUDA. These libraries are linked to during a LAMMPS build, if the
+corresponding package is installed.
+
+LAMMPS C++ source files almost always come in pairs, such as
+``src/run.cpp`` (implementation file) and ``src/run.h`` (header file).
+Each pair of files defines a C++
+class, for example the :cpp:class:`LAMMPS_NS::Run` class which contains
+the code invoked by the :doc:`run <run>` command in a LAMMPS input script.
+As this example illustrates, source file and class names often have a
+one-to-one correspondence with a command used in a LAMMPS input script.
+Some source files and classes do not have a corresponding input script
+command, e.g. ``src/force.cpp`` and the :cpp:class:`LAMMPS_NS::Force`
+class.  They are discussed in the next section.
+
+A small number of C++ classes and utility functions are implemented with
+only a ``.h`` file. Examples are the Pointers class or the MathVec functions.
+
+LAMMPS class topology
+=====================
+
+Though LAMMPS has a lot of source files and classes, its class topology
+is relatively flat, as outlined in the :ref:`class-topology` figure.  Each
+name refers to a class and has a pair of associated source files in the
+``src`` folder, for example the class :cpp:class:`LAMMPS_NS::Memory`
+corresponds to the files ``memory.cpp`` and ``memory.h``, or the class
+:cpp:class:`LAMMPS_NS::AtomVec` corresponds to the files
+``atom_vec.cpp`` and ``atom_vec.h``.  Full lines in the figure represent
+compositing: that is the class to the left holds a pointer to an
+instance of the class to the right.  Dashed lines instead represent
+inheritance: the class to the right is derived from the class on the
+left. Classes with a red boundary are not instantiated directly, but
+they represent the base classes for "styles".  Those "styles" make up
+the bulk of the LAMMPS code and only a few typical examples are included
+in the figure for demonstration purposes.
+
+.. _class-topology:
+.. figure:: JPG/lammps-classes.png
+
+   LAMMPS class topology
+
+   This figure shows some of the relations of the base classes of the
+   LAMMPS simulation package.  Full lines indicate that a class holds an
+   instance of the class it is pointing to; dashed lines point to
+   derived classes that are given as examples of what classes may be
+   instantiated during a LAMMPS run based on the input commands and
+   accessed through the API defined by their respective base classes.  At
+   the core is the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class, which
+   holds pointers to class instances with specific purposes.  Those may
+   hold instances of other classes, sometimes directly, or only
+   temporarily, sometimes as derived classes or derived classes of
+   derived classes, which may also hold instances of other classes.
+
+The :cpp:class:`LAMMPS_NS::LAMMPS` class is the topmost class and
+represents what is referred to as an "instance" of LAMMPS.  It is a
+composite holding references to instances of other core classes
+providing the core functionality of the MD engine in LAMMPS and through
+them abstractions of the required operations.  The constructor of the
+LAMMPS class will instantiate those instances, process the command line
+flags, initialize MPI (if not already done) and set up file pointers for
+input and output. The destructor will shut everything down and free all
+associated memory.  Thus code for the standalone LAMMPS executable in
+``main.cpp`` simply initializes MPI, instantiates a single instance of
+LAMMPS, and passes it the command line flags and input script. It
+deletes the LAMMPS instance after the method reading the input returns
+and shuts down the MPI environment before it exits the executable.
+
+The :cpp:class:`LAMMPS_NS::Pointers` class is not shown in the
+:ref:`class-topology` figure; it holds references to members of the
+:cpp:class:`LAMMPS_NS::LAMMPS` class, so that all classes derived from
+:cpp:class:`LAMMPS_NS::Pointers` have direct access to those references.
+From the class topology all classes with blue boundary are referenced in
+this class and all classes in the second and third columns, that are not
+listed as derived classes are instead derived from
+:cpp:class:`LAMMPS_NS::Pointers`.
+
+Since all storage is encapsulated, the LAMMPS class can also be
+instantiated multiple times by a calling code, and that can be either
+simultaneously or consecutively.  When running in parallel with MPI,
+care has to be taken, that suitable communicators are used to not
+create conflicts between different instances.
+
+The LAMMPS class currently holds instances of 19 classes representing
+different core functionalities.  There are a handful of virtual parent
+classes in LAMMPS that define what LAMMPS calls ``styles``.  They are
+shaded red in the :ref:`class-topology` figure.  Each of these is the
+parent of a number of child classes that implement the interface
+defined by the parent class.  There are two main categories of these
+``styles``: some may only have one instance active at a time (e.g. atom,
+pair, bond, angle, dihedral, improper, kspace, comm) and there is a
+dedicated pointer variable in the composite class that manages them.
+Setups that require a mix of different such styles have to use a
+*hybrid* class that manages and forwards calls to the corresponding
+sub-styles for the designated subset of atoms or data.  Or the composite
+class may have lists of class instances, e.g. Modify handles lists of
+compute and fix styles, while Output handles dump class instances.
+
+The exception to this scheme are the ``command`` style classes. These
+implement specific commands that can be invoked before, after, or between
+runs or are commands which launch a simulation.  For these an instance
+of the class is created, its command() method called and then, after
+completion, the class instance deleted.  Examples for this are the
+create_box, create_atoms, minimize, run, or velocity command styles.
+
+For all those ``styles`` certain naming conventions are employed: for
+the fix nve command the class is called FixNVE and the files are
+``fix_nve.h`` and ``fix_nve.cpp``. Similarly for fix ave/time we have
+FixAveTime and ``fix_ave_time.h`` and ``fix_ave_time.cpp``. Style names
+are lower case and without spaces or special characters. One or more
+suffixes appended with a forward slash '/' denote a variant of the
+corresponding class without the suffix. To connect the style name and
+the class name, LAMMPS uses macros like the following ATOM\_CLASS,
+PAIR\_CLASS, BOND\_CLASS, REGION\_CLASS, FIX\_CLASS, COMPUTE\_CLASS,
+or DUMP\_CLASS in the corresponding header file.  During compilation
+files with the pattern ``style_name.h`` are created that contain include
+statements including all headers of all styles of a given type that
+are currently active (or "installed").
+
+
+More details on individual classes in the :ref:`class-topology` are as
+follows:
+
+- The Memory class handles allocation of all large vectors and arrays.
+
+- The Error class prints all error and warning messages.
+
+- The Universe class sets up partitions of processors so that multiple
+  simulations can be run, each on a subset of the processors allocated
+  for a run, e.g. by the mpirun command.
+
+- The Input class reads and processes input strings and files,
+  stores variables, and invokes :doc:`commands <Commands_all>`.
+
+- As discussed above, command style classes are directly derived from
+  the Pointers class. They provide input script commands that perform
+  one-time operations before/after/between simulations or which invoke a
+  simulation.  They are instantiated from within the Input class,
+  invoked, then immediately destructed.
+
+- The Finish class is instantiated to print statistics to the screen
+  after a simulation is performed, by commands like run and minimize.
+
+- The Special class walks the bond topology of a molecular system to
+  find first, second, third neighbors of each atom.  It is invoked by
+  several commands, like :doc:`read_data <read_data>`,
+  :doc:`read_restart <read_restart>`, or :doc:`replicate <replicate>`.
+
+- The Atom class stores per-atom properties associated with atom styles.
+  More precisely, they are allocated and managed by a class derived from
+  the AtomVec class, and the Atom class simply stores pointers to them.
+  The classes derived from AtomVec represent the different atom styles
+  and they are instantiated through the :doc:`atom_style <atom_style>`
+  command.
+
+- The Update class holds instances of an integrator and a minimizer
+  class.  The Integrate class is a parent style for the Verlet and
+  r-RESPA time integrators, as defined by the :doc:`run_style
+  <run_style>` command.  The Min class is a parent style for various
+  energy minimizers.
+
+- The Neighbor class builds and stores neighbor lists.  The NeighList
+  class stores a single list (for all atoms).  A NeighRequest class
+  instance is created by pair, fix, or compute styles when they need a
+  particular kind of neighbor list and use the NeighRequest properties
+  to select the neighbor list settings for the given request. There can
+  be multiple instances of the NeighRequest class and the Neighbor class
+  will try to optimize how they are computed by creating copies or
+  sub-lists where possible.
+
+- The Comm class performs inter-processor communication, typically of
+  ghost atom information.  This usually involves MPI message exchanges
+  with 6 neighboring processors in the 3d logical grid of processors
+  mapped to the simulation box. There are two :doc:`communication styles
+  <comm_style>` enabling different ways to do the domain decomposition.
+  Sometimes the Irregular class is used, when atoms may migrate to
+  arbitrary processors.
+
+- The Domain class stores the simulation box geometry, as well as
+  geometric Regions and any user definition of a Lattice.  The latter
+  are defined by the :doc:`region <region>` and :doc:`lattice <lattice>`
+  commands in an input script.
+
+- The Force class computes various forces between atoms.  The Pair
+  parent class is for non-bonded or pair-wise forces, which in LAMMPS
+  also includes many-body forces such as the Tersoff 3-body potential if
+  those are computed by walking pairwise neighbor lists.  The Bond,
+  Angle, Dihedral, Improper parent classes are styles for bonded
+  interactions within a static molecular topology.  The KSpace parent
+  class is for computing long-range Coulombic interactions.  One of its
+  child classes, PPPM, uses the FFT3D and Remap classes to redistribute
+  and communicate grid-based information across the parallel processors.
+
+- The Modify class stores lists of class instances derived from the
+  :doc:`Fix <fix>` and :doc:`Compute <compute>` base classes.
+
+- The Group class manipulates groups that atoms are assigned to via the
+  :doc:`group <group>` command.  It also has functions to compute
+  various attributes of groups of atoms.
+
+- The Output class is used to generate 3 kinds of output from a LAMMPS
+  simulation: thermodynamic information printed to the screen and log
+  file, dump file snapshots, and restart files.  These correspond to the
+  :doc:`Thermo <thermo_style>`, :doc:`Dump <dump>`, and
+  :doc:`WriteRestart <write_restart>` classes respectively.  The Dump
+  class is a base class with several derived classes implementing
+  various dump style variants.
+
+- The Timer class logs timing information, output at the end
+  of a run.
+
+.. TODO section on "Spatial decomposition and parallel operations"
+..       diagram of 3d processor grid, brick vs. tiled. local vs. ghost
+..       atoms, 6-way communication with pack/unpack functions,
+..       PBC as part of the communication
+
+.. TODO section on "Fixes, Computes, and Variables"
+..      how and when data is computed and provided and how it is
+..      referenced. flags in Fix/Compute/Variable classes tell
+..      style and amount of available data.
+
diff --git a/doc/src/pg_dev_utils.rst b/doc/src/pg_dev_utils.rst
new file mode 100644
index 0000000000..e34f8c806e
--- /dev/null
+++ b/doc/src/pg_dev_utils.rst
@@ -0,0 +1,417 @@
+
+LAMMPS utility functions
+========================
+
+The ``utils`` sub-namespace inside the ``LAMMPS_NS`` namespace provides
+a collection of convenience functions and utilities that perform common
+tasks that are required repeatedly throughout the LAMMPS code like
+reading or writing to files with error checking or translation of
+strings into specific types of numbers with checking for validity.  This
+reduces redundant implementations and encourages consistent behavior.
+
+I/O with status check
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+These are wrappers around the corresponding C library calls like
+``fgets()`` or ``fread()``.  They will check if there were errors
+on reading or an unexpected end-of-file state was reached.  In that
+case, the functions will stop the calculation with an error message,
+indicating the name of the problematic file, if possible.
+
+----------
+
+.. doxygenfunction:: sfgets
+   :project: progguide
+
+.. doxygenfunction:: sfread
+   :project: progguide
+
+String to number conversions with validity check
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+These functions should be used to convert strings to numbers. They are
+strongly preferred over C library calls like ``atoi()`` or
+``atof()`` since they check if the **entire** provided string is a valid
+(floating-point or integer) number, and will error out instead of
+silently returning the result of a partial conversion or zero in cases
+where the string is not a valid number.  This behavior makes it easier
+to detect typos or issues when processing input files.
+
+The *do_abort* flag should be set to ``true`` in case this function
+is called only on a single MPI rank, as that will then trigger
+a call to ``Error::one()`` for errors instead of ``Error::all()``
+and avoids a "hanging" calculation when run in parallel.
+
+Please also see :cpp:func:`is_integer` and :cpp:func:`is_double` for
+testing strings for compliance without conversion.
+
+----------
+
+.. doxygenfunction:: numeric
+   :project: progguide
+
+.. doxygenfunction:: inumeric
+   :project: progguide
+
+.. doxygenfunction:: bnumeric
+   :project: progguide
+
+.. doxygenfunction:: tnumeric
+   :project: progguide
+
+
+String processing
+^^^^^^^^^^^^^^^^^
+
+The following are functions to help with processing strings
+and parsing files or arguments.
+
+----------
+
+.. doxygenfunction:: trim
+   :project: progguide
+
+.. doxygenfunction:: trim_comment
+   :project: progguide
+
+.. doxygenfunction:: count_words(const char *text)
+   :project: progguide
+
+.. doxygenfunction:: count_words(const std::string &text)
+   :project: progguide
+
+.. doxygenfunction:: count_words(const std::string &text, const std::string &separators)
+   :project: progguide
+
+.. doxygenfunction:: trim_and_count_words
+   :project: progguide
+
+.. doxygenfunction:: split_words
+   :project: progguide
+
+.. doxygenfunction:: strmatch
+   :project: progguide
+
+.. doxygenfunction:: is_integer
+   :project: progguide
+
+.. doxygenfunction:: is_double
+   :project: progguide
+
+File and path functions
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: guesspath
+   :project: progguide
+
+.. doxygenfunction:: path_basename
+   :project: progguide
+
+.. doxygenfunction:: path_join
+   :project: progguide
+
+.. doxygenfunction:: file_is_readable
+   :project: progguide
+
+Potential file functions
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: get_potential_file_path
+   :project: progguide
+
+.. doxygenfunction:: get_potential_date
+   :project: progguide
+
+.. doxygenfunction:: get_potential_units
+   :project: progguide
+
+.. doxygenfunction:: get_supported_conversions
+   :project: progguide
+
+.. doxygenfunction:: get_conversion_factor
+   :project: progguide
+
+.. doxygenfunction:: open_potential(const std::string &name, LAMMPS *lmp, int *auto_convert)
+   :project: progguide
+
+Argument processing
+^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: bounds
+   :project: progguide
+
+.. doxygenfunction:: expand_args
+   :project: progguide
+
+Convenience functions
+^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: logmesg
+   :project: progguide
+
+.. doxygenfunction:: getsyserror
+   :project: progguide
+
+.. doxygenfunction:: check_packages_for_style
+   :project: progguide
+
+.. doxygenfunction:: timespec2seconds
+   :project: progguide
+
+.. doxygenfunction:: date2num
+   :project: progguide
+
+Customized standard functions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: merge_sort
+   :project: progguide
+
+---------------------------
+
+Tokenizer classes
+=================
+
+The purpose of the tokenizer classes is to simplify the recurring task
+of breaking lines of text down into words and/or numbers.
+Traditionally, LAMMPS code would be using the ``strtok()`` function from
+the C library for that purpose, but that function has two significant
+disadvantages: 1) it cannot be used concurrently from different LAMMPS
+instances since it stores its status in a global variable and 2) it
+modifies the string that it is processing.  These classes were
+implemented to avoid both of these issues and also to reduce the amount
+of code that needs to be written.
+
+The basic procedure is to create an instance of the tokenizer class with
+the string to be processed as an argument and then do a loop until all
+available tokens are read.  The constructor has a default set of
+separator characters, but that can be overridden. The default separators
+are all "whitespace" characters, i.e. the space character, the tabulator
+character, the carriage return character, the linefeed character, and
+the form feed character.
+
+.. code-block:: C++
+   :caption: Tokenizer class example listing entries of the PATH environment variable
+
+   #include "tokenizer.h"
+   #include <cstdlib>
+   #include <string>
+   #include <iostream>
+
+   using namespace LAMMPS_NS;
+
+   int main(int, char **)
+   {
+       const char *path = getenv("PATH");
+
+       if (path != nullptr) {
+           Tokenizer p(path,":");
+           while (p.has_next())
+               std::cout << "Entry: " << p.next() << "\n";
+       }
+       return 0;
+   }
+
+Most tokenizer operations cannot fail except for
+:cpp:func:`LAMMPS_NS::Tokenizer::next` (when used without first
+checking with :cpp:func:`LAMMPS_NS::Tokenizer::has_next`) and
+:cpp:func:`LAMMPS_NS::Tokenizer::skip`.  In case of failure, the class
+will throw an exception, so you may need to wrap the code using the
+tokenizer into a ``try`` / ``catch`` block to handle errors.  The
+:cpp:class:`LAMMPS_NS::ValueTokenizer` class may also throw an exception
+when a (type of) number is requested as next token that is not
+compatible with the string representing the next word.
+
+.. code-block:: C++
+   :caption: ValueTokenizer class example with exception handling
+
+   #include "tokenizer.h"
+   #include <cstdlib>
+   #include <string>
+   #include <iostream>
+
+   using namespace LAMMPS_NS;
+
+   int main(int, char **)
+   {
+       const char *text = "1 2 3 4 5 20.0 21 twentytwo 2.3";
+       double num1(0),num2(0),num3(0),num4(0);
+
+       ValueTokenizer t(text);
+       // read 4 doubles after skipping over 5 numbers
+       try {
+           t.skip(5);
+           num1 = t.next_double();
+           num2 = t.next_double();
+           num3 = t.next_double();
+           num4 = t.next_double();
+       } catch (TokenizerException &e) {
+           std::cout << "Reading numbers failed: " << e.what() << "\n";
+       }
+       std::cout << "Values: " << num1 << " " << num2 << " " << num3 << " " << num4 << "\n";
+       return 0;
+   }
+
+This code example should produce the following output:
+
+.. code-block::
+
+   Reading numbers failed: Not a valid floating-point number: 'twentytwo'
+   Values: 20 21 0 0
+
+----------
+
+.. doxygenclass:: LAMMPS_NS::Tokenizer
+   :project: progguide
+   :members:
+
+.. doxygenclass:: LAMMPS_NS::TokenizerException
+   :project: progguide
+   :members:
+
+.. doxygenclass:: LAMMPS_NS::ValueTokenizer
+   :project: progguide
+   :members:
+
+.. doxygenclass:: LAMMPS_NS::InvalidIntegerException
+   :project: progguide
+   :members: what
+
+.. doxygenclass:: LAMMPS_NS::InvalidFloatException
+   :project: progguide
+   :members: what
+
+File reader classes
+====================
+
+The purpose of the file reader classes is to simplify the recurring task
+of reading and parsing files. They can use the
+:cpp:class:`LAMMPS_NS::ValueTokenizer` class to process the read in
+text.  The :cpp:class:`LAMMPS_NS::TextFileReader` is a more general
+version while :cpp:class:`LAMMPS_NS::PotentialFileReader` is specialized
+to implement the behavior expected for looking up and reading/parsing
+files with potential parameters in LAMMPS.  The potential file reader
+class requires a LAMMPS instance, must be run on MPI rank 0 only,
+will use the :cpp:func:`LAMMPS_NS::utils::get_potential_file_path`
+function to look up and open the file, and will call the
+:cpp:class:`LAMMPS_NS::Error` class in case of failures to read or to
+convert numbers, so that LAMMPS will be aborted.
+
+.. code-block:: C++
+   :caption: Use of PotentialFileReader class in pair style coul/streitz
+
+    PotentialFileReader reader(lmp, file, "coul/streitz");
+    char * line;
+
+    while((line = reader.next_line(NPARAMS_PER_LINE))) {
+      try {
+        ValueTokenizer values(line);
+        std::string iname = values.next_string();
+
+        int ielement;
+        for (ielement = 0; ielement < nelements; ielement++)
+          if (iname == elements[ielement]) break;
+
+        if (nparams == maxparam) {
+          maxparam += DELTA;
+          params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
+                                              "pair:params");
+        }
+
+        params[nparams].ielement = ielement;
+        params[nparams].chi = values.next_double();
+        params[nparams].eta = values.next_double();
+        params[nparams].gamma = values.next_double();
+        params[nparams].zeta = values.next_double();
+        params[nparams].zcore = values.next_double();
+
+      } catch (TokenizerException & e) {
+        error->one(FLERR, e.what());
+      }
+      nparams++;
+    }
+
+A file that would be parsed by the reader code fragment looks like this::
+
+   # DATE: 2015-02-19 UNITS: metal CONTRIBUTOR: Ray Shan CITATION: Streitz and Mintmire, Phys Rev B, 50, 11996-12003 (1994)
+   #
+   # X (eV)                J (eV)          gamma (1/\AA)   zeta (1/\AA)    Z (e)
+
+   Al      0.000000        10.328655       0.000000        0.968438        0.763905
+   O       5.484763        14.035715       0.000000        2.143957        0.000000
+
+
+----------
+
+.. doxygenclass:: LAMMPS_NS::TextFileReader
+   :project: progguide
+   :members:
+
+.. doxygenclass:: LAMMPS_NS::PotentialFileReader
+   :project: progguide
+   :members:
+
+
+----------
+
+Memory pool classes
+===================
+
+The memory pool classes are used for cases where otherwise many
+small memory allocations would be needed and where the data would
+be either all used or all freed.  One example for that is the
+storage of neighbor lists.  The memory management strategy is
+based on the assumption that allocations will be in chunks of similar
+sizes.  The allocation is then not done per individual call for a
+reserved chunk of memory, but for a "page" that can hold multiple
+chunks of data.  A parameter for the maximum chunk size must be
+provided, as that is used to determine whether a new page of memory
+must be used.
+
+The :cpp:class:`MyPage <LAMMPS_NS::MyPage>` class offers two ways to
+reserve a chunk: 1) with :cpp:func:`get() <LAMMPS_NS::MyPage::get>` the
+chunk size needs to be known in advance, 2) with :cpp:func:`vget()
+<LAMMPS_NS::MyPage::vget>` a pointer to the next chunk is returned, but
+its size is registered later with :cpp:func:`vgot()
+<LAMMPS_NS::MyPage::vgot>`.
+
+.. code-block:: C++
+   :caption: Example of using :cpp:class:`MyPage <LAMMPS_NS::MyPage>`
+
+      #include "my_page.h"
+      using namespace LAMMPS_NS;
+
+      MyPage<double> *dpage = new MyPage<double>;
+      // max size of chunk: 256, size of page: 10240 doubles (=81920 bytes)
+      dpage->init(256,10240);
+
+      double **build_some_lists(int num)
+      {
+          dpage->reset();
+          double **dlist = new double*[num];
+          for (int i=0; i < num; ++i) {
+              double *dptr = dpage->vget();
+              int jnum = 0;
+              for (int j=0; j < jmax; ++j) {
+                  // compute some dvalue for eligible loop index j
+                  dptr[j] = dvalue;
+                  ++jnum;
+              }
+              if (dpage->status() != 0) {
+                  // handle out of memory or jnum too large errors
+              }
+              dpage->vgot(jnum);
+              dlist[i] = dptr;
+          }
+          return dlist;
+      }
+
+----------
+
+.. doxygenclass:: LAMMPS_NS::MyPage
+   :project: progguide
+   :members:
+
+.. doxygenclass:: LAMMPS_NS::MyPoolChunk
+   :project: progguide
+   :members:
diff --git a/doc/src/pg_dev_write.rst b/doc/src/pg_dev_write.rst
new file mode 100644
index 0000000000..7765ed1262
--- /dev/null
+++ b/doc/src/pg_dev_write.rst
@@ -0,0 +1,253 @@
+Writing LAMMPS styles
+=====================
+
+The :doc:`Modify` section of the manual gives an overview of how LAMMPS can
+be extended by writing new classes that derive from existing
+parent classes in LAMMPS.  Here, some specific coding
+details are provided for writing code for LAMMPS.
+
+Writing a new fix style
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Writing fixes is a flexible way of extending LAMMPS.  Users can
+implement many things using fixes:
+
+- changing particle attributes (positions, velocities, forces, etc.). Examples: FixNVE, FixFreeze.
+- reading/writing data. Example: FixRestart.
+- adding or modifying properties due to geometry. Example: FixWall.
+- interacting with other subsystems or external code. Examples: FixTTM, FixExternal, FixLATTE.
+- saving information for analysis or future use (previous positions,
+  for instance). Examples: FixAveTime, FixStoreState.
+
+
+All fixes are derived from the Fix base class and must have a
+constructor with the signature: ``FixPrintVel(class LAMMPS *, int, char **)``.
+
+Every fix must be registered in LAMMPS by writing the following lines
+of code in the header before include guards:
+
+.. code-block:: c
+
+   #ifdef FIX_CLASS
+   FixStyle(print/vel,FixPrintVel)
+   #else
+   /* the definition of the FixPrintVel class comes here */
+   ...
+   #endif
+
+Where ``print/vel`` is the style name of your fix in the input script and
+``FixPrintVel`` is the name of the class. The header file would be called
+``fix_print_vel.h`` and the implementation file ``fix_print_vel.cpp``.
+These conventions allow LAMMPS to automatically integrate it into the
+executable when compiling and associate your new fix class with the designated
+keyword when it parses the input script.
+
+Let's write a simple fix which will print the average velocity at the end
+of each timestep. First of all, implement a constructor:
+
+.. code-block:: C++
+
+   FixPrintVel::FixPrintVel(LAMMPS *lmp, int narg, char **arg)
+   : Fix(lmp, narg, arg)
+   {
+     if (narg < 4)
+       error->all(FLERR,"Illegal fix print/vel command");
+
+     nevery = force->inumeric(FLERR,arg[3]);
+     if (nevery <= 0)
+       error->all(FLERR,"Illegal fix print/vel command");
+   }
+
+In the constructor you should parse your fix arguments which are
+specified in the script. All fixes have pretty much the same syntax:
+``fix <fix-ID> <fix group> <fix name> <fix arguments ...>``. The
+first 3 parameters are parsed by the Fix base class constructor, while
+``<fix arguments>`` should be parsed by you. In our case, we need to
+specify how often we want to print an average velocity. For instance,
+once in 50 timesteps: ``fix 1 all print/vel 50``. There is a special variable
+in the Fix class called ``nevery`` which specifies how often the method
+``end_of_step()`` is called. Thus all we need to do is just set it up.
+
+The next method we need to implement is ``setmask()``:
+
+.. code-block:: C++
+
+   int FixPrintVel::setmask()
+   {
+     int mask = 0;
+     mask |= FixConst::END_OF_STEP;
+     return mask;
+   }
+
+Here you specify which methods of your fix should be called
+during execution. The constant ``END_OF_STEP`` corresponds to the
+``end_of_step()`` method. The most important available methods that
+are called during a timestep and the order in which they are called
+are shown in the previous section.
+
+.. code-block:: C++
+
+   void FixPrintVel::end_of_step()
+   {
+     // for add3, scale3
+     using namespace MathExtra;
+
+     double** v = atom->v;
+     int nlocal = atom->nlocal;
+     double localAvgVel[4]; // 4th element for particles count
+     memset(localAvgVel, 0, 4 * sizeof(double));
+     for (int particleInd = 0; particleInd < nlocal; ++particleInd) {
+       add3(localAvgVel, v[particleInd], localAvgVel);
+     }
+     localAvgVel[3] = nlocal;
+     double globalAvgVel[4];
+     memset(globalAvgVel, 0, 4 * sizeof(double));
+     MPI_Allreduce(localAvgVel, globalAvgVel, 4, MPI_DOUBLE, MPI_SUM, world);
+     scale3(1.0 / globalAvgVel[3], globalAvgVel);
+     if ((comm->me == 0) && screen) {
+       fmt::print(screen,"{}, {}, {}\n",
+                  globalAvgVel[0], globalAvgVel[1], globalAvgVel[2]);
+     }
+   }
+
+In the code above, we use MathExtra routines defined in
+``math_extra.h``.  There are a bunch of math functions to work with
+arrays of doubles as if they were math vectors.  It is also important to note
+that LAMMPS code should always assume to be run in parallel and that
+atom data is thus distributed across the MPI ranks.  Thus you can
+only process data from local atoms directly and need to use MPI library
+calls to combine or exchange data.  For serial execution, LAMMPS
+comes bundled with the MPI STUBS library that contains the MPI library
+function calls in dummy versions that only work for a single MPI rank.
+
+In this code we use an instance of Atom class. This object is stored
+in the Pointers class (see ``pointers.h``) which is the base class of
+the Fix base class. This object contains references to various class
+instances (the original instances are created and held by the LAMMPS
+class) with all global information about the simulation system.
+Data from the Pointers class is available to all classes inherited from
+it using protected inheritance. Hence when you write your own class,
+which is going to use LAMMPS data, don't forget to inherit from Pointers
+or pass a pointer to it to all functions that need access. When writing
+fixes we inherit from class Fix which is inherited from Pointers so
+there is no need to inherit from it directly.
+
+The code above computes average velocity for all particles in the
+simulation.  Yet you have one unused parameter in fix call from the
+script: ``group_name``.  This parameter specifies the group of atoms
+used in the fix. So we should compute average for all particles in the
+simulation only if ``group_name == "all"``, but it can be any group.
+The group membership information of an atom is contained in the *mask*
+property of an atom and the bit corresponding to a given group is
+stored in the ``groupbit`` variable which is defined in the Fix base class:
+
+.. code-block:: C++
+
+   for (int i = 0; i < nlocal; ++i) {
+     if (atom->mask[i] & groupbit) {
+     // Do all job here
+     }
+   }
+
+Class Atom encapsulates atom positions, velocities, forces, etc. You
+can access them using the particle index. Note that particle indexes
+usually change every few timesteps because of neighbor list rebuilds
+and spatial sorting (to improve cache efficiency).
+
+Let us consider another Fix example: We want to have a fix which stores
+the atom positions from the previous time step. The local atom
+indexes may not be valid on the next iteration. In order to handle
+this situation there are several methods which should be implemented:
+
+- ``double memory_usage()``: return how much memory the fix uses (optional)
+- ``void grow_arrays(int)``: do reallocation of the per particle arrays in your fix
+- ``void copy_arrays(int i, int j, int delflag)``: copy i-th per-particle
+  information to j-th. Used when atom sorting is performed. if delflag is set
+  and atom j owns a body, move the body information to atom i.
+- ``void set_arrays(int i)``: sets i-th particle related information to zero
+
+Note that if your class implements these methods, it must add calls of
+``add_callback`` and ``delete_callback`` to the constructor and destructor. Since we want
+to store positions of atoms from the previous timestep, we need to add
+``double** xold`` to the header file. Then add allocation code
+to the constructor:
+
+.. code-block:: C++
+
+   FixSavePos::FixSavePos(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), xold(nullptr)
+   {
+   //...
+     memory->create(xold, atom->nmax, 3, "FixSavePos:x");
+     atom->add_callback(0);
+   }
+
+   FixSavePos::~FixSavePos() {
+     atom->delete_callback(id, 0);
+     memory->destroy(xold);
+   }
+
+Implement the aforementioned methods:
+
+.. code-block:: C++
+
+   double FixSavePos::memory_usage()
+   {
+     int nmax = atom->nmax;
+     double bytes = 0.0;
+     bytes += nmax * 3 * sizeof(double);
+     return bytes;
+   }
+
+   void FixSavePos::grow_arrays(int nmax)
+   {
+     memory->grow(xold, nmax, 3, "FixSavePos:xold");
+   }
+
+   void FixSavePos::copy_arrays(int i, int j, int delflag)
+   {
+     memcpy(xold[j], xold[i], sizeof(double) * 3);
+   }
+
+   void FixSavePos::set_arrays(int i)
+   {
+     memset(xold[i], 0, sizeof(double) * 3);
+   }
+
+   int FixSavePos::pack_exchange(int i, double *buf)
+   {
+     int m = 0;
+     buf[m++] = xold[i][0];
+     buf[m++] = xold[i][1];
+     buf[m++] = xold[i][2];
+
+     return m;
+   }
+
+   int FixSavePos::unpack_exchange(int nlocal, double *buf)
+   {
+     int m = 0;
+     xold[nlocal][0] = buf[m++];
+     xold[nlocal][1] = buf[m++];
+     xold[nlocal][2] = buf[m++];
+
+     return m;
+   }
+
+Now, a little bit about memory allocation. We use the Memory class which
+is just a bunch of template functions for allocating 1D and 2D
+arrays. So you need to include ``memory.h`` to have access to them.
+
+Finally, if you need to write/read some global information used in
+your fix to the restart file, you can do so by setting the flag
+``restart_global = 1`` in the constructor and implementing the methods
+``void write_restart(FILE *fp)`` and ``void restart(char *buf)``.
+If, in addition, you want to write the per-atom property to restart
+files, additional settings and functions are needed:
+
+- a fix flag indicating this needs to be set: ``restart_peratom = 1;``
+- ``atom->add_callback()`` and ``atom->delete_callback()`` must be called
+  a second time with the final argument set to 1 instead of 0 (indicating
+  restart processing instead of per-atom data memory management).
+- the functions ``int pack_restart(int i, double *buf)`` and
+  ``void unpack_restart(int nlocal, int nth)`` need to be implemented
+
diff --git a/doc/src/pg_developer.rst b/doc/src/pg_developer.rst
index 6b818676aa..89282820ae 100644
--- a/doc/src/pg_developer.rst
+++ b/doc/src/pg_developer.rst
@@ -7,1282 +7,11 @@ information will be added incrementally depending on availability
 of time and requests from the LAMMPS user community.
 
 
-LAMMPS source files
-===================
-
-The source files of the LAMMPS code are found in two
-directories of the distribution: ``src`` and ``lib``.
-Most of the code is C++ but there are small numbers of files
-in several other languages.
-
-The core of the code is located in the
-``src`` folder and its sub-directories.
-A sizable number of these files are in the ``src`` directory
-itself, but there are plenty of :doc:`packages <Packages>`, which can be
-included or excluded when LAMMPS is built.  See the :doc:`Include
-packages in build <Build_package>` section of the manual for more
-information about that part of the build process.  LAMMPS currently
-supports building with :doc:`conventional makefiles <Build_make>` and
-through :doc:`CMake <Build_cmake>` which differ in how packages are
-enabled or disabled for a LAMMPS binary.  The source files for each
-package are in all-uppercase sub-directories of the ``src`` folder, for
-example ``src/MOLECULE`` or ``src/USER-MISC``.  The ``src/STUBS``
-sub-directory is not a package but contains a dummy MPI library, that is
-used when building a serial version of the code. The ``src/MAKE``
-directory contains makefiles with settings and flags for a variety of
-configuration and machines for the build process with traditional
-makefiles.
-
-The ``lib`` directory contains the source code for several supporting
-libraries or files with configuration settings to use globally installed
-libraries, that are required by some of the optional packages.
-Each sub-directory, like ``lib/poems`` or ``lib/gpu``, contains the
-source files, some of which are in different languages such as Fortran
-or CUDA. These libraries are linked to during a LAMMPS build, if the
-corresponding package is installed.
-
-LAMMPS C++ source files almost always come in pairs, such as
-``src/run.cpp`` (implementation file) and ``src/run.h`` (header file).
-Each pair of files defines a C++
-class, for example the :cpp:class:`LAMMPS_NS::Run` class which contains
-the code invoked by the :doc:`run <run>` command in a LAMMPS input script.
-As this example illustrates, source file and class names often have a
-one-to-one correspondence with a command used in a LAMMPS input script.
-Some source files and classes do not have a corresponding input script
-command, e.g. ``src/force.cpp`` and the :cpp:class:`LAMMPS_NS::Force`
-class.  They are discussed in the next section.
-
-A small number of C++ classes and utility functions are implemented with
-only a ``.h`` file. Examples are the Pointer class and the mergesort function.
-
-LAMMPS class topology
-=====================
-
-Though LAMMPS has a lot of source files and classes, its class topology
-is relative flat, as outlined in the :ref:`class-topology` figure.  Each
-name refers to a class and has a pair of associated source files in the
-``src`` folder, for example the class :cpp:class:`LAMMPS_NS::Memory`
-corresponds to the files ``memory.cpp`` and ``memory.h``, or the class
-:cpp:class:`LAMMPS_NS::AtomVec` corresponds to the files
-``atom_vec.cpp`` and ``atom_vec.h``.  Full lines in the figure represent
-compositing: that is the class to the left holds a pointer to an
-instance of the class to the right.  Dashed lines instead represent
-inheritance: the class to the right is derived from the class on the
-left. Classes with a red boundary are not instantiated directly, but
-they represent the base classes for "styles".  Those "styles" make up
-the bulk of the LAMMPS code and only a few typical examples are included
-in the figure for demonstration purposes.
-
-.. _class-topology:
-.. figure:: JPG/lammps-classes.png
-
-   LAMMPS class topology
-
-   This figure shows some of the relations of the base classes of the
-   LAMMPS simulation package.  Full lines indicate that a class holds an
-   instance of the class it is pointing to; dashed lines point to
-   derived classes that are given as examples of what classes may be
-   instantiated during a LAMMPS run based on the input commands and
-   accessed through the API define by their respective base classes.  At
-   the core is the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class, which
-   holds pointers to class instances with specific purposes.  Those may
-   hold instances of other classes, sometimes directly, or only
-   temporarily, sometimes as derived classes or derived classes or
-   derived classes, which may also hold instances of other classes.
-
-The :cpp:class:`LAMMPS_NS::LAMMPS` class is the topmost class and
-represents what is referred to an "instance" of LAMMPS.  It is a
-composite holding references to instances of other core classes
-providing the core functionality of the MD engine in LAMMPS and through
-them abstractions of the required operations.  The constructor of the
-LAMMPS class will instantiate those instances, process the command line
-flags, initialize MPI (if not already done) and set up file pointers for
-input and output. The destructor will shut everything down and free all
-associated memory.  Thus code for the standalone LAMMPS executable in
-``main.cpp`` simply initializes MPI, instantiates a single instance of
-LAMMPS, and passes it the command line flags and input script. It
-deletes the LAMMPS instance after the method reading the input returns
-and shuts down the MPI environment before it exits the executable.
-
-The :cpp:class:`LAMMPS_NS::Pointers` is not shown in the
-:ref:`class-topology` figure, it holds references to members of the
-`LAMMPS_NS::LAMMPS`, so that all classes derived from
-:cpp:class:`LAMMPS_NS::Pointers` have direct access to those reference.
-From the class topology all classes with blue boundary are referenced in
-this class and all classes in the second and third columns, that are not
-listed as derived classes are instead derived from
-:cpp:class:`LAMMPS_NS::Pointers`.
-
-Since all storage is encapsulated, the LAMMPS class can also be
-instantiated multiple times by a calling code, and that can be either
-simultaneously or consecutively.  When running in parallel with MPI,
-care has to be taken, that suitable communicators are used to not
-create conflicts between different instances.
-
-The LAMMPS class currently holds instances of 19 classes representing
-different core functionalities There are a handful of virtual parent
-classes in LAMMPS that define what LAMMPS calls ``styles``.  They are
-shaded red in the :ref:`class-topology` figure.  Each of these are
-parents of a number of child classes that implement the interface
-defined by the parent class.  There are two main categories of these
-``styles``: some may only have one instance active at a time (e.g. atom,
-pair, bond, angle, dihedral, improper, kspace, comm) and there is a
-dedicated pointer variable in the composite class that manages them.
-Setups that require a mix of different such styles have to use a
-*hybrid* class that manages and forwards calls to the corresponding
-sub-styles for the designated subset of atoms or data. or the composite
-class may have lists of class instances, e.g. Modify handles lists of
-compute and fix styles, while Output handles dumps class instances.
-
-The exception to this scheme are the ``command`` style classes. These
-implement specific commands that can be invoked before, after, or between
-runs or are commands which launch a simulation.  For these an instance
-of the class is created, its command() method called and then, after
-completion, the class instance deleted.  Examples for this are the
-create_box, create_atoms, minimize, run, or velocity command styles.
-
-For all those ``styles`` certain naming conventions are employed: for
-the fix nve command the class is called FixNVE and the files are
-``fix_nve.h`` and ``fix_nve.cpp``. Similarly for fix ave/time we have
-FixAveTime and ``fix_ave_time.h`` and ``fix_ave_time.cpp``. Style names
-are lower case and without spaces or special characters. A suffix or
-multiple appended with a forward slash '/' denotes a variant of the
-corresponding class without the suffix. To connect the style name and
-the class name, LAMMPS uses macros like the following ATOM\_CLASS,
-PAIR\_CLASS, BOND\_CLASS, REGION\_CLASS, FIX\_CLASS, COMPUTE\_CLASS,
-or DUMP\_CLASS in the corresponding header file.  During compilation
-files with the pattern ``style_name.h`` are created that contain include
-statements including all headers of all styles of a given type that
-are currently active (or "installed).
-
-
-More details on individual classes in the :ref:`class-topology` are as
-follows:
-
-- The Memory class handles allocation of all large vectors and arrays.
-
-- The Error class prints all error and warning messages.
-
-- The Universe class sets up partitions of processors so that multiple
-  simulations can be run, each on a subset of the processors allocated
-  for a run, e.g. by the mpirun command.
-
-- The Input class reads and processes input input strings and files,
-  stores variables, and invokes :doc:`commands <Commands_all>`.
-
-- As discussed above, command style classes are directly derived from
-  the Pointers class. They provide input script commands that perform
-  one-time operations before/after/between simulations or which invoke a
-  simulation.  They are instantiated from within the Input class,
-  invoked, then immediately destructed.
-
-- The Finish class is instantiated to print statistics to the screen
-  after a simulation is performed, by commands like run and minimize.
-
-- The Special class walks the bond topology of a molecular system to
-  find first, second, third neighbors of each atom.  It is invoked by
-  several commands, like :doc:`read_data <read_data>`,
-  :doc:`read_restart <read_restart>`, or :doc:`replicate <replicate>`.
-
-- The Atom class stores per-atom properties associated with atom styles.
-  More precisely, they are allocated and managed by a class derived from
-  the AtomVec class, and the Atom class simply stores pointers to them.
-  The classes derived from AtomVec represent the different atom styles
-  and they are instantiated through the :doc:`atom_style <atom_style>`
-  command.
-
-- The Update class holds instances of an integrator and a minimizer
-  class.  The Integrate class is a parent style for the Verlet and
-  r-RESPA time integrators, as defined by the :doc:`run_style
-  <run_style>` command.  The Min class is a parent style for various
-  energy minimizers.
-
-- The Neighbor class builds and stores neighbor lists.  The NeighList
-  class stores a single list (for all atoms).  A NeighRequest class
-  instance is created by pair, fix, or compute styles when they need a
-  particular kind of neighbor list and use the NeighRequest properties
-  to select the neighbor list settings for the given request. There can
-  be multiple instances of the NeighRequest class and the Neighbor class
-  will try to optimize how they are computed by creating copies or
-  sub-lists where possible.
-
-- The Comm class performs inter-processor communication, typically of
-  ghost atom information.  This usually involves MPI message exchanges
-  with 6 neighboring processors in the 3d logical grid of processors
-  mapped to the simulation box. There are two :doc:`communication styles
-  <comm_style>` enabling different ways to do the domain decomposition.
-  Sometimes the Irregular class is used, when atoms may migrate to
-  arbitrary processors.
-
-- The Domain class stores the simulation box geometry, as well as
-  geometric Regions and any user definition of a Lattice.  The latter
-  are defined by the :doc:`region <region>` and :doc:`lattice <lattice>`
-  commands in an input script.
-
-- The Force class computes various forces between atoms.  The Pair
-  parent class is for non-bonded or pair-wise forces, which in LAMMPS
-  also includes many-body forces such as the Tersoff 3-body potential if
-  those are computed by walking pairwise neighbor lists.  The Bond,
-  Angle, Dihedral, Improper parent classes are styles for bonded
-  interactions within a static molecular topology.  The KSpace parent
-  class is for computing long-range Coulombic interactions.  One of its
-  child classes, PPPM, uses the FFT3D and Remap classes to redistribute
-  and communicate grid-based information across the parallel processors.
-
-- The Modify class stores lists of class instances derived from the
-  :doc:`Fix <fix>` and :doc:`Compute <compute>` base classes.
-
-- The Group class manipulates groups that atoms are assigned to via the
-  :doc:`group <group>` command.  It also has functions to compute
-  various attributes of groups of atoms.
-
-- The Output class is used to generate 3 kinds of output from a LAMMPS
-  simulation: thermodynamic information printed to the screen and log
-  file, dump file snapshots, and restart files.  These correspond to the
-  :doc:`Thermo <thermo_style>`, :doc:`Dump <dump>`, and
-  :doc:`WriteRestart <write_restart>` classes respectively.  The Dump
-  class is a base class with several derived classes implementing
-  various dump style variants.
-
-- The Timer class logs timing information, output at the end
-  of a run.
-
-.. TODO section on "Spatial decomposition and parallel operations"
-..       diagram of 3d processor grid, brick vs. tiled. local vs. ghost
-..       atoms, 6-way communication with pack/unpack functions,
-..       PBC as part of the communication
-
-.. TODO section on "Fixes, Computes, and Variables"
-..      how and when data is computed and provided and how it is
-..      referenced. flags in Fix/Compute/Variable classes tell
-..      style and amount of available data.
-
-
-How a timestep works
-====================
-
-The first and most fundamental operation within LAMMPS to understand is
-how a timestep is structured.  Timestepping is performed by calling
-methods of the Integrate class instance within the Update class.  Since
-Integrate is a base class, it will point to an instance of a derived
-class corresponding to what is selected by the :doc:`run_style
-<run_style>` input script command.
-
-In this section, the timestep implemented by the Verlet class is
-described.  A similar timestep protocol is implemented by the Respa
-class, for the r-RESPA hierarchical timestepping method.
-
-The Min base class performs energy minimization, so does not perform a
-literal timestep.  But it has logic similar to what is described here,
-to compute forces and invoke fixes at each iteration of a minimization.
-Differences between time integration and minimization are highlighted at
-the end of this section.
-
-The Verlet class is encoded in the ``src/verlet.cpp`` and ``verlet.h``
-files.  It implements the velocity-Verlet timestepping algorithm.  The
-workhorse method is ``Verlet::run()``, but first we highlight several
-other methods in the class.
-
-- The ``init()`` method is called at the beginning of each dynamics
-  run.  It simply sets some internal flags, based on user settings in
-  other parts of the code.
-
-- The ``setup()`` or ``setup_minimal()`` methods are also called before
-  each run.  The velocity-Verlet method requires current forces be
-  calculated before the first timestep, so these routines compute
-  forces due to all atomic interactions, using the same logic that
-  appears in the timestepping described next.  A few fixes are also
-  invoked, using the mechanism described in the next section.  Various
-  counters are also initialized before the run begins.  The
-  ``setup_minimal()`` method is a variant that has a flag for performing
-  less setup.  This is used when runs are continued and information
-  from the previous run is still valid.  For example, if repeated
-  short LAMMPS runs are being invoked, interleaved by other commands,
-  via the *pre no* and *every* options of the run command, the
-  ``setup_minimal()`` method is used.
-
-- The ``force_clear()`` method initializes force and other arrays to
-  zero before each timestep, so that forces (torques, etc) can be
-  accumulated.
-
-Now for the ``Verlet::run()`` method.  Its basic structure in hi-level pseudo
-code is shown below.  In the actual code in ``src/verlet.cpp`` some of
-these operations are conditionally invoked.
-
-.. code-block:: python
-
-   loop over N timesteps:
-     if timeout condition: break
-     ev_set()
-
-     fix->initial_integrate()
-     fix->post_integrate()
-
-     nflag = neighbor->decide()
-     if nflag:
-       fix->pre_exchange()
-       domain->pbc()
-       domain->reset_box()
-       comm->setup()
-       neighbor->setup_bins()
-       comm->exchange()
-       comm->borders()
-       fix->pre_neighbor()
-       neighbor->build()
-       fix->post_neighbor()
-     else:
-       comm->forward_comm()
-
-     force_clear()
-     fix->pre_force()
-
-     pair->compute()
-     bond->compute()
-     angle->compute()
-     dihedral->compute()
-     improper->compute()
-     kspace->compute()
-
-     fix->pre_reverse()
-     comm->reverse_comm()
-
-     fix->post_force()
-     fix->final_integrate()
-     fix->end_of_step()
-
-     if any output on this step:
-       output->write()
-
-   # after loop
-   fix->post_run()
-
-
-The ``ev_set()`` method (in the parent Integrate class), sets two flags
-(*eflag* and *vflag*) for energy and virial computation.  Each flag
-encodes whether global and/or per-atom energy and virial should be
-calculated on this timestep, because some fix or variable or output will
-need it.  These flags are passed to the various methods that compute
-particle interactions, so that they either compute and tally the
-corresponding data or can skip the extra calculations if the energy and
-virial are not needed.  See the comments for the ``Integrate::ev_set()``
-method which document the flag values.
-
-At various points of the timestep, fixes are invoked,
-e.g. ``fix->initial_integrate()``.  In the code, this is actually done
-via the Modify class which stores all the Fix objects and lists of which
-should be invoked at what point in the timestep.  Fixes are the LAMMPS
-mechanism for tailoring the operations of a timestep for a particular
-simulation.  As described elsewhere, each fix has one or more methods,
-each of which is invoked at a specific stage of the timestep, as show in
-the timestep pseudo-code.  All the active fixes defined in an input
-script, that are flagged to have an ``initial_integrate()`` method are
-invoked at the beginning of each timestep.  Examples are :doc:`fix nve
-<fix_nve>` or :doc:`fix nvt or fix npt <fix_nh>` which perform the
-start-of-timestep velocity-Verlet integration operations to update
-velocities by a half-step, and coordinates by a full step.  The
-``post_integrate()`` method is next for operations that need to happen
-immediately after those updates.  Only a few fixes use this, e.g. to
-reflect particles off box boundaries in the :doc:`FixWallReflect class
-<fix_wall_reflect>`.
-
-The ``decide()`` method in the Neighbor class determines whether
-neighbor lists need to be rebuilt on the current timestep (conditions
-can be changed using the :doc:`neigh_modify every/delay/check
-<neigh_modify>` command.  If not, coordinates of ghost atoms are
-acquired by each processor via the ``forward_comm()`` method of the Comm
-class.  If neighbor lists need to be built, several operations within
-the inner if clause of the pseudo-code are first invoked.  The
-``pre_exchange()`` method of any defined fixes is invoked first.
-Typically this inserts or deletes particles from the system.
-
-Periodic boundary conditions are then applied by the Domain class via
-its ``pbc()`` method to remap particles that have moved outside the
-simulation box back into the box.  Note that this is not done every
-timestep, but only when neighbor lists are rebuilt.  This is so that
-each processor's sub-domain will have consistent (nearby) atom
-coordinates for its owned and ghost atoms.  It is also why dumped atom
-coordinates may be slightly outside the simulation box if not dumped
-on a step where the neighbor lists are rebuilt.
-
-The box boundaries are then reset (if needed) via the ``reset_box()``
-method of the Domain class, e.g. if box boundaries are shrink-wrapped to
-current particle coordinates.  A change in the box size or shape
-requires internal information for communicating ghost atoms (Comm class)
-and neighbor list bins (Neighbor class) be updated.  The ``setup()``
-method of the Comm class and ``setup_bins()`` method of the Neighbor
-class perform the update.
-
-The code is now ready to migrate atoms that have left a processor's
-geometric sub-domain to new processors.  The ``exchange()`` method of
-the Comm class performs this operation.  The ``borders()`` method of the
-Comm class then identifies ghost atoms surrounding each processor's
-sub-domain and communicates ghost atom information to neighboring
-processors.  It does this by looping over all the atoms owned by a
-processor to make lists of those to send to each neighbor processor.  On
-subsequent timesteps, the lists are used by the ``Comm::forward_comm()``
-method.
-
-Fixes with a ``pre_neighbor()`` method are then called.  These typically
-re-build some data structure stored by the fix that depends on the
-current atoms owned by each processor.
-
-Now that each processor has a current list of its owned and ghost
-atoms, LAMMPS is ready to rebuild neighbor lists via the ``build()``
-method of the Neighbor class.  This is typically done by binning all
-owned and ghost atoms, and scanning a stencil of bins around each
-owned atom's bin to make a Verlet list of neighboring atoms within the
-force cutoff plus neighbor skin distance.
-
-In the next portion of the timestep, all interaction forces between
-particles are computed, after zeroing the per-atom force vector via the
-``force_clear()`` method.  If the newton flag is set to *on* by the
-newton command, forces are added to both owned and ghost atoms, otherwise
-only to owned (aka local) atoms.
-
-Pairwise forces are calculated first, which enables the global virial
-(if requested) to be calculated cheaply (at O(N) cost instead of O(N**2)
-at the end of the ``Pair::compute()`` method), by a dot product of atom
-coordinates and forces.  By including owned and ghost atoms in the dot
-product, the effect of periodic boundary conditions is correctly
-accounted for.  Molecular topology interactions (bonds, angles,
-dihedrals, impropers) are calculated next (if supported by the current
-atom style).  The final contribution is from long-range Coulombic
-interactions, invoked by the KSpace class.
-
-The ``pre_reverse()`` method in fixes is used for operations that have to
-be done *before* the upcoming reverse communication (e.g. to perform
-additional data transfers or reductions for data computed during the
-force computation and stored with ghost atoms).
-
-If the newton flag is on, forces on ghost atoms are communicated and
-summed back to their corresponding owned atoms.  The ``reverse_comm()``
-method of the Comm class performs this operation, which is essentially
-the inverse operation of sending copies of owned atom coordinates to
-other processor's ghost atoms.
-
-At this point in the timestep, the total force on each (local) atom is
-known.  Additional force constraints (external forces, SHAKE, etc) are
-applied by Fixes that have a ``post_force()`` method.  The second half
-of the velocity-Verlet integration, ``final_integrate()`` is then
-performed (another half-step update of the velocities) via fixes like
-nve, nvt, npt.
-
-At the end of the timestep, fixes that contain an ``end_of_step()``
-method are invoked.  These typically perform a diagnostic calculation,
-e.g. the ave/time and ave/spatial fixes.  The final operation of the
-timestep is to perform any requested output, via the ``write()`` method
-of the Output class.  There are 3 kinds of LAMMPS output: thermodynamic
-output to the screen and log file, snapshots of atom data to a dump
-file, and restart files.  See the :doc:`thermo_style <thermo_style>`,
-:doc:`dump <dump>`, and :doc:`restart <restart>` commands for more
-details.
-
-The the flow of control during energy minimization iterations is
-similar to that of a molecular dynamics timestep.  Forces are computed,
-neighbor lists are built as needed, atoms migrate to new processors, and
-atom coordinates and forces are communicated to neighboring processors.
-The only difference is what Fix class operations are invoked when.  Only
-a subset of LAMMPS fixes are useful during energy minimization, as
-explained in their individual doc pages.  The relevant Fix class methods
-are ``min_pre_exchange()``, ``min_pre_force()``, and ``min_post_force()``.
-Each fix is invoked at the appropriate place within the minimization
-iteration.  For example, the ``min_post_force()`` method is analogous to
-the ``post_force()`` method for dynamics; it is used to alter or constrain
-forces on each atom, which affects the minimization procedure.
-
-After all iterations are completed there is a ``cleanup`` step which
-calls the ``post_run()`` method of fixes to perform operations only required
-at the end of a calculations (like freeing temporary storage or creating
-final outputs).
-
-Writing LAMMPS styles
-=====================
-
-The :doc:`Modify` section of the manual gives an overview of how LAMMPS can
-be extended by writing new classes that derive from existing
-parent classes in LAMMPS.  Here, some specific coding
-details are provided for writing code for LAMMPS.
-
-Writing a new fix style
-^^^^^^^^^^^^^^^^^^^^^^^
-
-Writing fixes is a flexible way of extending LAMMPS.  Users can
-implement many things using fixes:
-
-- changing particles attributes (positions, velocities, forces, etc.). Examples: FixNVE, FixFreeze.
-- reading/writing data. Example: FixRestart.
-- adding or modifying properties due to geometry. Example: FixWall.
-- interacting with other subsystems or external code: Examples: FixTTM, FixExternal, FixLATTE
-- saving information for analysis or future use (previous positions,
-  for instance). Examples: Fix AveTime, FixStoreState.
-
-
-All fixes are derived from the Fix base class and must have a
-constructor with the signature: ``FixPrintVel(class LAMMPS *, int, char **)``.
-
-Every fix must be registered in LAMMPS by writing the following lines
-of code in the header before include guards:
-
-.. code-block:: c
-
-   #ifdef FIX_CLASS
-   FixStyle(print/vel,FixPrintVel)
-   #else
-   /* the definition of the FixPrintVel class comes here */
-   ...
-   #endif
-
-Where ``print/vel`` is the style name of your fix in the input script and
-``FixPrintVel`` is the name of the class. The header file would be called
-``fix_print_vel.h`` and the implementation file ``fix_print_vel.cpp``.
-These conventions allow LAMMPS to automatically integrate it into the
-executable when compiling and associate your new fix class with the designated
-keyword when it parses the input script.
-
-Let's write a simple fix which will print the average velocity at the end
-of each timestep. First of all, implement a constructor:
-
-.. code-block:: C++
-
-   FixPrintVel::FixPrintVel(LAMMPS *lmp, int narg, char **arg)
-   : Fix(lmp, narg, arg)
-   {
-     if (narg < 4)
-       error->all(FLERR,"Illegal fix print/vel command");
-
-     nevery = force->inumeric(FLERR,arg[3]);
-     if (nevery <= 0)
-       error->all(FLERR,"Illegal fix print/vel command");
-   }
-
-In the constructor you should parse your fix arguments which are
-specified in the script. All fixes have pretty the same syntax:
-``fix <fix-ID> <fix group> <fix name> <fix arguments ...>``. The
-first 3 parameters are parsed by Fix base class constructor, while
-``<fix arguments>`` should be parsed by you. In our case, we need to
-specify how often we want to print an average velocity. For instance,
-once in 50 timesteps: ``fix 1 print/vel 50``. There is a special variable
-in the Fix class called ``nevery`` which specifies how often the method
-``end_of_step()`` is called. Thus all we need to do is just set it up.
-
-The next method we need to implement is ``setmask()``:
-
-.. code-block:: C++
-
-   int FixPrintVel::setmask()
-   {
-     int mask = 0;
-     mask |= FixConst::END_OF_STEP;
-     return mask;
-   }
-
-Here the user specifies which methods of your fix should be called
-during execution. The constant ``END_OF_STEP`` corresponds to the
-``end_of_step()`` method. The most important available methods that
-are called during a timestep and the order in which they are called
-are shown in the previous section.
-
-.. code-block:: C++
-
-   void FixPrintVel::end_of_step()
-   {
-     // for add3, scale3
-     using namespace MathExtra;
-
-     double** v = atom->v;
-     int nlocal = atom->nlocal;
-     double localAvgVel[4]; // 4th element for particles count
-     memset(localAvgVel, 0, 4 * sizeof(double));
-     for (int particleInd = 0; particleInd < nlocal; ++particleInd) {
-       add3(localAvgVel, v[particleInd], localAvgVel);
-     }
-     localAvgVel[3] = nlocal;
-     double globalAvgVel[4];
-     memset(globalAvgVel, 0, 4 * sizeof(double));
-     MPI_Allreduce(localAvgVel, globalAvgVel, 4, MPI_DOUBLE, MPI_SUM, world);
-     scale3(1.0 / globalAvgVel[3], globalAvgVel);
-     if ((comm->me == 0) && screen) {
-       fmt::print(screen,"{}, {}, {}\n",
-                  globalAvgVel[0], globalAvgVel[1], globalAvgVel[2]);
-     }
-   }
-
-In the code above, we use MathExtra routines defined in
-``math_extra.h``.  There are bunch of math functions to work with
-arrays of doubles as with math vectors.  It is also important to note
-that LAMMPS code should always assume to be run in parallel and that
-atom data is thus distributed across the MPI ranks.  Thus you can
-only process data from local atoms directly and need to use MPI library
-calls to combine or exchange data.  For serial execution, LAMMPS
-comes bundled with the MPI STUBS library that contains the MPI library
-function calls in dummy versions that only work for a single MPI rank.
-
-In this code we use an instance of Atom class. This object is stored
-in the Pointers class (see ``pointers.h``) which is the base class of
-the Fix base class. This object contains references to various class
-instances (the original instances are created and held by the LAMMPS
-class) with all global information about the simulation system.
-Data from the Pointers class is available to all classes inherited from
-it using protected inheritance. Hence when you write you own class,
-which is going to use LAMMPS data, don't forget to inherit from Pointers
-or pass an Pointer to it to all functions that need access. When writing
-fixes we inherit from class Fix which is inherited from Pointers so
-there is no need to inherit from it directly.
-
-The code above computes average velocity for all particles in the
-simulation.  Yet you have one unused parameter in fix call from the
-script: ``group_name``.  This parameter specifies the group of atoms
-used in the fix. So we should compute average for all particles in the
-simulation only if ``group_name == "all"``, but it can be any group.
-The group membership information of an atom is contained in the *mask*
-property of and atom and the bit corresponding to a given group is
-stored in the groupbit variable which is defined in Fix base class:
-
-.. code-block:: C++
-
-   for (int i = 0; i < nlocal; ++i) {
-     if (atom->mask[i] & groupbit) {
-     // Do all job here
-     }
-   }
-
-Class Atom encapsulates atoms positions, velocities, forces, etc. User
-can access them using particle index. Note, that particle indexes are
-usually changed every few timesteps because of neighbor list rebuilds
-and spatial sorting (to improve cache efficiency).
-
-Let us consider another Fix example: We want to have a fix which stores
-atoms position from previous time step in your fix. The local atoms
-indexes may not be valid on the next iteration. In order to handle
-this situation there are several methods which should be implemented:
-
-- ``double memory_usage()``: return how much memory the fix uses (optional)
-- ``void grow_arrays(int)``: do reallocation of the per particle arrays in your fix
-- ``void copy_arrays(int i, int j, int delflag)``: copy i-th per-particle
-  information to j-th. Used when atom sorting is performed. if delflag is set
-  and atom j owns a body, move the body information to atom i.
-- ``void set_arrays(int i)``: sets i-th particle related information to zero
-
-Note, that if your class implements these methods, it must call add calls of
-add_callback and delete_callback to constructor and destructor. Since we want
-to store positions of atoms from previous timestep, we need to add
-``double** xold`` to the header file. Than add allocation code
-to the constructor:
-
-.. code-block:: C++
-
-   FixSavePos::FixSavePos(LAMMPS *lmp, int narg, char **arg), xold(nullptr)
-   {
-   //...
-     memory->create(xold, atom->nmax, 3, "FixSavePos:x");
-     atom->add_callback(0);
-   }
-
-   FixSavePos::~FixSavePos() {
-     atom->delete_callback(id, 0);
-     memory->destroy(xold);
-   }
-
-Implement the aforementioned methods:
-
-.. code-block:: C++
-
-   double FixSavePos::memory_usage()
-   {
-     int nmax = atom->nmax;
-     double bytes = 0.0;
-     bytes += nmax * 3 * sizeof(double);
-     return bytes;
-   }
-
-   void FixSavePos::grow_arrays(int nmax)
-   {
-     memory->grow(xold, nmax, 3, "FixSavePos:xold");
-   }
-
-   void FixSavePos::copy_arrays(int i, int j, int delflag)
-   {
-     memcpy(xold[j], xold[i], sizeof(double) * 3);
-   }
-
-   void FixSavePos::set_arrays(int i)
-   {
-     memset(xold[i], 0, sizeof(double) * 3);
-   }
-
-   int FixSavePos::pack_exchange(int i, double *buf)
-   {
-     int m = 0;
-     buf[m++] = xold[i][0];
-     buf[m++] = xold[i][1];
-     buf[m++] = xold[i][2];
-
-     return m;
-   }
-
-   int FixSavePos::unpack_exchange(int nlocal, double *buf)
-   {
-     int m = 0;
-     xold[nlocal][0] = buf[m++];
-     xold[nlocal][1] = buf[m++];
-     xold[nlocal][2] = buf[m++];
-
-     return m;
-   }
-
-Now, a little bit about memory allocation. We use the Memory class which
-is just a bunch of template functions for allocating 1D and 2D
-arrays. So you need to add include ``memory.h`` to have access to them.
-
-Finally, if you need to write/read some global information used in
-your fix to the restart file, you might do it by setting flag
-``restart_global = 1`` in the constructor and implementing methods void
-``write_restart(FILE *fp)`` and ``void restart(char *buf)``.
-If, in addition, you want to write the per-atom property to restart
-files additional settings and functions are needed:
-
-- a fix flag indicating this needs to be set ``restart_peratom = 1;``
-- ``atom->add_callback()`` and ``atom->delete_callback()`` must be called
-  a second time with the final argument set to 1 instead of 0 (indicating
-  restart processing instead of per-atom data memory management).
-- the functions ``void pack_restart(int i, double *buf)`` and
-  ``void unpack_restart(int nlocal, int nth)`` need to be implemented
-
----------------------------
-
-LAMMPS utility functions
-========================
-
-The ``utils`` sub-namespace inside the ``LAMMPS_NS`` namespace provides
-a collection of convenience functions and utilities that perform common
-tasks that are required repeatedly throughout the LAMMPS code like
-reading or writing to files with error checking or translation of
-strings into specific types of numbers with checking for validity.  This
-reduces redundant implementations and encourages consistent behavior.
-
-I/O with status check
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-These are wrappers around the corresponding C library calls like
-``fgets()`` or ``fread()``.  They will check if there were errors
-on reading or an unexpected end-of-file state was reached.  In that
-case, the functions will stop the calculation with an error message,
-indicating the name of the problematic file, if possible.
-
-----------
-
-.. doxygenfunction:: sfgets
-   :project: progguide
-
-.. doxygenfunction:: sfread
-   :project: progguide
-
-String to number conversions with validity check
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-These functions should be used to convert strings to numbers. They are
-are strongly preferred over C library calls like ``atoi()`` or
-``atof()`` since they check if the **entire** provided string is a valid
-(floating-point or integer) number, and will error out instead of
-silently returning the result of a partial conversion or zero in cases
-where the string is not a valid number.  This behavior allows to more
-easily detect typos or issues when processing input files.
-
-The *do_abort* flag should be set to ``true`` in case  this function
-is called only on a single MPI rank, as that will then trigger the
-a call to ``Error::one()`` for errors instead of ``Error::all()``
-and avoids a "hanging" calculation when run in parallel.
-
-Please also see :cpp:func:`is_integer` and :cpp:func:`is_double` for
-testing strings for compliance without conversion.
-
-----------
-
-.. doxygenfunction:: numeric
-   :project: progguide
-
-.. doxygenfunction:: inumeric
-   :project: progguide
-
-.. doxygenfunction:: bnumeric
-   :project: progguide
-
-.. doxygenfunction:: tnumeric
-   :project: progguide
-
-
-String processing
-^^^^^^^^^^^^^^^^^
-
-The following are functions to help with processing strings
-and parsing files or arguments.
-
-----------
-
-.. doxygenfunction:: trim
-   :project: progguide
-
-.. doxygenfunction:: trim_comment
-   :project: progguide
-
-.. doxygenfunction:: count_words(const char *text)
-   :project: progguide
-
-.. doxygenfunction:: count_words(const std::string &text)
-   :project: progguide
-
-.. doxygenfunction:: count_words(const std::string &text, const std::string &separators)
-   :project: progguide
-
-.. doxygenfunction:: trim_and_count_words
-   :project: progguide
-
-.. doxygenfunction:: split_words
-   :project: progguide
-
-.. doxygenfunction:: strmatch
-   :project: progguide
-
-.. doxygenfunction:: is_integer
-   :project: progguide
-
-.. doxygenfunction:: is_double
-   :project: progguide
-
-File and path functions
-^^^^^^^^^^^^^^^^^^^^^^^^^
-
-.. doxygenfunction:: guesspath
-   :project: progguide
-
-.. doxygenfunction:: path_basename
-   :project: progguide
-
-.. doxygenfunction:: path_join
-   :project: progguide
-
-.. doxygenfunction:: file_is_readable
-   :project: progguide
-
-Potential file functions
-^^^^^^^^^^^^^^^^^^^^^^^^
-
-.. doxygenfunction:: get_potential_file_path
-   :project: progguide
-
-.. doxygenfunction:: get_potential_date
-   :project: progguide
-
-.. doxygenfunction:: get_potential_units
-   :project: progguide
-
-.. doxygenfunction:: get_supported_conversions
-   :project: progguide
-
-.. doxygenfunction:: get_conversion_factor
-   :project: progguide
-
-.. doxygenfunction:: open_potential(const std::string &name, LAMMPS *lmp, int *auto_convert)
-   :project: progguide
-
-Argument processing
-^^^^^^^^^^^^^^^^^^^
-
-.. doxygenfunction:: bounds
-   :project: progguide
-
-.. doxygenfunction:: expand_args
-   :project: progguide
-
-Convenience functions
-^^^^^^^^^^^^^^^^^^^^^
-
-.. doxygenfunction:: logmesg
-   :project: progguide
-
-.. doxygenfunction:: getsyserror
-   :project: progguide
-
-.. doxygenfunction:: check_packages_for_style
-   :project: progguide
-
-.. doxygenfunction:: timespec2seconds
-   :project: progguide
-
-.. doxygenfunction:: date2num
-   :project: progguide
-
----------------------------
-
-Tokenizer classes
-=================
-
-The purpose of the tokenizer classes is to simplify the recurring task
-of breaking lines of text down into words and/or numbers.
-Traditionally, LAMMPS code would be using the ``strtok()`` function from
-the C library for that purpose, but that function has two significant
-disadvantages: 1) it cannot be used concurrently from different LAMMPS
-instances since it stores its status in a global variable and 2) it
-modifies the string that it is processing.  These classes were
-implemented to avoid both of these issues and also to reduce the amount
-of code that needs to be written.
-
-The basic procedure is to create an instance of the tokenizer class with
-the string to be processed as an argument and then do a loop until all
-available tokens are read.  The constructor has a default set of
-separator characters, but that can be overridden. The default separators
-are all "whitespace" characters, i.e. the space character, the tabulator
-character, the carriage return character, the linefeed character, and
-the form feed character.
-
-.. code-block:: C++
-   :caption: Tokenizer class example listing entries of the PATH environment variable
-
-   #include "tokenizer.h"
-   #include <cstdlib>
-   #include <string>
-   #include <iostream>
-
-   using namespace LAMMPS_NS;
-
-   int main(int, char **)
-   {
-       const char *path = getenv("PATH");
-
-       if (path != nullptr) {
-           Tokenizer p(path,":");
-           while (p.has_next())
-               std::cout << "Entry: " << p.next() << "\n";
-       }
-       return 0;
-   }
-
-Most tokenizer operations cannot fail except for
-:cpp:func:`LAMMPS_NS::Tokenizer::next` (when used without first
-checking with :cpp:func:`LAMMPS_NS::Tokenizer::has_next`) and
-:cpp:func:`LAMMPS_NS::Tokenizer::skip`.  In case of failure, the class
-will throw an exception, so you may need to wrap the code using the
-tokenizer into a ``try`` / ``catch`` block to handle errors.  The
-:cpp:class:`LAMMPS_NS::ValueTokenizer` class may also throw an exception
-when a (type of) number is requested as next token that is not
-compatible with the string representing the next word.
-
-.. code-block:: C++
-   :caption: ValueTokenizer class example with exception handling
-
-   #include "tokenizer.h"
-   #include <cstdlib>
-   #include <string>
-   #include <iostream>
-
-   using namespace LAMMPS_NS;
-
-   int main(int, char **)
-   {
-       const char *text = "1 2 3 4 5 20.0 21 twentytwo 2.3";
-       double num1(0),num2(0),num3(0),num4(0);
-
-       ValueTokenizer t(text);
-       // read 4 doubles after skipping over 5 numbers
-       try {
-           t.skip(5);
-           num1 = t.next_double();
-           num2 = t.next_double();
-           num3 = t.next_double();
-           num4 = t.next_double();
-       } catch (TokenizerException &e) {
-           std::cout << "Reading numbers failed: " << e.what() << "\n";
-       }
-       std::cout << "Values: " << num1 << " " << num2 << " " << num3 << " " << num4 << "\n";
-       return 0;
-   }
-
-This code example should produce the following output:
-
-.. code-block::
-
-   Reading numbers failed: Not a valid floating-point number: 'twentytwo'
-   Values: 20 21 0 0
-
-----------
-
-.. doxygenclass:: LAMMPS_NS::Tokenizer
-   :project: progguide
-   :members:
-
-.. doxygenclass:: LAMMPS_NS::TokenizerException
-   :project: progguide
-   :members:
-
-.. doxygenclass:: LAMMPS_NS::ValueTokenizer
-   :project: progguide
-   :members:
-
-.. doxygenclass:: LAMMPS_NS::InvalidIntegerException
-   :project: progguide
-   :members: what
-
-.. doxygenclass:: LAMMPS_NS::InvalidFloatException
-   :project: progguide
-   :members: what
-
-File reader classes
-====================
-
-The purpose of the file reader classes is to simplify the recurring task
-of reading and parsing files. They can use the
-:cpp:class:`LAMMPS_NS::ValueTokenizer` class to process the read in
-text.  The :cpp:class:`LAMMPS_NS::TextFileReader` is a more general
-version while :cpp:class:`LAMMPS_NS::PotentialFileReader` is specialized
-to implement the behavior expected for looking up and reading/parsing
-files with potential parameters in LAMMPS.  The potential file reader
-class requires a LAMMPS instance, requires to be run on MPI rank 0 only,
-will use the :cpp:func:`LAMMPS_NS::utils::get_potential_file_path`
-function to look up and open the file, and will call the
-:cpp:class:`LAMMPS_NS::Error` class in case of failures to read or to
-convert numbers, so that LAMMPS will be aborted.
-
-.. code-block:: C++
-   :caption: Use of PotentialFileReader class in pair style coul/streitz
-
-    PotentialFileReader reader(lmp, file, "coul/streitz");
-    char * line;
-
-    while((line = reader.next_line(NPARAMS_PER_LINE))) {
-      try {
-        ValueTokenizer values(line);
-        std::string iname = values.next_string();
-
-        int ielement;
-        for (ielement = 0; ielement < nelements; ielement++)
-          if (iname == elements[ielement]) break;
-
-        if (nparams == maxparam) {
-          maxparam += DELTA;
-          params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
-                                              "pair:params");
-        }
-
-        params[nparams].ielement = ielement;
-        params[nparams].chi = values.next_double();
-        params[nparams].eta = values.next_double();
-        params[nparams].gamma = values.next_double();
-        params[nparams].zeta = values.next_double();
-        params[nparams].zcore = values.next_double();
-
-      } catch (TokenizerException & e) {
-        error->one(FLERR, e.what());
-      }
-      nparams++;
-    }
-
-A file that would be parsed by the reader code fragment looks like this:
-
-   # DATE: 2015-02-19 UNITS: metal CONTRIBUTOR: Ray Shan CITATION: Streitz and Mintmire, Phys Rev B, 50, 11996-12003 (1994)
-   #
-   # X (eV)                J (eV)          gamma (1/\AA)   zeta (1/\AA)    Z (e)
-
-   Al      0.000000        10.328655       0.000000        0.968438        0.763905
-   O       5.484763        14.035715       0.000000        2.143957        0.000000
-
-
-----------
-
-.. doxygenclass:: LAMMPS_NS::TextFileReader
-   :project: progguide
-   :members:
-
-.. doxygenclass:: LAMMPS_NS::PotentialFileReader
-   :project: progguide
-   :members:
-
-Eigensolver classes
-===============================================================
-
-The "math_eigen.h" file contains the definition of 3 template classes
-used for calculating eigenvalues and eigenvectors of matrices:
-"Jacobi", "PEigenDense", and "LambdaLanczos".
-
-"Jacobi" calculates all of the eigenvalues and eigenvectors
-of a dense, symmetric, real matrix.
-
-The "PEigenDense" class only calculates the principal eigenvalue
-(ie. the largest or smallest eigenvalue), and its corresponding eigenvector.
-However it is much more efficient than "Jacobi" when applied to large matrices
-(larger than 13x13).  PEigenDense also can understand complex-valued
-Hermitian matrices.
-
-The "LambdaLanczos" class is a generalization of "PEigenDense" which can be
-applied to arbitrary sparse matrices.
-
-Together, these matrix eigensolvers cover a fairly wide range of use cases.
-
-Note: The code described here does not take advantage of parallelization.
-(It is assumed that the matrices are small enough
-that they can be diagonalized using individual CPU cores.)
-
-.. code-block:: C++
-   :caption: Jacobi usage example
-
-   #include "math_eigen.h"
-   using namespace MathEigen;
-
-   int n = 5;       // Matrix size
-   double **M;      // A symmetric n x n matrix you want to diagonalize
-   double *evals;   // Store the eigenvalues here.
-   double **evects; // Store the eigenvectors here.
-   // Allocate space for M, evals, and evects, and load contents of M (omitted)
-
-   // Now create an instance of Jacobi ("eigen_calc"). This will allocate space
-   // for storing intermediate calculations.  Once created, it can be reused
-   // multiple times without paying the cost of allocating memory on the heap.
-
-   Jacobi<double, double*, double**> eigen_calc(n);
-
-   // Note:
-   // If the matrix you plan to diagonalize (M) is read-only, use this instead:
-   // Jacobi<double, double*, double**, double const*const*> eigen_calc(n);
-
-   // Now, calculate the eigenvalues and eigenvectors of M
-   eigen_calc.Diagonalize(M, evals, evects);
-
-The Jacobi class is not limited to double** matrices.  It works on any C or C++
-object that supports indexing using [i][j] bracket notation.
-For example, if you prefer using std::vectors, then define a
-Jacobi instance this way instead:
-
-.. code-block:: C++
-   :caption: Jacobi std::vector example
-
-   Jacobi<double, vector<double>&, vector<vector<double>>&, const vector<vector<double>>&> eigen_calc(n);
-
-
-The PEigenDense class is useful for diagonalizing larger matrices
-which can be real (symmetric) or complex-valued (Hermitian):
-
-.. code-block:: C++
-   :caption: PEigenDense usage example
-
-   #include "math_eigen.h"
-   using namespace MathEigen;
-
-   const int n = 100;
-
-   PEigenDense<double, double*, double const*const*>  pe(n);
-
-   double **M;       // A symmetric n x n matrix you want to diagonalize
-   double evect[n];  // Store the principal eigenvector here.
-
-   // Now, allocate space for M and load it's contents. (omitted)
-
-   double eval = pe.PrincipalEigen(M, evect, true);
-
-   // This calculates only the maximum eigenvalue and its eigenvector
-
-
-The "LambdaLanczos" class generalizes "PEigenDense" by allowing the user
-to diagonalize arbitrary sparse matrices.  The "LambdaLanczos" class
-does not need to know how the matrices are implemented or stored in memory.
-Instead, users supply a function as an argument to the "LambdaLanczos"
-constructor (a lambda expression) that multiplies vectors by matrices. The
-specific implementation details are never revealed to the LambdaLanczos class.
-This allows users to choose arbitrary data structures to represent
-(sparse or dense) matrices.
-
-Note: If the matrix is not positive or negative definite,
-then user must specify an "eigenvalue_offset" parameter.  (See below.)
-
-Note: Both "LambdaLanczos" and "PEigenDense" use the Lanczos algorithm.
-
-.. code-block:: C++
-   :caption: LambdaLanczos usage example
-
-   #include "math_eigen.h"
-   using namespace MathEigen;
-
-   const int n = 3;
-   double M[n][n] = { {-1.0, -1.0, 1.0},
-                      {-1.0, 1.0, 1.0},
-                      { 1.0, 1.0, 1.0} };
-   // (Its eigenvalues are {-2, 1, 2})
-
-   // Specify the matrix-vector multiplication function
-   auto mv_mul = [&](const vector<double>& in, vector<double>& out) {
-     for(int i = 0;i < n;i++) {
-       for(int j = 0;j < n;j++) {
-         out[i] += M[i][j]*in[j];
-       }
-     }
-   };
-
-   LambdaLanczos<double> engine(mv_mul, n, true);
-   //(Setting 3rd arg (find_maximum) to true calculates the largest eigenvalue.)
-
-   engine.eigenvalue_offset = 3.0;   // = max_i{sum_j|Mij|}  (see below)
-
-   double eigenvalue; //(must never be a complex number, even if M is complex)
-   vector<double> eigenvector(n);
-
-   int itern = engine.run(eigenvalue, eigenvector);
-
-   cout << "Iteration count: " << itern << endl;
-   cout << "Eigenvalue: " << eigenvalue << endl;
-   cout << "Eigenvector:";
-   for(int i = 0; i < n; i++) {
-     cout << eigenvector[i] << " ";
-   }
-   cout << endl;
-
-In this example, an small dense square matrix was used for simplicity.
-One could however, implement a large sparse matrix whose elements are
-stored as a list of {row-index, column-index, value} tuples,
-and modify the "mv_mult" function accordingly.
-
-
-IMPORTANT:
-The Lanczos algorithm finds the largest magnitude eigenvalue, so you
-MUST ensure that the eigenvalue you are seeking has the largest magnitude
-(regardless of whether it is the maximum or minimum eigenvalue).
-To insure that this is so, you can add or subtract a number to all
-of the eigenvalues of the matrix by specifying the "eigenvalue_offset".
-This number should exceed the largest magnitude eigenvalue of the matrix.
-According to the Gershgorin theorem, you can estimate this number using
-
-r = max_i{sum_j|Mij|}
-or
-r = max_j{sum_i|Mij|}
-
-(where Mij are the elements of the matrix and sum_j denotes the sum over j).
-
-If you are seeking the maximum eigenvalue, then use:
-
-   eigenvalue_offset = +r
-
-If you are seeking the minimum eigenvalue, use:
-
-   eigenvalue_offset = -r
-
-You can omit this step if you are seeking the maximum eigenvalue,
-and the matrix is positive definite, or if you are seeking the minimum
-eigenvalue and the matrix is negative definite.)
-Otherwise, for dense (or mostly-dense) matrices, you can use the
-"ChooseOffset()" member function to pick the eigenvalue_offset automatically.
-Otherwise, the eigenvalue_offset MUST be specified by the user explicitly.
-(LambdaLanczos is ignorant of the way the matrix is implemented internally,
-so it does not have an efficient and general way to access the
-elements of a sparse matrix.)
-
-----------
-
-.. doxygenclass:: MathEigen::Jacobi
-   :project: progguide
-   :members:
-
-.. doxygenclass:: MathEigen::PEigenDense
-   :project: progguide
-   :members:
-
-.. doxygenclass:: MathEigen::LambdaLanczos
-   :project: progguide
-   :members:
-
+.. toctree::
+   :maxdepth: 1
+
+   pg_dev_org
+   pg_dev_flow
+   pg_dev_write
+   pg_dev_utils
+   pg_dev_classes
diff --git a/doc/src/pg_input.rst b/doc/src/pg_input.rst
new file mode 100644
index 0000000000..72fdd93578
--- /dev/null
+++ b/doc/src/pg_input.rst
@@ -0,0 +1,7 @@
+LAMMPS Input Base Class
+************************
+
+.. doxygenclass:: LAMMPS_NS::Input
+      :project: progguide
+      :members:
+
diff --git a/doc/src/pg_lammps.rst b/doc/src/pg_lammps.rst
new file mode 100644
index 0000000000..efa282e33a
--- /dev/null
+++ b/doc/src/pg_lammps.rst
@@ -0,0 +1,22 @@
+LAMMPS Class
+************
+
+The LAMMPS class encapsulates an MD simulation state and thus it is
+the class that needs to be created when starting a new simulation system
+state.  The LAMMPS executable essentially creates one instance of this
+class and passes the command line flags and tells it to process the
+provided input (a file or ``stdin``).  It shuts the class down when
+control is returned to it and then exits.  When using LAMMPS as a
+library from another code it is required to create an instance of this
+class, either directly from C++ with ``new LAMMPS()`` or through one
+of the library interface functions like :cpp:func:`lammps_open` of the
+C-library interface, or the :py:class:`lammps.lammps` class constructor
+of the Python module, or the :f:func:`lammps` constructor of the Fortran
+module.
+
+--------------------
+
+.. doxygenclass:: LAMMPS_NS::LAMMPS
+   :project: progguide
+   :members:
+
diff --git a/doc/utils/requirements.txt b/doc/utils/requirements.txt
index 2312037b5c..e025e23b09 100644
--- a/doc/utils/requirements.txt
+++ b/doc/utils/requirements.txt
@@ -1,5 +1,6 @@
 Sphinx
 sphinxcontrib-spelling
 git+https://github.com/akohlmey/sphinx-fortran@parallel-read
+sphinx_tabs
 breathe
 Pygments
diff --git a/doc/utils/sphinx-config/_themes/lammps_theme/static/css/lammps.css b/doc/utils/sphinx-config/_themes/lammps_theme/static/css/lammps.css
index b5637b389c..b9816c3f8c 100644
--- a/doc/utils/sphinx-config/_themes/lammps_theme/static/css/lammps.css
+++ b/doc/utils/sphinx-config/_themes/lammps_theme/static/css/lammps.css
@@ -3,3 +3,7 @@
     display: block;
     margin-bottom: 0.809em;
 }
+
+.versionmodified {
+    font-weight: bold;
+}
diff --git a/doc/utils/sphinx-config/conf.py.in b/doc/utils/sphinx-config/conf.py.in
index c6b161ee14..23168bf080 100644
--- a/doc/utils/sphinx-config/conf.py.in
+++ b/doc/utils/sphinx-config/conf.py.in
@@ -47,7 +47,9 @@ extensions = [
     'sphinx.ext.imgmath',
     'sphinx.ext.autodoc',
     'sphinxfortran.fortran_domain',
+    'sphinx_tabs.tabs',
     'table_from_list',
+    'tab_or_note',
     'breathe',
 ]
 # 2017-12-07: commented out, since this package is broken with Sphinx 16.x
diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt
index 53d8ca254d..68646be308 100644
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@@ -1321,6 +1321,7 @@ initializations
 initio
 InP
 inregion
+instantiation
 Institut
 integrators
 Integrators
@@ -1777,6 +1778,7 @@ Mattox
 Mattson
 maxangle
 maxbond
+maxchunk
 maxelt
 maxeval
 maxfiles
@@ -2002,6 +2004,7 @@ MxN
 myCompute
 myIndex
 mylammps
+MyPool
 mysocket
 myTemp
 myVec
@@ -3492,6 +3495,7 @@ zz
 Zm
 PowerShell
 filesystems
+Zstandard
 Zstd
 zstd
 checksum
\ No newline at end of file
diff --git a/fortran/lammps.f90 b/fortran/lammps.f90
index ba2e376b77..6511673c66 100644
--- a/fortran/lammps.f90
+++ b/fortran/lammps.f90
@@ -54,12 +54,11 @@ MODULE LIBLAMMPS
 
   ! interface definitions for calling functions in library.cpp
   INTERFACE
-      FUNCTION lammps_open(argc,argv,comm,handle) &
+      FUNCTION lammps_open(argc,argv,comm) &
           BIND(C, name='lammps_open_fortran')
         IMPORT :: c_ptr, c_int
         INTEGER(c_int), VALUE, INTENT(in)     :: argc, comm
         TYPE(c_ptr), DIMENSION(*), INTENT(in) :: argv
-        TYPE(c_ptr), INTENT(out)              :: handle
         TYPE(c_ptr)                           :: lammps_open
       END FUNCTION lammps_open
 
@@ -161,7 +160,7 @@ CONTAINS
     ENDIF
 
     IF (PRESENT(comm)) THEN
-        lmp_open%handle = lammps_open(argc,argv,comm,dummy)
+        lmp_open%handle = lammps_open(argc,argv,comm)
     ELSE
         lmp_open%handle = lammps_open_no_mpi(argc,argv,dummy)
     END IF
diff --git a/lib/compress/Makefile.lammps b/lib/compress/Makefile.lammps
index 2d06990d82..c0a42443ba 100644
--- a/lib/compress/Makefile.lammps
+++ b/lib/compress/Makefile.lammps
@@ -1,21 +1,24 @@
 # This file contains the settings to build and link LAMMPS with
 # support for data compression libraries.
-# 
+#
 # When you build LAMMPS with the COMPRESS package installed, it will
 # use the 3 settings in this file.  They should be set as follows.
-# 
-# The compress_SYSLIB setting is for linking the compression library.
-# By default, the setting will point to zlib (-lz).
-# 
-# The compress_SYSINC and compress_SYSPATH variables do not typically need
-# to be set, as compression libraries are usually installed as packages
-# in system locations. Otherwise, specify its directory via the
-# compress_SYSPATH variable, e.g. -Ldir or compress_SYSINC variable( -Idir)
+#
+# The compress_SYSLIB setting is for linking the compression libraries.
+# By default, the setting will point to zlib (-lz). For including
+# Zstandard support add -DLAMMPS_ZSTD to compress_SYSINC and also
+# add -lzstd to compress_SYSLIB to link to the library.
+#
+# The compress_SYSINC and compress_SYSPATH variables typically do not
+# need any additional settings, as compression libraries are usually
+# installed as packages in system locations. Otherwise, specify its
+# library directory via the compress_SYSPATH variable, e.g. -Ldir or
+# its include directory via the compress_SYSINC variable (-Idir)
 
 # -----------------------------------------------------------
 
 # Settings that the LAMMPS build will import when this package is installed
 
-compress_SYSINC =
-compress_SYSLIB = -lz
+compress_SYSINC = # -DLAMMPS_ZSTD
+compress_SYSLIB = -lz # -lzstd
 compress_SYSPATH =
diff --git a/src/Purge.list b/src/Purge.list
index ebe1a9c484..0251f923be 100644
--- a/src/Purge.list
+++ b/src/Purge.list
@@ -49,6 +49,8 @@ packages_ntopo.h
 # other auto-generated files
 lmpinstalledpkgs.h
 lmpgitversion.h
+# removed on 9 Sep 2020
+mergesort.h
 # renamed on 8 May 2020
 fix_meso.cpp
 fix_meso.h
diff --git a/src/REPLICA/fix_hyper_local.cpp b/src/REPLICA/fix_hyper_local.cpp
index e57419022c..da444bc451 100644
--- a/src/REPLICA/fix_hyper_local.cpp
+++ b/src/REPLICA/fix_hyper_local.cpp
@@ -164,7 +164,7 @@ FixHyperLocal::FixHyperLocal(LAMMPS *lmp, int narg, char **arg) :
   maxbondperatom = FCCBONDS;
   numcoeff = NULL;
   clist = NULL;
-  cpage = new MyPage<OneCoeff>;
+  cpage = new MyPage<HyperOneCoeff>;
   cpage->init(maxbondperatom,1024*maxbondperatom,1);
 
   // set comm sizes needed by this fix
@@ -976,7 +976,7 @@ void FixHyperLocal::build_bond_list(int natom)
     memory->sfree(clist);
     maxcoeff = atom->nmax;
     memory->create(numcoeff,maxcoeff,"hyper/local:numcoeff");
-    clist = (OneCoeff **) memory->smalloc(maxcoeff*sizeof(OneCoeff *),
+    clist = (HyperOneCoeff **) memory->smalloc(maxcoeff*sizeof(HyperOneCoeff *),
                                          "hyper/local:clist");
   }
 
@@ -1741,7 +1741,7 @@ double FixHyperLocal::memory_usage()
   bytes += 2*maxall * sizeof(double);             // maxstrain,maxstrain_domain
   if (checkbias) bytes += maxall * sizeof(tagint);  // biasflag
   bytes += maxcoeff * sizeof(int);                // numcoeff
-  bytes += maxcoeff * sizeof(OneCoeff *);         // clist
-  bytes += maxlocal*maxbondperatom * sizeof(OneCoeff);  // cpage estimate
+  bytes += maxcoeff * sizeof(HyperOneCoeff *);         // clist
+  bytes += maxlocal*maxbondperatom * sizeof(HyperOneCoeff);  // cpage estimate
   return bytes;
 }
diff --git a/src/REPLICA/fix_hyper_local.h b/src/REPLICA/fix_hyper_local.h
index 4fb54d166c..aa3f050bb9 100644
--- a/src/REPLICA/fix_hyper_local.h
+++ b/src/REPLICA/fix_hyper_local.h
@@ -23,6 +23,8 @@ FixStyle(hyper/local,FixHyperLocal)
 #include "fix_hyper.h"
 
 namespace LAMMPS_NS {
+  // forward declaration. struct HyperOneCoeff is defined in my_page.h
+  struct HyperOneCoeff;
 
 class FixHyperLocal : public FixHyper {
  public:
@@ -183,13 +185,8 @@ class FixHyperLocal : public FixHyper {
 
   // data structs for persisting bias coeffs when bond list is reformed
 
-  struct OneCoeff {
-    double biascoeff;
-    tagint tag;
-  };
-
-  MyPage<OneCoeff> *cpage;     // pages of OneCoeff datums for clist
-  OneCoeff **clist;            // ptrs to vectors of bias coeffs for each atom
+  MyPage<HyperOneCoeff> *cpage;// pages of HyperOneCoeff datums for clist
+  HyperOneCoeff **clist;       // ptrs to vectors of bias coeffs for each atom
   int *numcoeff;               // # of bias coeffs per atom (one per bond)
   int maxcoeff;                // allocate sized of clist and numcoeff
 
diff --git a/src/atom.cpp b/src/atom.cpp
index 90dce44764..49ff262764 100644
--- a/src/atom.cpp
+++ b/src/atom.cpp
@@ -47,6 +47,33 @@ using namespace MathConst;
 
 /* ---------------------------------------------------------------------- */
 
+/** \class LAMMPS_NS::Atom
+ *  \brief Class to provide access to atom data
+
+\verbatim embed:rst
+The Atom class provides access to atom style related global settings and
+per-atom data that is stored with atoms and migrates with them from
+sub-domain to sub-domain as atoms move around.  This includes topology
+data, which is stored with either one specific atom or all atoms involved
+depending on the settings of the :doc:`newton command <newton>`.
+
+The actual per-atom data is allocated and managed by one of the various
+classes derived from the AtomVec class as determined by
+the :doc:`atom_style command <atom_style>`.  The pointers in the Atom class
+are updated by the AtomVec class as needed.
+\endverbatim
+ */
+
+/** Atom class constructor
+ *
+ * This resets and initializes all kinds of settings,
+ * parameters, and pointer variables for per-atom arrays.
+ * This also initializes the factory for creating
+ * instances of classes derived from the AtomVec base
+ * class, which correspond to the selected atom style.
+ *
+ * \param  lmp  pointer to the base LAMMPS class */
+
 Atom::Atom(LAMMPS *lmp) : Pointers(lmp)
 {
   natoms = 0;
@@ -688,7 +715,6 @@ AtomVec *Atom::avec_creator(LAMMPS *lmp)
   return new T(lmp);
 }
 
-
 /* ---------------------------------------------------------------------- */
 
 void Atom::init()
@@ -2301,12 +2327,17 @@ int Atom::find_custom(const char *name, int &flag)
   return -1;
 }
 
-/* ----------------------------------------------------------------------
-   add a custom variable with name of type flag = 0/1 for int/double
-   assumes name does not already exist
-   return index in ivector or dvector of its location
-------------------------------------------------------------------------- */
+/** \brief Add a custom per-atom property with the given name and type
+\verbatim embed:rst
 
+This function will add a custom per-atom property with the name "name"
+as either a list of int or double to the list of custom properties.  This
+function is called, e.g. from :doc:`fix property/atom <fix_property_atom>`.
+\endverbatim
+ * \param name Name of the property (w/o a "d_" or "i_" prefix)
+ * \param flag Data type of property: 0 for int, 1 for double
+ * \return Index of property in the respective list of properties
+ */
 int Atom::add_custom(const char *name, int flag)
 {
   int index;
@@ -2338,12 +2369,19 @@ int Atom::add_custom(const char *name, int flag)
   return index;
 }
 
-/* ----------------------------------------------------------------------
-   remove a custom variable of type flag = 0/1 for int/double at index
-   free memory for vector and name and set ptrs to NULL
-   ivector/dvector and iname/dname lists never shrink
-------------------------------------------------------------------------- */
-
+/*! \brief Remove a custom per-atom property of a given type
+ *
+\verbatim embed:rst
+This will remove a property that was requested e.g. by the
+:doc:`fix property/atom <fix_property_atom>` command.  It frees the
+allocated memory and sets the pointer to ``NULL`` so the entry in
+the list can be reused. The lists of those pointers will never be
+compacted or shrunk, so that index to name mappings remain valid.
+\endverbatim
+ *
+ * \param flag whether the property is integer (=0) or double (=1)
+ * \param index of that property in the respective list.
+ */
 void Atom::remove_custom(int flag, int index)
 {
   if (flag == 0) {
@@ -2359,16 +2397,123 @@ void Atom::remove_custom(int flag, int index)
   }
 }
 
-/* ----------------------------------------------------------------------
-   return a pointer to a named internal variable
-   if don't recognize name, return NULL
-------------------------------------------------------------------------- */
+/** Provide access to internal data of the Atom class by keyword
+ *
+\verbatim embed:rst
+
+This function is a way to access internal per-atom data.  This data is
+distributed across MPI ranks and thus only the data for "local" atoms
+are expected to be available.  Whether also data for "ghost" atoms is
+stored and up-to-date depends on various simulation settings.
+
+This table lists a large part of the supported names, their data types,
+length of the data area, and a short description.
+
+.. list-table::
+   :header-rows: 1
+   :widths: auto
+
+   * - Name
+     - Type
+     - Items per atom
+     - Description
+   * - mass
+     - double
+     - 1
+     - per-type mass. This array is **NOT** a per-atom array
+       but of length ``ntypes+1``, element 0 is ignored.
+   * - id
+     - tagint
+     - 1
+     - atom ID of the particles
+   * - type
+     - int
+     - 1
+     - atom type of the particles
+   * - mask
+     - int
+     - 1
+     - bitmask for mapping to groups. Individual bits are set
+       to 0 or 1 for each group.
+   * - image
+     - imageint
+     - 1
+     - 3 image flags encoded into a single integer.
+       See :cpp:func:`lammps_encode_image_flags`.
+   * - x
+     - double
+     - 3
+     - x-, y-, and z-coordinate of the particles
+   * - v
+     - double
+     - 3
+     - x-, y-, and z-component of the velocity of the particles
+   * - f
+     - double
+     - 3
+     - x-, y-, and z-component of the force on the particles
+   * - molecule
+     - int
+     - 1
+     - molecule ID of the particles
+   * - q
+     - double
+     - 1
+     - charge of the particles
+   * - mu
+     - double
+     - 3
+     - dipole moment of the particles
+   * - omega
+     - double
+     - 3
+     - x-, y-, and z-component of rotational velocity of the particles
+   * - angmom
+     - double
+     - 3
+     - x-, y-, and z-component of angular momentum of the particles
+   * - torque
+     - double
+     - 3
+     - x-, y-, and z-component of the torque on the particles
+   * - radius
+     - double
+     - 1
+     - radius of the (extended) particles
+   * - rmass
+     - double
+     - 1
+     - per-atom mass of the particles. ``NULL`` if per-type masses are
+       used. See the :cpp:func:`rmass_flag<lammps_extract_setting>` setting.
+   * - ellipsoid
+     - int
+     - 1
+     - 1 if the particle is an ellipsoidal particle, 0 if not
+   * - line
+     - int
+     - 1
+     - 1 if the particle is a line particle, 0 if not
+   * - tri
+     - int
+     - 1
+     - 1 if the particle is a triangulated particle, 0 if not
+   * - body
+     - int
+     - 1
+     - 1 if the particle is a body particle, 0 if not
+
+\endverbatim
+ *
+ * \param  name  string with the keyword of the desired property.
+                 Typically the name of the pointer variable returned
+ * \return       pointer to the requested data cast to ``void *`` or NULL */
 
 void *Atom::extract(char *name)
 {
   // --------------------------------------------------------------------
   // 4th customization section: customize by adding new variable name
 
+  /* NOTE: this array is only of length ntypes+1 */
   if (strcmp(name,"mass") == 0) return (void *) mass;
 
   if (strcmp(name,"id") == 0) return (void *) tag;
@@ -2389,6 +2534,7 @@ void *Atom::extract(char *name)
   if (strcmp(name,"ellipsoid") == 0) return (void *) ellipsoid;
   if (strcmp(name,"line") == 0) return (void *) line;
   if (strcmp(name,"tri") == 0) return (void *) tri;
+  if (strcmp(name,"body") == 0) return (void *) body;
 
   if (strcmp(name,"vfrac") == 0) return (void *) vfrac;
   if (strcmp(name,"s0") == 0) return (void *) s0;
diff --git a/src/atom_vec_body.cpp b/src/atom_vec_body.cpp
index 608c803e20..e9044519c9 100644
--- a/src/atom_vec_body.cpp
+++ b/src/atom_vec_body.cpp
@@ -555,7 +555,7 @@ bigint AtomVecBody::memory_usage_bonus()
 {
   bigint bytes = 0;
   bytes += nmax_bonus*sizeof(Bonus);
-  bytes += icp->size + dcp->size;
+  bytes += icp->size() + dcp->size();
 
   int nall = nlocal_bonus + nghost_bonus;
   for (int i = 0; i < nall; i++) {
diff --git a/src/dump.cpp b/src/dump.cpp
index ecb87b79fd..febc3d0821 100644
--- a/src/dump.cpp
+++ b/src/dump.cpp
@@ -32,8 +32,6 @@ using namespace LAMMPS_NS;
 #if defined(LMP_QSORT)
 // allocate space for static class variable
 Dump *Dump::dumpptr;
-#else
-#include "mergesort.h"
 #endif
 
 #define BIG 1.0e20
@@ -766,9 +764,9 @@ void Dump::sort()
 #else
   if (!reorderflag) {
     for (i = 0; i < nme; i++) index[i] = i;
-    if (sortcol == 0) merge_sort(index,nme,(void *)this,idcompare);
-    else if (sortorder == ASCEND) merge_sort(index,nme,(void *)this,bufcompare);
-    else merge_sort(index,nme,(void *)this,bufcompare_reverse);
+    if (sortcol == 0) utils::merge_sort(index,nme,(void *)this,idcompare);
+    else if (sortorder == ASCEND) utils::merge_sort(index,nme,(void *)this,bufcompare);
+    else utils::merge_sort(index,nme,(void *)this,bufcompare_reverse);
   }
 #endif
 
diff --git a/src/input.cpp b/src/input.cpp
index 1cb7893871..6456f6c229 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -60,6 +60,37 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
+/** \class LAMMPS_NS::Input
+ *  \brief Class for processing commands and input files
+ *
+\verbatim embed:rst
+
+The Input class contains methods for reading, pre-processing and
+parsing LAMMPS commands and input files and will dispatch commands
+to the respective class instances or contains the code to execute
+the commands directly.  It also contains the instance of the
+Variable class which performs computations and text substitutions.
+
+\endverbatim */
+
+/** Input class constructor
+ *
+\verbatim embed:rst
+
+This sets up the input processing, processes the *-var* and *-echo*
+command line flags, holds the factory of commands and creates and
+initializes an instance of the Variable class.
+
+To execute a command, a specific class instance, derived from
+:cpp:class:`Pointers`, is created, then its ``command()`` member
+function executed, and finally the class instance is deleted.
+
+\endverbatim
+ *
+ * \param  lmp   pointer to the base LAMMPS class
+ * \param  argc  number of entries in *argv*
+ * \param  argv  argument vector  */
+
 Input::Input(LAMMPS *lmp, int argc, char **argv) : Pointers(lmp)
 {
   MPI_Comm_rank(world,&me);
@@ -137,10 +168,15 @@ Input::~Input()
   delete command_map;
 }
 
-/* ----------------------------------------------------------------------
-   process all input from infile
-   infile = stdin or file if command-line arg "-in" was used
-------------------------------------------------------------------------- */
+/** Process all input from the ``FILE *`` pointer *infile*
+ *
+\verbatim embed:rst
+
+This will read lines from *infile*, parse and execute them until the end
+of the file is reached.  The *infile* pointer will usually point to
+``stdin`` or the input file given with the ``-in`` command line flag.
+
+\endverbatim */
 
 void Input::file()
 {
@@ -229,10 +265,21 @@ void Input::file()
   }
 }
 
-/* ----------------------------------------------------------------------
-   process all input from file at filename
-   mostly called from library interface
-------------------------------------------------------------------------- */
+/** Process all input from the file *filename*
+ *
+\verbatim embed:rst
+
+This function opens the file at the path *filename*, puts the current
+file pointer stored in *infile* on a stack and instead assigns *infile*
+the newly opened file pointer.  Then it will call the
+:cpp:func:`Input::file() <LAMMPS_NS::Input::file()>` function to read,
+parse and execute the contents of that file.  When the end of the file
+is reached, it is closed and the previous file pointer from the infile
+file pointer stack is restored to *infile*.
+
+\endverbatim
+ *
+ * \param  filename  name of file with LAMMPS commands */
 
 void Input::file(const char *filename)
 {
@@ -263,11 +310,19 @@ void Input::file(const char *filename)
   }
 }
 
-/* ----------------------------------------------------------------------
-   invoke one command in single
-   first copy to line, then parse, then execute it
-   return command name to caller
-------------------------------------------------------------------------- */
+/** Process a single command from a string in *single*
+ *
+\verbatim embed:rst
+
+This function takes the text in *single*, makes a copy, parses that,
+executes the command and returns the name of the command (without the
+arguments).  If there was no command in *single* it will return
+``NULL``.
+
+\endverbatim
+ *
+ * \param  single  string with LAMMPS command
+ * \return         string with name of the parsed command w/o arguments */
 
 char *Input::one(const std::string &single)
 {
diff --git a/src/irregular.cpp b/src/irregular.cpp
index f6de6947bd..fbb572fd88 100644
--- a/src/irregular.cpp
+++ b/src/irregular.cpp
@@ -31,7 +31,6 @@ using namespace LAMMPS_NS;
 int *Irregular::proc_recv_copy;
 static int compare_standalone(const void *, const void *);
 #else
-#include "mergesort.h"
 // prototype for non-class function
 static int compare_standalone(const int, const int, void *);
 #endif
@@ -441,7 +440,7 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
     proc_recv_copy = proc_recv;
     qsort(order,nrecv_proc,sizeof(int),compare_standalone);
 #else
-    merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
+    utils::merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
 #endif
 
     int j;
@@ -715,7 +714,7 @@ int Irregular::create_data(int n, int *proclist, int sortflag)
     proc_recv_copy = proc_recv;
     qsort(order,nrecv_proc,sizeof(int),compare_standalone);
 #else
-    merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
+    utils::merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
 #endif
 
     int j;
@@ -889,7 +888,7 @@ int Irregular::create_data_grouped(int n, int *procs, int sortflag)
     proc_recv_copy = proc_recv;
     qsort(order,nrecv_proc,sizeof(int),compare_standalone);
 #else
-    merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
+    utils::merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
 #endif
 
     int j;
diff --git a/src/lammps.cpp b/src/lammps.cpp
index 4d643c327c..f6952354ba 100644
--- a/src/lammps.cpp
+++ b/src/lammps.cpp
@@ -80,14 +80,30 @@ struct LAMMPS_NS::package_styles_lists {
 
 using namespace LAMMPS_NS;
 
-/* ----------------------------------------------------------------------
-   start up LAMMPS
-   allocate fundamental classes (memory, error, universe, input)
-   parse input switches
-   initialize communicators, screen & logfile output
-   input is allocated at end after MPI info is setup
-------------------------------------------------------------------------- */
+/** \class LAMMPS_NS::LAMMPS
+ * \brief LAMMPS simulation instance
+ *
+ * The LAMMPS class contains pointers to all constituent class instances
+ * and global variables that are used by a LAMMPS simulation. Its contents
+ * represent the entire state of the simulation.
+ *
+ * The LAMMPS class manages the components of an MD simulation by creating,
+ * deleting, and initializing instances of the classes it is composed of,
+ * processing command line flags, and providing access to some global properties.
+ * The specifics of setting up and running a simulation are handled by the
+ * individual component class instances. */
 
+/** Create a LAMMPS simulation instance
+ *
+ * The LAMMPS constructor starts up a simulation by allocating all
+ * fundamental classes in the necessary order, parses input switches
+ * and their arguments, initializes communicators, screen and logfile
+ * output FILE pointers.
+ *
+ * \param narg number of arguments
+ * \param arg list of arguments
+ * \param communicator MPI communicator used by this LAMMPS instance
+ */
 LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) :
   memory(NULL), error(NULL), universe(NULL), input(NULL), atom(NULL),
   update(NULL), neighbor(NULL), comm(NULL), domain(NULL), force(NULL),
@@ -636,14 +652,13 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) :
   }
 }
 
-/* ----------------------------------------------------------------------
-   shutdown LAMMPS
-   delete top-level classes
-   close screen and log files in world and universe
-   output files were already closed in destroy()
-   delete fundamental classes
-------------------------------------------------------------------------- */
-
+/** Shut down a LAMMPS simulation instance
+ *
+ * The LAMMPS destructor shuts down the simulation by deleting top-level class
+ * instances, closing screen and log files for the global instance (aka "world")
+ * and files and MPI communicators in sub-partitions ("universes"). Then it
+ * deletes the fundamental class instances and copies of data inside the class.
+ */
 LAMMPS::~LAMMPS()
 {
   const int me = comm->me;
@@ -989,6 +1004,11 @@ void _noopt LAMMPS::init_pkg_lists()
 #undef REGION_CLASS
 }
 
+/** Return true if a LAMMPS package is enabled in this binary
+ *
+ * \param pkg name of package
+ * \return true if yes, else false
+ */
 bool LAMMPS::is_installed_pkg(const char *pkg)
 {
   for (int i=0; installed_packages[i] != NULL; ++i)
@@ -1005,6 +1025,16 @@ bool LAMMPS::is_installed_pkg(const char *pkg)
     }                                                                   \
   }
 
+/** \brief Return name of package that a specific style belongs to
+ *
+ * This function checks the given name against all lists of styles
+ * for all types of styles and if the name and the style match, it
+ * returns which package this style belongs to.
+ *
+ * \param style Type of style (e.g. atom, pair, fix, etc.)
+ * \param name Name of style
+ * \return Name of the package this style is part of
+ */
 const char *LAMMPS::match_style(const char *style, const char *name)
 {
   check_for_match(angle,style,name);
diff --git a/src/lammps.h b/src/lammps.h
index dc2916f214..e1fcc0e319 100644
--- a/src/lammps.h
+++ b/src/lammps.h
@@ -85,8 +85,10 @@ class LAMMPS {
   struct package_styles_lists *pkg_lists;
   void init_pkg_lists();
   void help();
-  LAMMPS() {};                   // prohibit using the default constructor
-  LAMMPS(const LAMMPS &) {};     // prohibit using the copy constructor
+  /// Default constructor. Declared private to prohibit its use
+  LAMMPS() {};
+  /// Copy constructor. Declared private to prohibit its use
+  LAMMPS(const LAMMPS &) {};
 };
 
 }
diff --git a/src/library.cpp b/src/library.cpp
index 3f37aecd7a..72105bdd15 100644
--- a/src/library.cpp
+++ b/src/library.cpp
@@ -108,18 +108,23 @@ thus is otherwise ignored.  However ``argc`` may be set to 0 and then
 ``argv`` may be ``NULL``.  If MPI is not yet initialized, ``MPI_Init()``
 will be called during creation of the LAMMPS class instance.
 
-The function returns a pointer to the created LAMMPS class. If for some
-reason the initialization of the LAMMPS instance fails, the function
-returns ``NULL``.  For backward compatibility it is also possible to
-provide the address of a pointer variable as argument *ptr*\ . This
-argument may be ``NULL`` and is then ignored.
+If for some reason the creation or initialization of the LAMMPS instance
+fails, a null pointer is returned.
+
+.. versionchanged:: 15Sep2020
+
+   This function now has the pointer to the created LAMMPS class
+   instance as return value.  For backward compatibility it is still
+   possible to provide the address of a pointer variable as final
+   argument *ptr*\ .  This use is deprecated and may be removed in
+   the future.  The *ptr* argument may be ``NULL`` and is then ignored.
 
 .. note::
 
    This function is not declared when the code linking to the LAMMPS
    library interface is compiled with ``-DLAMMPS_LIB_NO_MPI``, or
    contains a ``#define LAMMPS_LIB_NO_MPI 1`` statement before
-   ``#include "library.h"``.  In that case, you need to use the
+   ``#include "library.h"``.  In that case, you must use the
    :cpp:func:`lammps_open_no_mpi` function.
 
 \endverbatim
@@ -169,6 +174,17 @@ library was compiled in serial mode, but the calling code runs in
 parallel and the ``MPI_Comm`` data type of the STUBS library would not
 be compatible with that of the calling code.
 
+If for some reason the creation or initialization of the LAMMPS instance
+fails, a null pointer is returned.
+
+.. versionchanged:: 15Sep2020
+
+   This function now has the pointer to the created LAMMPS class
+   instance as return value.  For backward compatibility it is still
+   possible to provide the address of a pointer variable as final
+   argument *ptr*\ .  This use is deprecated and may be removed in
+   the future.  The *ptr* argument may be ``NULL`` and is then ignored.
+
 \endverbatim
  *
  * \param  argc  number of command line arguments
@@ -195,20 +211,23 @@ module.  Internally it converts the *f_comm* argument into a C-style MPI
 communicator with ``MPI_Comm_f2c()`` and then calls
 :cpp:func:`lammps_open`.
 
+If for some reason the creation or initialization of the LAMMPS instance
+fails, a null pointer is returned.
+
+.. versionadded:: 15Sep2020
+
 \endverbatim
  *
  * \param  argc   number of command line arguments
  * \param  argv   list of command line argument strings
  * \param  f_comm Fortran style MPI communicator for this LAMMPS instance
- * \param  ptr    pointer to a void pointer variable
- *                which serves as a handle; may be ``NULL``
  * \return        pointer to new LAMMPS instance cast to ``void *`` */
 
-void *lammps_open_fortran(int argc, char **argv, int f_comm, void **ptr)
+void *lammps_open_fortran(int argc, char **argv, int f_comm)
 {
   lammps_mpi_init();
   MPI_Comm c_comm = MPI_Comm_f2c((MPI_Fint)f_comm);
-  return lammps_open(argc, argv, c_comm, ptr);
+  return lammps_open(argc, argv, c_comm, nullptr);
 }
 
 /* ---------------------------------------------------------------------- */
@@ -244,6 +263,8 @@ The MPI standard requires that any MPI application must call
 calls.  This function checks, whether MPI is already initialized and
 calls ``MPI_Init()`` in case it is not.
 
+.. versionadded:: 15Sep2020
+
 \endverbatim */
 
 void lammps_mpi_init()
@@ -274,6 +295,8 @@ before exiting the program to wait until all (parallel) tasks are
 completed and then MPI is cleanly shut down.  After this function no
 more MPI calls may be made.
 
+.. versionadded:: 15Sep2020
+
 \endverbatim */
 
 void lammps_mpi_finalize()
diff --git a/src/library.h b/src/library.h
index 63167c1fbd..0ffd111d7b 100644
--- a/src/library.h
+++ b/src/library.h
@@ -77,7 +77,7 @@ extern "C" {
 void *lammps_open(int argc, char **argv, MPI_Comm comm, void **ptr);
 #endif
 void *lammps_open_no_mpi(int argc, char **argv, void **ptr);
-void *lammps_open_fortran(int argc, char **argv, int f_comm, void **ptr);
+void *lammps_open_fortran(int argc, char **argv, int f_comm);
 void  lammps_close(void *handle);
 void  lammps_mpi_init();
 void  lammps_mpi_finalize();
diff --git a/src/mergesort.h b/src/mergesort.h
deleted file mode 100644
index edb745922f..0000000000
--- a/src/mergesort.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-#ifndef LMP_MERGESORT
-#define LMP_MERGESORT
-
-#include <cstring>
-
-// custom hybrid upward merge sort implementation with support to pass
-// an opaque pointer to the comparison function, e.g. for access to
-// class members. this avoids having to use global variables.
-// for improved performance, we employ an in-place insertion sort on
-// chunks of up to 64 elements and switch to merge sort from then on.
-
-// part 1. insertion sort for pre-sorting of small chunks
-
-static void insertion_sort(int *index, int num, void *ptr,
-                           int (*comp)(int, int, void*))
-{
-  if (num < 2) return;
-  for (int i=1; i < num; ++i) {
-    int tmp = index[i];
-    for (int j=i-1; j >= 0; --j) {
-      if ((*comp)(index[j],tmp,ptr) > 0) {
-        index[j+1] = index[j];
-      } else {
-        index[j+1] = tmp;
-        break;
-      }
-      if (j == 0) index[0] = tmp;
-    }
-  }
-}
-
-// part 2. merge two sublists
-
-static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
-                     void *ptr, int (*comp)(int, int, void *))
-{
-  int i = llo;
-  int l = llo;
-  int r = rlo;
-  while ((l < lhi) && (r < rhi)) {
-    if ((*comp)(buf[l],buf[r],ptr) < 0)
-      idx[i++] = buf[l++];
-    else idx[i++] = buf[r++];
-  }
-
-  while (l < lhi) idx[i++] = buf[l++];
-  while (r < rhi) idx[i++] = buf[r++];
-}
-
-// part 3: loop over sublists doubling in size with each iteration.
-//         pre-sort sublists with insertion sort for better performance.
-
-static void merge_sort(int *index, int num, void *ptr,
-                       int (*comp)(int, int, void *))
-{
-  if (num < 2) return;
-
-  int chunk,i,j;
-
-  // do insertion sort on chunks of up to 64 elements
-
-  chunk = 64;
-  for (i=0; i < num; i += chunk) {
-    j = (i+chunk > num) ? num-i : chunk;
-    insertion_sort(index+i,j,ptr,comp);
-  }
-
-  // already done?
-
-  if (chunk >= num) return;
-
-  // continue with merge sort on the pre-sorted chunks.
-  // we need an extra buffer for temporary storage and two
-  // pointers to operate on, so we can swap the pointers
-  // rather than copying to the hold buffer in each pass
-
-  int *buf = new int[num];
-  int *dest = index;
-  int *hold = buf;
-
-  while (chunk < num) {
-    int m;
-
-    // swap hold and destination buffer
-
-    int *tmp = dest; dest = hold; hold = tmp;
-
-    // merge from hold array to destination array
-
-    for (i=0; i < num-1; i += 2*chunk) {
-      j = i + 2*chunk;
-      if (j > num) j=num;
-      m = i+chunk;
-      if (m > num) m=num;
-      do_merge(dest,hold,i,m,m,j,ptr,comp);
-    }
-
-    // copy all indices not handled by the chunked merge sort loop
-
-    for ( ; i < num ; i++ ) dest[i] = hold[i];
-    chunk *= 2;
-  }
-
-  // if the final sorted data is in buf, copy back to index
-
-  if (dest == buf) memcpy(index,buf,sizeof(int)*num);
-
-  delete[] buf;
-}
-
-#endif
diff --git a/src/my_page.cpp b/src/my_page.cpp
new file mode 100644
index 0000000000..95dd56fecd
--- /dev/null
+++ b/src/my_page.cpp
@@ -0,0 +1,193 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "my_page.h"
+
+#include <cstdlib>
+
+#if defined(LMP_USER_INTEL) && !defined(LAMMPS_MEMALIGN) && !defined(_WIN32)
+#define LAMMPS_MEMALIGN 64
+#endif
+
+using namespace LAMMPS_NS;
+
+/** \class LAMMPS_NS::MyPage
+ * \brief Templated class for storing chunks of datums in pages.
+ *
+ * The size of the chunk may vary from call to call, but must be
+ * less than or equal to the *maxchunk* setting.
+ * The chunks are not returnable like with malloc() (i.e. you cannot
+ * call free() on them individually).  One can only reset and start over.
+ * The purpose of this class is to replace many small memory allocations
+ * via malloc() with a few large ones.  Since the pages are never freed
+ * until the class is re-initialized, they can be re-used without having
+ * to re-allocate them by calling the reset() method.
+ *
+ * The settings *maxchunk*, *pagesize*, and *pagedelta* control
+ * the memory allocation strategy.  The *maxchunk* value represents
+ * the expected largest number of items per chunk.  If there is
+ * less space left on the current page, a new page is allocated
+ * for the next chunk.  The *pagesize* value represents how many
+ * items can fit on a single page.  It should have space for multiple
+ * chunks of size *maxchunk*.  The combination of these two
+ * parameters determines how much memory is wasted by either switching
+ * to the next page too soon or allocating too large pages that never
+ * get properly used.  It is an error, if a requested chunk is larger
+ * than *maxchunk*.  The *pagedelta* parameter determines how many
+ * pages are allocated in one go.  In combination with the *pagesize*
+ * setting, this determines how often blocks of memory get allocated
+ * (fewer allocations will result in faster execution).
+ *
+ * \note
+ * This is a template class with explicit instantiation. If the class
+ * is used with a new data type a new explicit instantiation may need to
+ * be added at the end of the file ``src/my_page.cpp`` to avoid symbol
+ * lookup errors. */
+
+/** Create a class instance
+ *
+ *  Need to call init() before use to define allocation settings */
+
+template <class T>
+MyPage<T>::MyPage() : ndatum(0), nchunk(0), pages(nullptr), page(nullptr),
+                      npage(0), ipage(-1), index(-1), maxchunk(-1),
+                      pagesize(-1), pagedelta(1), errorflag(0) {}  // no pages yet; init() allocates them
+
+template <class T>
+MyPage<T>::~MyPage() {
+  deallocate();   // frees every page and the page table itself
+}
+
+/** (Re-)initialize the set of pages and allocation parameters.
+ *
+ * This also frees all previously allocated storage and allocates
+ * the first page(s).
+ *
+ * \param  user_maxchunk   Expected maximum number of items for one chunk
+ * \param  user_pagesize   Number of items on a single memory page
+ * \param  user_pagedelta  Number of pages to allocate with one malloc
+ * \return                 1 if there were invalid parameters, 2 if there was an allocation error or 0 if successful */
+
+template <class T>
+int MyPage<T>::init(int user_maxchunk, int user_pagesize,
+                    int user_pagedelta) {
+  // the settings are stored first and remain stored even when rejected below
+  maxchunk = user_maxchunk;
+  pagesize = user_pagesize;
+  pagedelta = user_pagedelta;
+
+  const bool nonpositive = (maxchunk <= 0) || (pagesize <= 0) || (pagedelta <= 0);
+  if (nonpositive || (maxchunk > pagesize)) return 1;
+
+  // drop whatever a previous init() had allocated
+  deallocate();
+
+  // allocate the first batch of pages; allocate() signals failure via errorflag
+  allocate();
+  if (errorflag) return 2;
+  // point the pool at the start of the first page and clear the counters
+  reset();
+  return 0;
+}
+
+/** Pointer to location that can store N items.
+ *
+ * This will allocate more pages as needed.
+ * If the parameter *N* is larger than the *maxchunk*
+ * setting an error is flagged.
+ *
+ * \param  n  number of items for which storage is requested
+ * \return    memory location or null pointer, if error or allocation failed */
+
+template <class T>
+T *MyPage<T>::get(int n) {
+  // oversized requests are flagged and refused before touching any counter
+  if (n > maxchunk) {
+    errorflag = 1;
+    return nullptr;
+  }
+  nchunk++;
+  ndatum += n;
+  const bool fits = (index+n <= pagesize);
+  if (!fits) {
+    // move on to the next page, growing the pool when none is left
+    ++ipage;
+    if (ipage == npage) {
+      allocate();
+      if (errorflag) return nullptr;
+    }
+    page = pages[ipage];
+    index = 0;
+  }
+
+  // hand out the chunk at the current offset and advance past it
+  T *chunk = &page[index];
+  index += n;
+  return chunk;
+}
+
+
+/** Reset state of memory pool without freeing any memory */
+
+template <class T>
+void MyPage<T>::reset() {
+  errorflag = 0;
+  nchunk = ndatum = 0;
+  ipage = index = 0;
+  page = pages ? pages[0] : nullptr;   // stays null while no page table exists
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class T>
+void MyPage<T>::allocate() {
+  // realloc() into a temporary: on failure the old table (and npage) stay valid
+  T **newpages = (T **) realloc(pages,(npage+pagedelta)*sizeof(T *));
+  if (!newpages) {
+    errorflag = 2;
+    return;
+  }
+  pages = newpages;
+  npage += pagedelta;
+  for (int i = npage-pagedelta; i < npage; i++) {
+    void *ptr = nullptr;
+#if defined(LAMMPS_MEMALIGN)
+    if (posix_memalign(&ptr, LAMMPS_MEMALIGN, pagesize*sizeof(T))) ptr = nullptr;
+#else
+    ptr = malloc(pagesize*sizeof(T));
+#endif
+    pages[i] = (T *) ptr;   // null on failure, so a later free() is harmless
+    if (!ptr) errorflag = 2;
+  }
+}
+
+/** Free all allocated pages of this class instance */
+
+template <class T>
+void MyPage<T>::deallocate() {
+  reset();
+  // tolerate a null page table while npage > 0 (e.g. after a failed realloc())
+  for (int i = 0; pages && i < npage; i++) free(pages[i]);
+  free(pages);
+  pages = nullptr; npage = 0;
+}
+
+// explicit instantiations
+
+namespace LAMMPS_NS {   // member definitions live in this file, so each element type needs a line here
+  template class MyPage<int>;
+  template class MyPage<long>;
+  template class MyPage<long long>;
+  template class MyPage<double>;
+  template class MyPage<HyperOneCoeff>;   // HyperOneCoeff is declared in my_page.h
+}
diff --git a/src/my_page.h b/src/my_page.h
index 01542a9174..4beb397ae2 100644
--- a/src/my_page.h
+++ b/src/my_page.h
@@ -12,144 +12,41 @@
 ------------------------------------------------------------------------- */
 
 /* ----------------------------------------------------------------------
-MyPage = templated class for storing chunks of datums in pages
-  chunks are not returnable, can only reset and start over
-  replaces many small mallocs with a few large mallocs
-  pages are never freed, so can reuse w/out reallocs
-usage:
-  request one datum at a time, repeat, clear
-  request chunks of datums in each get() or vget(), repeat, clear
-  chunk size can vary from request to request
-  chunk size can be known in advance or registered after usage via vgot()
-inputs:
-   template T = one datum, e.g. int, double, struct, int[3]
-     for int[3], access datum as ivec[i][2]
-methods:
-   T *get() = return ptr to one datum
-   T *get(N) = return ptr to N datums, N < maxchunk required
-   T *vget() = return ptr to maxchunk datums, use as needed, then call vgot()
-     all gets return NULL if error encountered
-   vgot(N) = used N datums of previous vget(), N < maxchunk required
-   void init(maxchunk, pagesize, pagedelta)
-     define allocation params and allocate first page(s)
-     call right after constructor
-       can call again to reset allocation params and free previous pages
-     maxchunk = max # of datums in one chunk, default = 1
-     pagesize = # of datums in one page, default = 1024
-       should be big enough to store multiple chunks
-     pagedelta = # of pages to allocate at a time, default = 1
-     return 1 if bad params
-   void reset() = clear pages w/out freeing
-   int size() = return total size of allocated pages in bytes
-   int status() = return error status
-     0 = ok, 1 = chunksize > maxchunk, 2 = allocation error
+   templated class for storing chunks of datums in pages
 ------------------------------------------------------------------------- */
 
 #ifndef LAMMPS_MY_PAGE_H
 #define LAMMPS_MY_PAGE_H
 
-#if defined(LMP_USER_INTEL) && !defined(LAMMPS_MEMALIGN) && !defined(_WIN32)
-#define LAMMPS_MEMALIGN 64
-#endif
+#include "lmptype.h"
 
-#include <cstdlib>
 namespace LAMMPS_NS {
 
+struct HyperOneCoeff {
+  double biascoeff;
+  tagint tag;
+};
+
 template<class T>
 class MyPage {
  public:
   int ndatum;      // total # of stored datums
   int nchunk;      // total # of stored chunks
+  MyPage();
+  virtual ~MyPage();
 
-  MyPage() {
-    ndatum = nchunk = 0;
-    pages = NULL;
-    npage = 0;
-    errorflag = 0;
-  }
+  int init(int user_maxchunk=1, int user_pagesize=1024,
+           int user_pagedelta=1);
 
-  // (re)initialize allocation params
-  // also allocate first page(s)
+  T *get(int n=1);
 
-  int init(int user_maxchunk = 1, int user_pagesize = 1024,
-           int user_pagedelta = 1) {
-    maxchunk = user_maxchunk;
-    pagesize = user_pagesize;
-    pagedelta = user_pagedelta;
-
-    if (maxchunk <= 0 || pagesize <= 0 || pagedelta <= 0) return 1;
-    if (maxchunk > pagesize) return 1;
-
-    // free any previously allocated pages
-
-    for (int i = 0; i < npage; i++) free(pages[i]);
-    free(pages);
-
-    // initial page allocation
-
-    ndatum = nchunk = 0;
-    pages = NULL;
-    npage = 0;
-    allocate();
-    if (errorflag) return 2;
-    ipage = index = 0;
-    page = pages[ipage];
-    return 0;
-  }
-
-  // free all allocated pages
-
-  ~MyPage() {
-    for (int i = 0; i < npage; i++) free(pages[i]);
-    free(pages);
-  }
-
-  // get ptr to one datum
-  // return NULL if run out of memory
-
-  T *get() {
-    ndatum++;
-    nchunk++;
-    if (index < pagesize) return &page[index++];
-    ipage++;
-    if (ipage == npage) {
-      allocate();
-      if (errorflag) return NULL;
-    }
-    page = pages[ipage];
-    index = 0;
-    return &page[index++];
-  }
-
-  // get ptr to location that can store N datums
-  // error if N > maxchunk
-  // return NULL if run out of memory
-
-  T *get(int n) {
-    if (n > maxchunk) {
-      errorflag = 1;
-      return NULL;
-    }
-    ndatum += n;
-    nchunk++;
-    if (index+n <= pagesize) {
-      int start = index;
-      index += n;
-      return &page[start];
-    }
-    ipage++;
-    if (ipage == npage) {
-      allocate();
-      if (errorflag) return NULL;
-    }
-    page = pages[ipage];
-    index = n;
-    return &page[0];
-  }
-
-  // get ptr to location that can store maxchunk datums
-  // will return same ptr as previous call if vgot() not called
-  // return NULL if run out of memory
+  /** Get pointer to location that can store *maxchunk* items.
+   *
+   * This will return the same pointer as the previous call to
+   * this function unless vgot() is called afterwards to record
+   * how many items of the chunk were actually used.
+   *
+   * \return pointer to chunk of memory or null pointer if run out of memory */
 
   T *vget() {
     if (index+maxchunk <= pagesize) return &page[index];
@@ -163,9 +60,14 @@ class MyPage {
     return &page[index];
   }
 
-  // increment by N = # of values stored in loc returned by vget()
-  // OK to not call if vget() ptr was not used
-  // error if N > maxchunk
+  /** Mark *N* items as used of the chunk reserved with a preceding call to vget().
+   *
+   * This will advance the internal pointer inside the current memory page.
+   * It is not necessary to call this function for *N* = 0, that is the reserved
+   * storage was not used.  A following call to vget() will then reserve the
+   * same location again.  It is an error if *N* > *maxchunk*.
+   *
+   * \param  n  Number of items used in previously reserved chunk */
 
   void vgot(int n) {
     if (n > maxchunk) errorflag = 1;
@@ -174,25 +76,21 @@ class MyPage {
     index += n;
   }
 
-  // clear all pages, without freeing any memory
+  void reset();
 
-  void reset() {
-    ndatum = nchunk = 0;
-    index = ipage = 0;
-    page = pages[ipage];
+  /** Return total size of allocated pages
+   *
+   * \return total storage used in bytes */
+
+  double size() const {
+    return (double)npage*pagesize*sizeof(T);
   }
 
-  // return total size of allocated pages
+  /** Return error status
+   *
+   * \return 0 if no error, 1 if requested chunk size > maxchunk, 2 if malloc failed */
 
-  int size() const {
-    return npage*pagesize*sizeof(T);
-  }
-
-  // return error status
-
-  int status() const {
-    return errorflag;
-  }
+  int status() const { return errorflag; }
 
  private:
   T **pages;      // list of allocated pages
@@ -208,27 +106,8 @@ class MyPage {
   int errorflag;  // flag > 0 if error has occurred
                   // 1 = chunk size exceeded maxchunk
                   // 2 = memory allocation error
-
-  void allocate() {
-    npage += pagedelta;
-    pages = (T **) realloc(pages,npage*sizeof(T *));
-    if (!pages) {
-      errorflag = 2;
-      return;
-    }
-
-    for (int i = npage-pagedelta; i < npage; i++) {
-#if defined(LAMMPS_MEMALIGN)
-      void *ptr;
-      if (posix_memalign(&ptr, LAMMPS_MEMALIGN, pagesize*sizeof(T)))
-        errorflag = 2;
-      pages[i] = (T *) ptr;
-#else
-      pages[i] = (T *) malloc(pagesize*sizeof(T));
-      if (!pages[i]) errorflag = 2;
-#endif
-    }
-  }
+  void allocate();
+  void deallocate();
 };
 
 }
diff --git a/src/my_pool_chunk.cpp b/src/my_pool_chunk.cpp
new file mode 100644
index 0000000000..a8364f8d89
--- /dev/null
+++ b/src/my_pool_chunk.cpp
@@ -0,0 +1,228 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+   ------------------------------------------------------------------------- */
+
+#include "my_pool_chunk.h"
+
+#include <cstdlib>
+#include <cstdio>
+
+#if defined(LMP_USER_INTEL) && !defined(LAMMPS_MEMALIGN) && !defined(_WIN32)
+#define LAMMPS_MEMALIGN 64
+#endif
+
+using namespace LAMMPS_NS;
+
+/** \class LAMMPS_NS::MyPoolChunk
+ *  \brief Templated class for storing chunks of datums in pages
+ *
+ * The size of the chunk may vary from call to call between the
+ * *minchunk* and *maxchunk* setting.  Chunks may be returned
+ * to the pool for re-use.  Chunks can be reserved in *nbin*
+ * different sizes between *minchunk* and *maxchunk*.
+ * The *chunksperpage* setting specifies how many chunks are stored
+ * on any page and the *pagedelta* setting determines how many
+ * pages are allocated in one go.  Pages are never freed, so they
+ * can be re-used without re-allocation.
+ *
+ * \note
+ * This is a template class with explicit instantiation. If the class
+ * is used with a new data type a new explicit instantiation may need
+ * to be added at the end of the file ``src/my_pool_chunk.cpp`` to
+ * avoid symbol lookup errors. */
+
+/** Create a class instance and set memory pool parameters
+ *
+ * \param  user_minchunk      Minimal chunk size
+ * \param  user_maxchunk      Maximal chunk size
+ * \param  user_nbin          Number of bins of different chunk sizes
+ * \param  user_chunkperpage  Number of chunks per page
+ * \param  user_pagedelta     Number of pages to allocate in one go */
+
+template <class T>
+MyPoolChunk<T>::MyPoolChunk(int user_minchunk, int user_maxchunk, int user_nbin,
+                            int user_chunkperpage, int user_pagedelta) {
+  minchunk = user_minchunk;
+  maxchunk = user_maxchunk;
+  nbin = user_nbin;
+  chunkperpage = user_chunkperpage;
+  pagedelta = user_pagedelta;
+
+  // give every member a defined value before any early return, since the
+  // destructor and size() read them even when construction is flagged as failed
+  ndatum = nchunk = npage = binsize = 0;
+  pages = nullptr;
+  whichbin = freelist = freehead = chunksize = nullptr;
+
+  errorflag = 0;
+  if (minchunk <= 0 || minchunk > maxchunk) errorflag = 1;
+  if (user_nbin <= 0 || chunkperpage <= 0 || pagedelta <= 0) errorflag = 1;
+  if (errorflag) return;   // do not call new[] with an invalid (<= 0) size
+
+  // new[] reports failure by throwing, so no null check is needed here
+  freehead = new int[nbin];
+  chunksize = new int[nbin];
+
+  // ensure nbin*binsize spans minchunk to maxchunk inclusive
+
+  binsize = (maxchunk-minchunk+1) / nbin;
+  if (minchunk + nbin*binsize <= maxchunk) binsize++;
+
+  for (int ibin = 0; ibin < nbin; ibin++) {
+    freehead[ibin] = -1;
+    chunksize[ibin] = minchunk + (ibin+1)*binsize - 1;
+    if (chunksize[ibin] > maxchunk) chunksize[ibin] = maxchunk;
+  }
+}
+
+/** Destroy class instance and free all allocated memory */
+template <class T>
+MyPoolChunk<T>::~MyPoolChunk() {
+  if (npage != 0) {
+    for (int ipage = 0; ipage < npage; ++ipage) free(pages[ipage]);
+    free(pages);
+    free(whichbin);
+    free(freelist);
+  }
+  delete [] chunksize;   // per-bin tables come from new[] in the constructor
+  delete [] freehead;
+}
+
+/** Return pointer/index of unused chunk of size maxchunk
+ *
+ * \param  index  Index of chunk in memory pool
+ * \return        Pointer to requested chunk of storage */
+
+template <class T>
+T *MyPoolChunk<T>::get(int &index) {
+  // chunks of size maxchunk are served from the last bin
+  const int lastbin = nbin-1;
+  if (freehead[lastbin] < 0) {
+    allocate(lastbin);
+    if (errorflag) {
+      index = -1;
+      return nullptr;
+    }
+  }
+  nchunk++;
+  ndatum += maxchunk;
+  // pop the head of the bin's free list and turn it into page + slot
+  index = freehead[lastbin];
+  freehead[lastbin] = freelist[index];
+  T *pagestart = pages[index/chunkperpage];
+  return pagestart + (index % chunkperpage)*chunksize[lastbin];
+}
+
+/** Return pointer/index of unused chunk of size N
+ *
+ * \param  n      Size of chunk
+ * \param  index  Index of chunk in memory pool
+ * \return        Pointer to requested chunk of storage */
+
+template <class T>
+T *MyPoolChunk<T>::get(int n, int &index) {
+  const bool inrange = (n >= minchunk) && (n <= maxchunk);
+  if (!inrange) {
+    index = -1;
+    errorflag = 3;
+    return nullptr;
+  }
+  // pick the bin whose chunk size covers n; refill it when its free list is empty
+  const int mybin = (n-minchunk) / binsize;
+  if (freehead[mybin] < 0) {
+    allocate(mybin);
+    if (errorflag) {
+      index = -1;
+      return nullptr;
+    }
+  }
+  nchunk++;
+  ndatum += n;
+  // pop the head of the bin's free list and turn it into page + slot
+  index = freehead[mybin];
+  freehead[mybin] = freelist[index];
+  T *pagestart = pages[index/chunkperpage];
+  return pagestart + (index % chunkperpage)*chunksize[mybin];
+}
+
+/** Put indexed chunk back into memory pool via free list
+ *
+ * \param index  Memory chunk index returned by call to get() */
+
+template <class T>
+void MyPoolChunk<T>::put(int index) {
+  if (index >= 0) {   // a negative index means no chunk was handed out
+    const int mybin = whichbin[index/chunkperpage];
+    ndatum -= chunksize[mybin];
+    nchunk--;
+    freelist[index] = freehead[mybin];   // push the chunk onto its bin's free list
+    freehead[mybin] = index;
+  }
+}
+
+
+template <class T>
+void MyPoolChunk<T>::allocate(int ibin) {
+  int oldpage = npage;
+  int newpage = npage + pagedelta;
+  // realloc() into temporaries: a failed call leaves the old table valid
+  int *newlist = (int *) realloc(freelist,newpage*chunkperpage*sizeof(int));
+  if (newlist) freelist = newlist;
+  T **newpages = (T **) realloc(pages,newpage*sizeof(T *));
+  if (newpages) pages = newpages;
+  int *newbin = (int *) realloc(whichbin,newpage*sizeof(int));
+  if (newbin) whichbin = newbin;
+  if (!newlist || !newpages || !newbin) {
+    errorflag = 2;
+    return;
+  }
+  npage = newpage;   // commit only once all three tables have grown
+  // allocate pages with appropriate chunksize for ibin
+  for (int i = oldpage; i < npage; i++) {
+    whichbin[i] = ibin;
+    void *ptr = nullptr;
+#if defined(LAMMPS_MEMALIGN)
+    if (posix_memalign(&ptr, LAMMPS_MEMALIGN,
+                       chunkperpage*chunksize[ibin]*sizeof(T))) ptr = nullptr;
+#else
+    ptr = malloc(chunkperpage*chunksize[ibin]*sizeof(T));
+#endif
+    pages[i] = (T *) ptr;
+    if (!ptr) errorflag = 2;
+  }
+  // reset free list for unused chunks on new pages
+  freehead[ibin] = oldpage*chunkperpage;
+  for (int i = freehead[ibin]; i < npage*chunkperpage; i++) freelist[i] = i+1;
+  freelist[npage*chunkperpage-1] = -1;
+}
+
+/** Return total size of allocated pages
+ *
+ * \return total storage used in bytes */
+
+template <class T>
+double MyPoolChunk<T>::size() const {
+  double bytes = (double)npage*chunkperpage*sizeof(int);   // freelist
+  bytes += (double)npage*sizeof(T *);                       // pages table
+  bytes += (double)npage*sizeof(int);                       // whichbin
+  // chunksize[] is indexed by bin, so map each page to its bin first
+  for (int i=0; i < npage; ++i)
+    bytes += (double)chunkperpage*chunksize[whichbin[i]]*sizeof(T);
+  return bytes;
+}
+
+// explicit instantiations
+
+namespace LAMMPS_NS {   // member definitions live in this file, so each element type needs a line here
+  template class MyPoolChunk<int>;
+  template class MyPoolChunk<double>;
+}
diff --git a/src/my_pool_chunk.h b/src/my_pool_chunk.h
index da196f5ec9..a9bcbd3e80 100644
--- a/src/my_pool_chunk.h
+++ b/src/my_pool_chunk.h
@@ -9,46 +9,11 @@
    the GNU General Public License.
 
    See the README file in the top-level LAMMPS directory.
-------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-MyPoolChunk = templated class for storing chunks of datums in pages
-  chunks can be returned to pool for reuse
-  chunks come in nbin different fixed sizes so can reuse
-  replaces many small mallocs with a few large mallocs
-  pages are never freed, so can reuse w/out reallocs
-usage:
-  continuously get() and put() chunks as needed
-  NOTE: could add a clear() if retain info on mapping of pages to bins
-inputs:
-   template T = one datum, e.g. int, double, struct
-   minchunk = min # of datums in one chunk, def = 1
-   maxchunk = max # of datums in one chunk, def = 1
-   nbin = # of bins between minchunk and maxchunk
-   chunkperpage = # of chunks in one page, def = 1024
-   pagedelta = # of pages to allocate at a time, def = 1
-methods:
-   T *get(index) = return ptr/index to unused chunk of size maxchunk
-   T *get(N,index) = return ptr/index to unused chunk of size N
-                     minchunk <= N <= maxchunk required
-   put(index) = return indexed chunk to pool (same index returned by get)
-   int size() = return total size of allocated pages in bytes
-public variables:
-   ndatum = total # of stored datums
-   nchunk = total # of stored chunks
-   size = total size of all allocated pages in daums
-   errorflag = flag for various error conditions
-------------------------------------------------------------------------- */
+   ------------------------------------------------------------------------- */
 
 #ifndef LAMMPS_MY_POOL_CHUNK_H
 #define LAMMPS_MY_POOL_CHUNK_H
 
-#if defined(LMP_USER_INTEL) && !defined(LAMMPS_MEMALIGN) && !defined(_WIN32)
-#define LAMMPS_MEMALIGN 64
-#endif
-
-#include <cstdlib>
-
 namespace LAMMPS_NS {
 
 template<class T>
@@ -56,113 +21,36 @@ class MyPoolChunk {
  public:
   int ndatum;      // total # of stored datums
   int nchunk;      // total # of stored chunks
-  int size;        // total size of all allocated pages in datums
-  int errorflag;   // flag > 1 if error has occurred
-                   // 1 = invalid inputs
-                   // 2 = memory allocation error
-                   // 3 = chunk size exceeded maxchunk
 
   MyPoolChunk(int user_minchunk = 1, int user_maxchunk = 1, int user_nbin = 1,
-              int user_chunkperpage = 1024, int user_pagedelta = 1) {
-    minchunk = user_minchunk;
-    maxchunk = user_maxchunk;
-    nbin = user_nbin;
-    chunkperpage = user_chunkperpage;
-    pagedelta = user_pagedelta;
-
-    errorflag = 0;
-    if (minchunk <= 0 || minchunk > maxchunk) errorflag = 1;
-    if (user_nbin <= 0 || chunkperpage <= 0 || pagedelta <= 0) errorflag = 1;
-
-    freehead = new int[nbin];
-    chunksize = new int[nbin];
-    if (!freehead || !chunksize) errorflag = 1;
-    if (errorflag) return;
-
-    // insure nbin*binsize spans minchunk to maxchunk inclusive
-
-    binsize = (maxchunk-minchunk+1) / nbin;
-    if (minchunk + nbin*binsize <= maxchunk) binsize++;
-
-    freelist = NULL;
-    for (int ibin = 0; ibin < nbin; ibin++) {
-      freehead[ibin] = -1;
-      chunksize[ibin] = minchunk + (ibin+1)*binsize - 1;
-      if (chunksize[ibin] > maxchunk) chunksize[ibin] = maxchunk;
-    }
-
-    ndatum = nchunk = size = 0;
-    pages = NULL;
-    whichbin = NULL;
-    npage = 0;
-  }
+              int user_chunkperpage = 1024, int user_pagedelta = 1);
 
   // free all allocated memory
 
-  ~MyPoolChunk() {
-    delete [] freehead;
-    delete [] chunksize;
-    if (npage) {
-      free(freelist);
-      for (int i = 0; i < npage; i++) free(pages[i]);
-      free(pages);
-      free(whichbin);
-    }
-  }
+  ~MyPoolChunk();
 
   // return pointer/index of unused chunk of size maxchunk
 
-  T *get(int &index) {
-    int ibin = nbin-1;
-    if (freehead[ibin] < 0) {
-      allocate(ibin);
-      if (errorflag) return NULL;
-    }
-
-    ndatum += maxchunk;
-    nchunk++;
-    index = freehead[ibin];
-    int ipage = index/chunkperpage;
-    int ientry = index % chunkperpage;
-    freehead[ibin] = freelist[index];
-    return &pages[ipage][ientry*chunksize[ibin]];
-  }
+  T *get(int &index);
 
   // return pointer/index of unused chunk of size N
 
-  T *get(int n, int &index) {
-    if (n < minchunk || n > maxchunk) {
-      errorflag = 3;
-      return NULL;
-    }
-
-    int ibin = (n-minchunk) / binsize;
-    if (freehead[ibin] < 0) {
-      allocate(ibin);
-      if (errorflag) return NULL;
-    }
-
-    ndatum += n;
-    nchunk++;
-    index = freehead[ibin];
-    int ipage = index/chunkperpage;
-    int ientry = index % chunkperpage;
-    freehead[ibin] = freelist[index];
-    return &pages[ipage][ientry*chunksize[ibin]];
-  }
+  T *get(int n, int &index);
 
   // return indexed chunk to pool via free list
   // index = -1 if no allocated chunk
 
-  void put(int index) {
-    if (index < 0) return;
-    int ipage = index/chunkperpage;
-    int ibin = whichbin[ipage];
-    nchunk--;
-    ndatum -= chunksize[ibin];
-    freelist[index] = freehead[ibin];
-    freehead[ibin] = index;
-  }
+  void put(int index);
+
+  // total memory used in bytes
+
+  double size() const;
+
+  /** Return error status
+   *
+   * \return 0 if no error, 1 if invalid input, 2 if malloc() failed, 3 if chunk size is outside minchunk..maxchunk */
+
+  int status() const { return errorflag; }
 
  private:
   int minchunk;       // min # of datums per chunk
@@ -171,6 +59,10 @@ class MyPoolChunk {
   int chunkperpage;   // # of chunks on every page, regardless of which bin
   int pagedelta;      // # of pages to allocate at once, default = 1
   int binsize;        // delta in chunk sizes between adjacent bins
+  int errorflag;      // flag > 0 if error has occurred
+                      // 1 = invalid inputs
+                      // 2 = memory allocation error
+                      // 3 = chunk size exceeded maxchunk
 
   T **pages;          // list of allocated pages
   int *whichbin;      // which bin each page belongs to
@@ -179,42 +71,7 @@ class MyPoolChunk {
   int *freehead;      // index of first unused chunk in each bin
   int *chunksize;     // size of chunks in each bin
 
-  void allocate(int ibin) {
-    int oldpage = npage;
-    npage += pagedelta;
-    freelist = (int *) realloc(freelist,npage*chunkperpage*sizeof(int));
-    pages = (T **) realloc(pages,npage*sizeof(T *));
-    whichbin = (int *) realloc(whichbin,npage*sizeof(int));
-    if (!freelist || !pages) {
-      errorflag = 2;
-      return;
-    }
-
-    // allocate pages with appropriate chunksize for ibin
-
-    for (int i = oldpage; i < npage; i++) {
-      whichbin[i] = ibin;
-#if defined(LAMMPS_MEMALIGN)
-      void *ptr;
-      if (posix_memalign(&ptr, LAMMPS_MEMALIGN,
-                         chunkperpage*chunksize[ibin]*sizeof(T)))
-        errorflag = 2;
-      pages[i] = (T *) ptr;
-#else
-      pages[i] = (T *) malloc(chunkperpage*chunksize[ibin]*sizeof(T));
-      size += chunkperpage*chunksize[ibin];
-      if (!pages[i]) errorflag = 2;
-#endif
-    }
-
-    // reset free list for unused chunks on new pages
-
-    freehead[ibin] = oldpage*chunkperpage;
-    for (int i = freehead[ibin]; i < npage*chunkperpage; i++) freelist[i] = i+1;
-    freelist[npage*chunkperpage-1] = -1;
-  }
+  void allocate(int ibin);
 };
-
 }
-
 #endif
diff --git a/src/reset_atom_ids.cpp b/src/reset_atom_ids.cpp
index 0b7cf9879f..406df7e608 100644
--- a/src/reset_atom_ids.cpp
+++ b/src/reset_atom_ids.cpp
@@ -34,7 +34,6 @@ using namespace LAMMPS_NS;
 ResetIDs::AtomRvous *ResetIDs::sortrvous;
 static int compare_coords(const void *, const void *);
 #else
-#include "mergesort.h"
 // prototype for non-class function
 static int compare_coords(const int, const int, void *);
 #endif
@@ -509,7 +508,7 @@ int ResetIDs::sort_bins(int n, char *inbuf,
     sortrvous = in;
     qsort(order,count[ibin],sizeof(int),compare_coords);
 #else
-    merge_sort(order,count[ibin],(void *) in,compare_coords);
+    utils::merge_sort(order,count[ibin],(void *) in,compare_coords);
 #endif
 
     head[ibin] = last[ibin] = -1;
diff --git a/src/utils.cpp b/src/utils.cpp
index af800f48b0..f63deac0dc 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -71,6 +71,16 @@ extern "C"
   static int  re_match(const char *text, const char *pattern);
 }
 
+////////////////////////////////////////////////////////////////////////
+// Merge sort support functions (file-local helpers of utils::merge_sort)
+
+static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
+                     void *ptr, int (*comp)(int, int, void *));
+static void insertion_sort(int *index, int num, void *ptr,
+                           int (*comp)(int, int, void*));
+
+////////////////////////////////////////////////////////////////////////
+
 using namespace LAMMPS_NS;
 
 /** More flexible and specific matching of a string against a pattern.
@@ -1011,6 +1021,113 @@ int utils::date2num(const std::string &date)
   return num;
 }
 
+/* ----------------------------------------------------------------------
+ * Merge sort part 1: bottom-up driver that sorts index[0..num-1] in place.
+ * Runs of up to 64 entries are pre-sorted with insertion sort and then
+ * neighboring runs of doubling width are merged until one run remains.
+------------------------------------------------------------------------- */
+
+void utils::merge_sort(int *index, int num, void *ptr,
+                       int (*comp)(int, int, void *))
+{
+  // lists with fewer than two entries are already sorted
+
+  if (num < 2) return;
+
+  // pre-sort consecutive runs of up to 64 entries in place
+
+  int width = 64;
+  for (int start = 0; start < num; start += width) {
+    const int len = (start+width > num) ? num-start : width;
+    insertion_sort(index+start,len,ptr,comp);
+  }
+
+  // a single run covered the whole list, so nothing is left to merge
+
+  if (width >= num) return;
+
+  // merge passes read from one array and write to the other.  the two
+  // pointers trade roles at the start of every pass, which avoids
+  // copying the data back to a fixed source array after each pass
+
+  int *buf = new int[num];
+  int *dest = index;
+  int *hold = buf;
+
+  for ( ; width < num; width *= 2) {
+
+    // the previous output becomes the input of this pass
+
+    int *other = hold; hold = dest; dest = other;
+
+    // merge each pair of adjacent runs; both bounds are clamped to num
+
+    int lo;
+    for (lo = 0; lo < num-1; lo += 2*width) {
+      const int hi = (lo+2*width > num) ? num : lo+2*width;
+      const int mid = (lo+width > num) ? num : lo+width;
+      do_merge(dest,hold,lo,mid,mid,hi,ptr,comp);
+    }
+
+    // carry over a trailing entry that the pair loop did not reach
+
+    while (lo < num) {
+      dest[lo] = hold[lo];
+      ++lo;
+    }
+  }
+
+  // the result must end up in index, so copy it back if it is in buf
+
+  if (dest == buf) memcpy(index,buf,sizeof(int)*num);
+
+  delete[] buf;
+}
+
+/* ------------------------------------------------------------------ */
+
+/* ----------------------------------------------------------------------
+ * Merge sort part 2: Insertion sort for pre-sorting of small chunks.
+ * Sorts index[0..num-1] in place, ordering entries with comp(a,b,ptr).
+------------------------------------------------------------------------- */
+
+void insertion_sort(int *index, int num, void *ptr,
+                           int (*comp)(int, int, void*))
+{
+  for (int i = 1; i < num; ++i) {
+    const int pending = index[i];
+
+    // shift every larger entry of the sorted prefix one slot to the right
+
+    int slot = i;
+    while ((slot > 0) && ((*comp)(index[slot-1],pending,ptr) > 0)) {
+      index[slot] = index[slot-1];
+      --slot;
+    }
+    index[slot] = pending;
+  }
+}
+
+/* ----------------------------------------------------------------------
+ * Merge sort part 3: Merge the sorted sublists buf[llo..lhi-1] and
+ * buf[rlo..rhi-1] into idx starting at llo.  On ties the entry from the
+ * left sublist is taken first, so equal entries keep their input order.
+------------------------------------------------------------------------- */
+
+static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
+                     void *ptr, int (*comp)(int, int, void *))
+{
+  int i = llo, l = llo, r = rlo;
+  while ((l < lhi) && (r < rhi)) {
+    // "<= 0" rather than "< 0": prefer the left entry on ties (stable merge)
+    if ((*comp)(buf[l],buf[r],ptr) <= 0) idx[i++] = buf[l++];
+    else idx[i++] = buf[r++];
+  }
+  // at most one of the two sublists has entries left; append them
+  while (l < lhi) idx[i++] = buf[l++];
+  while (r < rhi) idx[i++] = buf[r++];
+}
+
 /* ------------------------------------------------------------------ */
 
 extern "C" {
diff --git a/src/utils.h b/src/utils.h
index 026f46955f..06aef4654f 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -33,23 +33,23 @@ namespace LAMMPS_NS {
      *
      *  \param text the text to be matched against the pattern
      *  \param pattern the search pattern, which may contain regexp markers
-     *  \return true if the pattern matches, false if not
-     */
+     *  \return true if the pattern matches, false if not */
+
     bool strmatch(const std::string &text, const std::string &pattern);
 
     /** Send message to screen and logfile, if available
      *
      *  \param lmp   pointer to LAMMPS class instance
-     *  \param mesg  message to be printed
-     */
+     *  \param mesg  message to be printed */
+
     void logmesg(LAMMPS *lmp, const std::string &mesg);
 
     /** return a string representing the current system error status
      *
      *  This is a wrapper around calling strerror(errno).
      *
-     *  \return  error string
-     */
+     *  \return  error string */
+
     std::string getsyserror();
 
     /** safe wrapper around fgets() which aborts on errors
@@ -61,8 +61,8 @@ namespace LAMMPS_NS {
      *  \param size     size of buffer s (max number of bytes read by fgets())
      *  \param fp       file pointer used by fgets()
      *  \param filename file name associated with fp (may be NULL; then LAMMPS will try to detect)
-     *  \param error    pointer to Error class instance (for abort)
-     */
+     *  \param error    pointer to Error class instance (for abort) */
+
     void sfgets(const char *srcname, int srcline, char *s, int size,
                 FILE *fp, const char *filename, Error *error);
 
@@ -76,8 +76,8 @@ namespace LAMMPS_NS {
      *  \param num      number of data elements read by fread()
      *  \param fp       file pointer used by fread()
      *  \param filename file name associated with fp (may be NULL; then LAMMPS will try to detect)
-     *  \param error    pointer to Error class instance (for abort)
-     */
+     *  \param error    pointer to Error class instance (for abort) */
+
     void sfread(const char *srcname, int srcline, void *s, size_t size,
                 size_t num, FILE *fp, const char *filename, Error *error);
 
@@ -86,8 +86,8 @@ namespace LAMMPS_NS {
      *  \param style type of style that is to be checked for
      *  \param name  name of style that was not found
      *  \param lmp   pointer to top-level LAMMPS class instance
-     *  \return string usable for error messages
-     */
+     *  \return string usable for error messages */
+
     std::string check_packages_for_style(const std::string &style,
                                          const std::string &name, LAMMPS *lmp);
 
@@ -112,8 +112,8 @@ namespace LAMMPS_NS {
      *  \param str      string to be converted to number
      *  \param do_abort determines whether to call Error::one() or Error::all()
      *  \param lmp      pointer to top-level LAMMPS class instance
-     *  \return         integer number (regular int)
-     */
+     *  \return         integer number (regular int)  */
+
     int inumeric(const char *file, int line, const char *str,
                  bool do_abort, LAMMPS *lmp);
 
@@ -125,8 +125,8 @@ namespace LAMMPS_NS {
      *  \param str      string to be converted to number
      *  \param do_abort determines whether to call Error::one() or Error::all()
      *  \param lmp      pointer to top-level LAMMPS class instance
-     *  \return         integer number (bigint)
-     */
+     *  \return         integer number (bigint) */
+
     bigint bnumeric(const char *file, int line, const char *str,
                     bool do_abort, LAMMPS *lmp);
 
@@ -162,6 +162,7 @@ namespace LAMMPS_NS {
      * \param nlo      lower bound
      * \param nhi      upper bound
      * \param error    pointer to Error class for out-of-bounds messages */
+
     template <typename TYPE>
     void bounds(const char *file, int line, const std::string &str,
                 bigint nmin, bigint nmax, TYPE &nlo, TYPE &nhi, Error *error);
@@ -197,45 +198,45 @@ namespace LAMMPS_NS {
     /** Trim leading and trailing whitespace. Like TRIM() in Fortran.
      *
      * \param line string that should be trimmed
-     * \return new string without whitespace (string)
-     */
+     * \return new string without whitespace (string) */
+
     std::string trim(const std::string &line);
 
     /** Return string with anything from '#' onward removed
      *
      * \param line string that should be trimmed
-     * \return new string without comment (string)
-     */
+     * \return new string without comment (string) */
+
     std::string trim_comment(const std::string &line);
 
     /** Count words in string with custom choice of separating characters
      *
      * \param text string that should be searched
      * \param separators string containing characters that will be treated as whitespace
-     * \return number of words found
-     */
+     * \return number of words found */
+
     size_t count_words(const std::string &text, const std::string &separators);
 
     /** Count words in string, ignore any whitespace matching " \t\r\n\f"
      *
      * \param text string that should be searched
-     * \return number of words found
-     */
+     * \return number of words found */
+
     size_t count_words(const std::string &text);
 
     /** Count words in C-string, ignore any whitespace matching " \t\r\n\f"
      *
      * \param text string that should be searched
-     * \return number of words found
-     */
+     * \return number of words found */
+
     size_t count_words(const char *text);
 
     /** Count words in a single line, trim anything from '#' onward
      *
      * \param text string that should be trimmed and searched
      * \param separators string containing characters that will be treated as whitespace
-     * \return number of words found
-     */
+     * \return number of words found */
+
     size_t trim_and_count_words(const std::string &text, const std::string &separators = " \t\r\n\f");
 
     /** Take text and split into non-whitespace words.
@@ -247,22 +248,22 @@ namespace LAMMPS_NS {
      * Use a tokenizer class for that.
      *
      * \param text string that should be split
-     * \return STL vector with the words
-     */
+     * \return STL vector with the words */
+
     std::vector<std::string> split_words(const std::string &text);
 
     /** Check if string can be converted to valid integer
      *
      * \param str string that should be checked
-     * \return true, if string contains valid integer, false otherwise
-     */
+     * \return true, if string contains a valid integer, false otherwise */
+
     bool is_integer(const std::string &str);
 
     /** Check if string can be converted to valid floating-point number
      *
      * \param str string that should be checked
-     * \return true, if string contains valid floating-point number, false otherwise
-     */
+     * \return true, if string contains valid number, false otherwise */
+
     bool is_double(const std::string &str);
 
     /** Try to detect pathname from FILE pointer.
@@ -272,55 +273,60 @@ namespace LAMMPS_NS {
      *  \param buf  storage buffer for pathname. output will be truncated if not large enough
      *  \param len  size of storage buffer. output will be truncated to this length - 1
      *  \param fp   FILE pointer struct from STDIO library for which we want to detect the name
-     *  \return pointer to the storage buffer, i.e. buf
-     */
+     *  \return pointer to the storage buffer, i.e. buf */
+
     const char *guesspath(char *buf, int len, FILE *fp);
 
     /** Strip off leading part of path, return just the filename
      *
      * \param path file path
-     * \return file name
-     */
+     * \return file name */
+
     std::string path_basename(const std::string &path);
 
-    /**
-     * \brief Join two paths
-     * \param a first path
-     * \param b second path
-     * \return combined path
-     */
+    /** Join two pathname segments
+     *
+     * This uses the forward slash '/' character unless LAMMPS is compiled
+     * for Windows where it uses the equivalent backward slash '\\'.
+     *
+     * \param   a  first path
+     * \param   b  second path
+     * \return     combined path */
+
     std::string path_join(const std::string &a, const std::string &b);
 
-    /**
-     * \brief Check if file exists and is readable
+    /** Check if file exists and is readable
+     *
      * \param path file path
-     * \return true if file exists and is readable
-     */
+     * \return true if file exists and is readable */
+
     bool file_is_readable(const std::string &path);
 
     /** Determine full path of potential file. If file is not found in current directory,
      *  search directories listed in LAMMPS_POTENTIALS environment variable
      *
      * \param path file path
-     * \return full path to potential file
-     */
+     * \return full path to potential file */
+
     std::string get_potential_file_path(const std::string &path);
 
     /** Read potential file and return DATE field if it is present
      *
      * \param path file path
      * \param potential_name name of potential that is being read
-     * \return DATE field if present
-     */
-    std::string get_potential_date(const std::string &path, const std::string &potential_name);
+     * \return DATE field if present */
+
+    std::string get_potential_date(const std::string &path,
+                                   const std::string &potential_name);
 
     /** Read potential file and return UNITS field if it is present
      *
      * \param path file path
      * \param potential_name name of potential that is being read
-     * \return UNITS field if present
-     */
-    std::string get_potential_units(const std::string &path, const std::string &potential_name);
+     * \return UNITS field if present */
+
+    std::string get_potential_units(const std::string &path,
+                                    const std::string &potential_name);
 
     enum { NOCONVERT = 0, METAL2REAL = 1, REAL2METAL = 1<<1 };
     enum { UNKNOWN = 0, ENERGY };
@@ -328,16 +334,15 @@ namespace LAMMPS_NS {
     /** Return bitmask of available conversion factors for a given property
      *
      * \param property property to be converted
-     * \return bitmask indicating available conversions
-     */
+     * \return bitmask indicating available conversions */
     int get_supported_conversions(const int property);
 
     /** Return unit conversion factor for given property and selected from/to units
      *
      * \param property property to be converted
      * \param conversion constant indicating the conversion
-     * \return conversion factor
-     */
+     * \return conversion factor */
+
     double get_conversion_factor(const int property, const int conversion);
 
     /** Open a potential file as specified by *name*
@@ -368,8 +373,8 @@ namespace LAMMPS_NS {
      * The strings "off" and "unlimited" result in -1
      *
      * \param timespec a string in the following format: ([[HH:]MM:]SS)
-     * \return total in seconds
-     */
+     * \return total in seconds */
+
     double timespec2seconds(const std::string &timespec);
 
     /** Convert a LAMMPS version date to a number
@@ -386,9 +391,26 @@ namespace LAMMPS_NS {
      * No check is made whether the date is valid.
      *
      * \param  date  string in the format (Day Month Year)
-     * \return       date code
-     */
+     * \return       date code */
     int date2num(const std::string &date);
+
+    /** Custom merge sort implementation
+     *
+     * This function provides a custom upward hybrid merge sort
+     * implementation with support to pass an opaque pointer to
+     * the comparison function, e.g. for access to class members.
+     * This avoids having to use global variables.  For improved
+     * performance, it uses an in-place insertion sort on initial
+     * chunks of up to 64 elements and then switches to merge sort.
+     *
+     * \param  index  Array with indices to be sorted (sorted in place)
+     * \param  num    Length of the index array
+     * \param  ptr    Pointer to opaque object passed to comparison function
+     * \param  comp   Comparison function called as comp(a,b,ptr); returns a
+     *                negative value if a sorts before b, positive if after */
+
+    void merge_sort(int *index, int num, void *ptr,
+                    int (*comp)(int, int, void *));
   }
 }
 
diff --git a/tools/singularity/centos7.def b/tools/singularity/centos7.def
index 7cd38af13e..24be2347ba 100644
--- a/tools/singularity/centos7.def
+++ b/tools/singularity/centos7.def
@@ -11,7 +11,7 @@ From: centos:7
             hdf5-devel python36-virtualenv python36-pip python-pip \
             netcdf-devel netcdf-cxx-devel netcdf-mpich-devel netcdf-openmpi-devel \
             python-virtualenv fftw-devel voro++-devel eigen3-devel gsl-devel openblas-devel enchant \
-            blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel
+            blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel zstd libzstd-devel
         yum clean all
 
         # we need to reset any module variables
@@ -36,7 +36,7 @@ From: centos:7
         # manually install Plumed
         mkdir plumed
         cd plumed
-        version=2.6.0
+        version=2.6.1
         curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz
         tar -xzf plumed.tar.gz
         cd plumed-${version}
diff --git a/tools/singularity/centos8.def b/tools/singularity/centos8.def
index 930ad5a909..eeead43cc8 100644
--- a/tools/singularity/centos8.def
+++ b/tools/singularity/centos8.def
@@ -16,7 +16,8 @@ From: centos:8
                texlive-latex-bin texlive-lualatex-math texlive-fncychap texlive-tabulary \
                texlive-framed texlive-wrapfig texlive-upquote texlive-capt-of \
                texlive-needspace texlive-titlesec texlive-anysize texlive-dvipng \
-               blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel
+               blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel \
+               zstd libzstd-devel
         dnf clean all
 
         # we need to reset any module variables
@@ -41,7 +42,7 @@ From: centos:8
         # manually install Plumed
         mkdir plumed
         cd plumed
-        version=2.6.0
+        version=2.6.1
         curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz
         tar -xzf plumed.tar.gz
         cd plumed-${version}
diff --git a/tools/singularity/fedora32_mingw.def b/tools/singularity/fedora32_mingw.def
index 87b4668c3b..fa0227c4e3 100644
--- a/tools/singularity/fedora32_mingw.def
+++ b/tools/singularity/fedora32_mingw.def
@@ -36,7 +36,8 @@ From: fedora:32
                texlive-latex-bin texlive-lualatex-math texlive-fncychap texlive-tabulary \
                texlive-framed texlive-wrapfig texlive-upquote texlive-capt-of \
                texlive-needspace texlive-titlesec texlive-anysize texlive-dvipng \
-               blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel
+               blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel \
+               zstd libzstd-devel
         dnf clean all
 
         # enable Lmod and load MPI
@@ -47,7 +48,7 @@ From: fedora:32
         # manually install Plumed
         mkdir plumed
         cd plumed
-        version=2.6.0
+        version=2.6.1
         curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz
         tar -xzf plumed.tar.gz
         cd plumed-${version}
diff --git a/tools/singularity/ubuntu18.04.def b/tools/singularity/ubuntu18.04.def
index fbff214251..921000c326 100644
--- a/tools/singularity/ubuntu18.04.def
+++ b/tools/singularity/ubuntu18.04.def
@@ -41,6 +41,7 @@ From: ubuntu:18.04
         libproj-dev \
         libvtk6-dev \
         libyaml-dev \
+        libzstd-dev \
         make \
         mpi-default-bin \
         mpi-default-dev \
@@ -70,26 +71,54 @@ From: ubuntu:18.04
         xxd \
         valgrind \
         gdb \
+        zstd \
         libkim-api-dev \
         openkim-models
 
-    # clean cache
-    rm -rf /var/lib/apt/lists/*
 
-        # manually install Plumed
-        mkdir plumed
-        cd plumed
-        version=2.6.0
-        curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz
-        tar -xzf plumed.tar.gz
-        cd plumed-${version}
-        ./configure --disable-doc --prefix=/usr
-        make
-        make install
-        # fix up installation for CentOS and Fedora
-        # mv -v /usr/lib/pkgconfig/plumed* /usr/share/pkgconfig/
-        cd ../../
-        rm -rvf plumed
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround: comment out the dpkg "path-exclude=/usr/share/doc" rule while installing libkim-api-doc, then restore it
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # gunzip two compressed example files, then install the example drivers and models with "install system"
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+    # download the release tarball (-f: fail on HTTP errors instead of saving the error page), build, install, clean up
+    mkdir plumed
+    cd plumed
+    curl -f -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
 
     # set custom prompt indicating the container name
     CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
@@ -99,6 +128,13 @@ PS1="[ubuntu18.04:\u@\h] \W> "
 EOF
     chmod 755 $CUSTOM_PROMPT_ENV
 
+
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
 %environment
     LC_ALL=C
     export LC_ALL
diff --git a/tools/singularity/ubuntu18.04_amd_rocm.def b/tools/singularity/ubuntu18.04_amd_rocm.def
index 4c58f690aa..e3bed218f5 100644
--- a/tools/singularity/ubuntu18.04_amd_rocm.def
+++ b/tools/singularity/ubuntu18.04_amd_rocm.def
@@ -2,7 +2,7 @@ BootStrap: docker
 From: ubuntu:18.04
 
 %environment
-    export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+    export PATH=/usr/lib/ccache:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
 
 %post
     export DEBIAN_FRONTEND=noninteractive
@@ -25,7 +25,8 @@ From: ubuntu:18.04
         build-essential
 
     apt-get install --no-install-recommends -y software-properties-common
-
+    add-apt-repository -y ppa:openkim/latest  # -y: never wait for a confirmation keypress during the build
+    apt-get update
     apt-get install --no-install-recommends -y \
         bc \
         build-essential \
@@ -41,7 +42,6 @@ From: ubuntu:18.04
         gfortran \
         git \
         hdf5-tools \
-        kmod \
         less \
         libblas-dev \
         libeigen3-dev \
@@ -52,12 +52,15 @@ From: ubuntu:18.04
         libhwloc-dev \
         libjpeg-dev \
         liblapack-dev \
+        libnetcdf-dev \
         libomp-dev \
         libopenblas-dev \
         libnuma-dev \
         libpng-dev \
         libproj-dev \
         libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
         make \
         mpi-default-bin \
         mpi-default-dev \
@@ -79,30 +82,85 @@ From: ubuntu:18.04
         wget \
         xxd \
         valgrind \
-        gdb
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
 
 
+    ###########################################################################
+    # ROCm hipCUB
+    ###########################################################################
+
     export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
-    git clone -b master-rocm-3.5 https://github.com/ROCmSoftwarePlatform/hipCUB.git
+    git clone -b rocm-3.7.x https://github.com/ROCmSoftwarePlatform/hipCUB.git
     mkdir hipCUB/build
     cd hipCUB/build
     CXX=hipcc cmake -D BUILD_TEST=off ..
-    make -j4
+    make
     make package
     make install
 
-    # clean cache
-    rm -rf /var/lib/apt/lists/*
+
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround: comment out the dpkg "path-exclude=/usr/share/doc" rule while installing libkim-api-doc, then restore it
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # gunzip two compressed example files, then install the example drivers and models with "install system"
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+    # download the release tarball (-f: fail on HTTP errors instead of saving the error page), build, install, clean up
+    mkdir plumed
+    cd plumed
+    curl -f -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
 
     # set custom prompt indicating the container name
     CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
     cat >$CUSTOM_PROMPT_ENV <<EOF
 #!/bin/bash
-PS1="[ubuntu18/rocm:\u@\h] \W> "
+PS1="[ubuntu18.04/rocm:\u@\h] \W> "
 EOF
     chmod 755 $CUSTOM_PROMPT_ENV
 
 
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
 %environment
     LC_ALL=C
     export LC_ALL
diff --git a/tools/singularity/ubuntu18.04_gpu.def b/tools/singularity/ubuntu18.04_gpu.def
index aea75bdb5a..379f0427df 100644
--- a/tools/singularity/ubuntu18.04_gpu.def
+++ b/tools/singularity/ubuntu18.04_gpu.def
@@ -2,11 +2,11 @@ BootStrap: docker
 From: ubuntu:18.04
 
 %environment
-    export PATH=/usr/lib/ccache:/usr/local/cuda-10.2/bin:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
-    export CUDADIR=/usr/local/cuda-10.2
-    export CUDA_PATH=/usr/local/cuda-10.2
-    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-10.2/lib64
-    export LIBRARY_PATH=/usr/local/cuda-10.2/lib64/stubs
+    export PATH=/usr/lib/ccache:/usr/local/cuda-11.0/bin:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+    export CUDADIR=/usr/local/cuda-11.0
+    export CUDA_PATH=/usr/local/cuda-11.0
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.0/lib64
+    export LIBRARY_PATH=/usr/local/cuda-11.0/lib64/stubs
 %post
     export DEBIAN_FRONTEND=noninteractive
     apt-get update
@@ -28,6 +28,8 @@ From: ubuntu:18.04
         build-essential
 
     apt-get install --no-install-recommends -y software-properties-common
+    add-apt-repository -y ppa:openkim/latest  # -y: never wait for a confirmation keypress during the build
+    apt-get update
     apt-get install --no-install-recommends -y \
         bc \
         build-essential \
@@ -43,7 +45,6 @@ From: ubuntu:18.04
         gfortran \
         git \
         hdf5-tools \
-        kmod \
         less \
         libblas-dev \
         libeigen3-dev \
@@ -54,12 +55,15 @@ From: ubuntu:18.04
         libhwloc-dev \
         libjpeg-dev \
         liblapack-dev \
+        libnetcdf-dev \
         libomp-dev \
         libopenblas-dev \
         libnuma-dev \
         libpng-dev \
         libproj-dev \
         libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
         make \
         mpi-default-bin \
         mpi-default-dev \
@@ -81,7 +85,15 @@ From: ubuntu:18.04
         wget \
         xxd \
         valgrind \
-        gdb
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+
+    ###########################################################################
+    # CUDA
+    ###########################################################################
 
     wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
     mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
@@ -89,7 +101,7 @@ From: ubuntu:18.04
     add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
     apt-get update
 
-    export CUDA_PKG_VERSION=10.2
+    export CUDA_PKG_VERSION=11.0
 
     apt-get install -y --no-install-recommends \
         cuda-libraries-$CUDA_PKG_VERSION \
@@ -97,37 +109,90 @@ From: ubuntu:18.04
         cuda-libraries-dev-$CUDA_PKG_VERSION \
         cuda-minimal-build-$CUDA_PKG_VERSION \
         cuda-compat-$CUDA_PKG_VERSION \
-        libcublas10 \
-        libcublas-dev
-
-    export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
-    git clone -b master-rocm-3.5 https://github.com/ROCmSoftwarePlatform/hipCUB.git
-    mkdir hipCUB/build
-    cd hipCUB/build
-    CXX=hipcc cmake -D BUILD_TEST=off ..
-    make -j4
-    make package
-    make install
+        libcublas-11-0 \
+        libcublas-dev-11-0
 
     echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf
     echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
 
-    # clean cache
-    rm -rf /var/lib/apt/lists/*
-
     # add missing symlink
-    ln -s /usr/local/cuda-10.2 /usr/local/cuda
-    ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-10.2/lib64/stubs/libcuda.so.1
+    ln -s /usr/local/cuda-11.0 /usr/local/cuda
+    ln -s /usr/local/cuda-11.0/lib64/stubs/libcuda.so /usr/local/cuda-11.0/lib64/stubs/libcuda.so.1
+
+
+    ###########################################################################
+    # ROCm hipCUB
+    ###########################################################################
+
+    export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+    git clone -b rocm-3.7.x https://github.com/ROCmSoftwarePlatform/hipCUB.git
+    mkdir hipCUB/build
+    cd hipCUB/build
+    CXX=hipcc cmake -D BUILD_TEST=off ..
+    make
+    make package
+    make install
+
+
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround for installing files in /usr/share/doc inside of a container
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # install KIM models
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+
+    mkdir plumed
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
 
     # set custom prompt indicating the container name
     CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
     cat >$CUSTOM_PROMPT_ENV <<EOF
 #!/bin/bash
-PS1="[ubuntu18/gpu:\u@\h] \W> "
+PS1="[ubuntu18.04/gpu:\u@\h] \W> "
 EOF
     chmod 755 $CUSTOM_PROMPT_ENV
 
 
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
 %environment
     LC_ALL=C
     export LC_ALL
diff --git a/tools/singularity/ubuntu18.04_intel_opencl.def b/tools/singularity/ubuntu18.04_intel_opencl.def
index b60da61b91..6f4f970f49 100644
--- a/tools/singularity/ubuntu18.04_intel_opencl.def
+++ b/tools/singularity/ubuntu18.04_intel_opencl.def
@@ -4,8 +4,10 @@ From: ubuntu:18.04
 %post
     export DEBIAN_FRONTEND=noninteractive
     apt-get update
-    apt-get upgrade --no-install-recommends -y
     apt-get install --no-install-recommends -y software-properties-common
+    add-apt-repository ppa:openkim/latest
+    apt-get update
+    apt-get upgrade --no-install-recommends -y
     apt-get install --no-install-recommends -y \
         bc \
         build-essential \
@@ -38,6 +40,8 @@ From: ubuntu:18.04
         libpng-dev \
         libproj-dev \
         libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
         make \
         mpi-default-bin \
         mpi-default-dev \
@@ -61,24 +65,79 @@ From: ubuntu:18.04
         wget \
         xxd \
         valgrind \
-        gdb
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+    ###########################################################################
+    # Intel OpenCL
+    ###########################################################################
 
     add-apt-repository ppa:intel-opencl/intel-opencl
     apt-get update
     apt-get install -y intel-opencl-icd
 
-    # clean cache
-    rm -rf /var/lib/apt/lists/*
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # dpkg's path-exclude rule for /usr/share/doc is commented out while libkim-api-doc installs, then restored
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # decompress the gzip'ed example files in place, then install the example drivers and models into the "system" collection
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+    # fetch the release tarball into ./plumed, build and install under /usr, then remove ./plumed
+    mkdir plumed
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
 
     # set custom prompt indicating the container name
     CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
     cat >$CUSTOM_PROMPT_ENV <<EOF
 #!/bin/bash
-PS1="[ubuntu18/intel:\u@\h] \W> "
+PS1="[ubuntu18.04/intel:\u@\h] \W> "
 EOF
     chmod 755 $CUSTOM_PROMPT_ENV
 
 
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
+
 %environment
     LC_ALL=C
     export LC_ALL
diff --git a/tools/singularity/ubuntu18.04_nvidia.def b/tools/singularity/ubuntu18.04_nvidia.def
index 4356ddf7e1..68d2a9166e 100644
--- a/tools/singularity/ubuntu18.04_nvidia.def
+++ b/tools/singularity/ubuntu18.04_nvidia.def
@@ -1,11 +1,13 @@
 BootStrap: docker
-From: nvidia/cuda:10.2-devel-ubuntu18.04
+From: nvidia/cuda:11.0-devel-ubuntu18.04
 
 %post
     export DEBIAN_FRONTEND=noninteractive
     apt-get update
-    apt-get upgrade --no-install-recommends -y
     apt-get install --no-install-recommends -y software-properties-common
+    add-apt-repository ppa:openkim/latest
+    apt-get update
+    apt-get upgrade --no-install-recommends -y
     apt-get install --no-install-recommends -y \
         bc \
         build-essential \
@@ -38,6 +40,8 @@ From: nvidia/cuda:10.2-devel-ubuntu18.04
         libpng-dev \
         libproj-dev \
         libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
         make \
         mpi-default-bin \
         mpi-default-dev \
@@ -61,23 +65,77 @@ From: nvidia/cuda:10.2-devel-ubuntu18.04
         wget \
         xxd \
         valgrind \
-        gdb
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+    ###########################################################################
+    # NVIDIA OpenCL
+    ###########################################################################
 
     mkdir -p /etc/OpenCL/vendors
     echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
 
-    # clean cache
-    rm -rf /var/lib/apt/lists/*
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # dpkg's path-exclude rule for /usr/share/doc is commented out while libkim-api-doc installs, then restored
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # decompress the gzip'ed example files in place, then install the example drivers and models into the "system" collection
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+    # fetch the release tarball into ./plumed, build and install under /usr, then remove ./plumed
+    mkdir plumed
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
 
     # set custom prompt indicating the container name
     CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
     cat >$CUSTOM_PROMPT_ENV <<EOF
 #!/bin/bash
-PS1="[ubuntu18/nvidia:\u@\h] \W> "
+PS1="[ubuntu18.04/nvidia:\u@\h] \W> "
 EOF
     chmod 755 $CUSTOM_PROMPT_ENV
 
 
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
 %environment
     LC_ALL=C
     export LC_ALL
diff --git a/tools/singularity/ubuntu20.04.def b/tools/singularity/ubuntu20.04.def
index 3b2c19e920..a5b1a68027 100644
--- a/tools/singularity/ubuntu20.04.def
+++ b/tools/singularity/ubuntu20.04.def
@@ -41,6 +41,7 @@ From: ubuntu:20.04
         libproj-dev \
         libvtk6-dev \
         libyaml-dev \
+        libzstd-dev \
         make \
         mpi-default-bin \
         mpi-default-dev \
@@ -66,26 +67,52 @@ From: ubuntu:20.04
         xxd \
         valgrind \
         gdb \
+        zstd \
         libkim-api-dev \
         openkim-models
 
-    # clean cache
-    rm -rf /var/lib/apt/lists/*
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
 
-        # manually install Plumed
-        mkdir plumed
-        cd plumed
-        version=2.6.0
-        curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz
-        tar -xzf plumed.tar.gz
-        cd plumed-${version}
-        ./configure --disable-doc --prefix=/usr
-        make
-        make install
-        # fix up installation for CentOS and Fedora
-        # mv -v /usr/lib/pkgconfig/plumed* /usr/share/pkgconfig/
-        cd ../../
-        rm -rvf plumed
+    # dpkg's path-exclude rule for /usr/share/doc is commented out while libkim-api-doc installs, then restored
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # decompress the gzip'ed example files in place, then install the example drivers and models into the "system" collection
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+    # fetch the release tarball into ./plumed, build and install under /usr, then remove ./plumed
+    mkdir plumed
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
 
     # set custom prompt indicating the container name
     CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
@@ -95,6 +122,13 @@ PS1="[ubuntu20.04:\u@\h] \W> "
 EOF
     chmod 755 $CUSTOM_PROMPT_ENV
 
+
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
 %environment
     LC_ALL=C
     export LC_ALL
diff --git a/tools/singularity/ubuntu20.04_amd_rocm.def b/tools/singularity/ubuntu20.04_amd_rocm.def
new file mode 100644
index 0000000000..8be23b05a4
--- /dev/null
+++ b/tools/singularity/ubuntu20.04_amd_rocm.def
@@ -0,0 +1,170 @@
+BootStrap: docker
+From: ubuntu:20.04
+
+%environment
+    export PATH=/usr/lib/ccache:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+%post
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update
+    apt-get upgrade --no-install-recommends -y
+
+    apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
+    # fetch the signing key over TLS (needs ca-certificates); -f makes curl fail on HTTP errors
+    curl -fsSL https://repo.radeon.com/rocm/apt/debian/rocm.gpg.key | apt-key add -
+    # AMD publishes the focal packages under the xenial suite as well
+    printf "deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main\n" > /etc/apt/sources.list.d/rocm.list
+
+    apt-get update
+    apt-get install --no-install-recommends -y \
+        kmod \
+        file \
+        sudo \
+        libelf1 \
+        rocm-dev \
+        rocm-libs \
+        build-essential
+
+    apt-get install --no-install-recommends -y software-properties-common
+    add-apt-repository ppa:openkim/latest
+    apt-get update
+    apt-get install --no-install-recommends -y \
+        bc \
+        build-essential \
+        ccache \
+        clang \
+        cmake \
+        cmake-curses-gui \
+        curl \
+        doxygen \
+        enchant \
+        g++ \
+        gcc \
+        gfortran \
+        git \
+        hdf5-tools \
+        less \
+        libblas-dev \
+        libeigen3-dev \
+        libenchant-dev \
+        libfftw3-dev \
+        libgsl-dev \
+        libhdf5-serial-dev \
+        libhwloc-dev \
+        libjpeg-dev \
+        liblapack-dev \
+        libnetcdf-dev \
+        libomp-dev \
+        libopenblas-dev \
+        libnuma-dev \
+        libpng-dev \
+        libproj-dev \
+        libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
+        make \
+        mpi-default-bin \
+        mpi-default-dev \
+        ninja-build \
+        python3-dev \
+        python3-pip \
+        python3-pkg-resources \
+        python3-setuptools \
+        python3-virtualenv \
+        rsync \
+        ssh \
+        vim-nox \
+        virtualenv \
+        voro++-dev \
+        wget \
+        xxd \
+        valgrind \
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+
+    ###########################################################################
+    # ROCm hipCUB
+    ###########################################################################
+
+    export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+    git clone -b rocm-3.7.x https://github.com/ROCmSoftwarePlatform/hipCUB.git
+    # configure with -S/-B (CMake >= 3.13) and build with make -C so the working directory is left unchanged
+    CXX=hipcc cmake -S hipCUB -B hipCUB/build -D BUILD_TEST=off
+    make -C hipCUB/build
+    make -C hipCUB/build package
+    make -C hipCUB/build install
+    rm -rf hipCUB
+
+
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # dpkg's path-exclude rule for /usr/share/doc is commented out while libkim-api-doc installs, then restored
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # decompress the gzip'ed example files in place, then install the example drivers and models into the "system" collection
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+    # fetch the release tarball into ./plumed, build and install under /usr, then remove ./plumed
+    mkdir plumed
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
+
+    # set custom prompt indicating the container name
+    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
+    cat >$CUSTOM_PROMPT_ENV <<EOF
+#!/bin/bash
+PS1="[ubuntu20.04/rocm:\u@\h] \W> "
+EOF
+    chmod 755 $CUSTOM_PROMPT_ENV
+
+
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
+%environment
+    LC_ALL=C
+    export LC_ALL
+    # tell OpenMPI to not try using Infiniband
+    OMPI_MCA_btl="^openib"
+    # do not warn about unused components as this messes up testing
+    OMPI_MCA_btl_base_warn_component_unused="0"
+    export OMPI_MCA_btl OMPI_MCA_btl_base_warn_component_unused
+
+%labels
+    Author rbberger, akohlmey
diff --git a/tools/singularity/ubuntu20.04_gpu.def b/tools/singularity/ubuntu20.04_gpu.def
new file mode 100644
index 0000000000..7fd036bf7f
--- /dev/null
+++ b/tools/singularity/ubuntu20.04_gpu.def
@@ -0,0 +1,203 @@
+BootStrap: docker
+From: ubuntu:20.04
+
+%environment
+    export PATH=/usr/lib/ccache:/usr/local/cuda-11.0/bin:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+    export CUDADIR=/usr/local/cuda-11.0
+    export CUDA_PATH=/usr/local/cuda-11.0
+    export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda-11.0/lib64
+    export LIBRARY_PATH=/usr/local/cuda-11.0/lib64/stubs
+%post
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update
+    apt-get upgrade --no-install-recommends -y
+
+    apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg
+    # fetch the signing key over TLS (needs ca-certificates); -f makes curl fail on HTTP errors
+    curl -fsSL https://repo.radeon.com/rocm/apt/debian/rocm.gpg.key | apt-key add -
+    # AMD publishes the focal packages under the xenial suite as well
+    printf "deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main\n" > /etc/apt/sources.list.d/rocm.list
+
+    apt-get update
+    apt-get install --no-install-recommends -y \
+        kmod \
+        file \
+        sudo \
+        libelf1 \
+        rocm-dev \
+        rocm-libs \
+        build-essential
+
+    apt-get install --no-install-recommends -y software-properties-common
+    add-apt-repository ppa:openkim/latest
+    apt-get update
+    apt-get install --no-install-recommends -y \
+        bc \
+        build-essential \
+        ccache \
+        clang \
+        cmake \
+        cmake-curses-gui \
+        curl \
+        doxygen \
+        enchant \
+        g++ \
+        gcc \
+        gfortran \
+        git \
+        hdf5-tools \
+        less \
+        libblas-dev \
+        libeigen3-dev \
+        libenchant-dev \
+        libfftw3-dev \
+        libgsl-dev \
+        libhdf5-serial-dev \
+        libhwloc-dev \
+        libjpeg-dev \
+        liblapack-dev \
+        libnetcdf-dev \
+        libomp-dev \
+        libopenblas-dev \
+        libnuma-dev \
+        libpng-dev \
+        libproj-dev \
+        libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
+        make \
+        mpi-default-bin \
+        mpi-default-dev \
+        ninja-build \
+        python3-dev \
+        python3-pip \
+        python3-pkg-resources \
+        python3-setuptools \
+        python3-virtualenv \
+        rsync \
+        ssh \
+        vim-nox \
+        virtualenv \
+        voro++-dev \
+        wget \
+        xxd \
+        valgrind \
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+
+    ###########################################################################
+    # CUDA
+    ###########################################################################
+    # NVIDIA rotated the repository signing key in 2022: 3bf863cc replaces the retired 7fa2af80
+    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
+    mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
+    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub
+    add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /"
+    apt-get update
+
+    export CUDA_PKG_VERSION=11.0
+
+    apt-get install -y --no-install-recommends \
+        cuda-libraries-$CUDA_PKG_VERSION \
+        cuda-command-line-tools-$CUDA_PKG_VERSION \
+        cuda-libraries-dev-$CUDA_PKG_VERSION \
+        cuda-minimal-build-$CUDA_PKG_VERSION \
+        cuda-compat-$CUDA_PKG_VERSION \
+        libcublas-11-0 \
+        libcublas-dev-11-0
+
+    echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf
+    echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
+
+    # add missing symlink
+    ln -s /usr/local/cuda-11.0 /usr/local/cuda
+    ln -s /usr/local/cuda-11.0/lib64/stubs/libcuda.so /usr/local/cuda-11.0/lib64/stubs/libcuda.so.1
+
+
+    ###########################################################################
+    # ROCm hipCUB
+    ###########################################################################
+
+    export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+    git clone -b rocm-3.7.x https://github.com/ROCmSoftwarePlatform/hipCUB.git
+    # configure with -S/-B (CMake >= 3.13) and build with make -C so the working directory is left unchanged
+    CXX=hipcc cmake -S hipCUB -B hipCUB/build -D BUILD_TEST=off
+    make -C hipCUB/build
+    make -C hipCUB/build package
+    make -C hipCUB/build install
+    rm -rf hipCUB
+
+
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # dpkg's path-exclude rule for /usr/share/doc is commented out while libkim-api-doc installs, then restored
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # decompress the gzip'ed example files in place, then install the example drivers and models into the "system" collection
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+    # fetch the release tarball into ./plumed, build and install under /usr, then remove ./plumed
+    mkdir plumed
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
+
+    # set custom prompt indicating the container name
+    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
+    cat >$CUSTOM_PROMPT_ENV <<EOF
+#!/bin/bash
+PS1="[ubuntu20.04/gpu:\u@\h] \W> "
+EOF
+    chmod 755 $CUSTOM_PROMPT_ENV
+
+
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
+%environment
+    LC_ALL=C
+    export LC_ALL
+    # tell OpenMPI to not try using Infiniband
+    OMPI_MCA_btl="^openib"
+    # do not warn about unused components as this messes up testing
+    OMPI_MCA_btl_base_warn_component_unused="0"
+    export OMPI_MCA_btl OMPI_MCA_btl_base_warn_component_unused
+
+%labels
+    Author rbberger, akohlmey
diff --git a/tools/singularity/ubuntu20.04_intel_opencl.def b/tools/singularity/ubuntu20.04_intel_opencl.def
new file mode 100644
index 0000000000..f04b982e64
--- /dev/null
+++ b/tools/singularity/ubuntu20.04_intel_opencl.def
@@ -0,0 +1,145 @@
+BootStrap: docker
+From: ubuntu:20.04
+
+%post
+    export DEBIAN_FRONTEND=noninteractive # keep apt/debconf from asking questions during the image build
+    apt-get update
+    apt-get install --no-install-recommends -y software-properties-common # provides add-apt-repository
+    add-apt-repository -y ppa:openkim/latest # -y: never wait for an interactive ENTER confirmation
+    apt-get update # refresh the package lists so the newly added PPA is included
+    apt-get upgrade --no-install-recommends -y
+    apt-get install --no-install-recommends -y \
+        bc \
+        build-essential \
+        ccache \
+        clang \
+        cmake \
+        cmake-curses-gui \
+        curl \
+        doxygen \
+        enchant \
+        g++ \
+        gcc \
+        gfortran \
+        git \
+        hdf5-tools \
+        less \
+        libblas-dev \
+        libeigen3-dev \
+        libenchant-dev \
+        libfftw3-dev \
+        libgsl-dev \
+        libhdf5-serial-dev \
+        libhwloc-dev \
+        libjpeg-dev \
+        liblapack-dev \
+        libnetcdf-dev \
+        libomp-dev \
+        libopenblas-dev \
+        libnuma-dev \
+        libpng-dev \
+        libproj-dev \
+        libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
+        make \
+        mpi-default-bin \
+        mpi-default-dev \
+        ninja-build \
+        python3-dev \
+        python3-pip \
+        python3-pkg-resources \
+        python3-setuptools \
+        python3-virtualenv \
+        rsync \
+        ssh \
+        vim-nox \
+        virtualenv \
+        voro++-dev \
+        wget \
+        xxd \
+        valgrind \
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+    ###########################################################################
+    # Intel OpenCL
+    ###########################################################################
+
+    add-apt-repository -y ppa:intel-opencl/intel-opencl # -y: never wait for an interactive ENTER confirmation
+    apt-get update # refresh the package lists so the newly added PPA is included
+    apt-get install -y intel-opencl-icd
+
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround for installing files in /usr/share/doc inside of a container: comment out the dpkg path-exclude rule, install, then restore it
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes # disable the exclude rule
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes # re-enable the exclude rule
+
+    # install KIM models: the example drivers and models under $KIM_API_EXAMPLES go into the "system" collection
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz # decompress in place
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz # decompress in place
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1 # release to build; used in the download URL and the unpacked directory name
+
+    mkdir plumed # scratch directory for the download and build
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz # -L: follow HTTP redirects
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr # skip building the documentation; install under /usr
+    make
+    make install
+    cd ../../ # back to the directory that contains plumed/
+    rm -rvf plumed # delete the tarball and the build tree
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
+
+    # set custom prompt indicating the container name: write a script that defines PS1 into the container's env directory
+    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
+    cat >$CUSTOM_PROMPT_ENV <<EOF
+#!/bin/bash
+PS1="[ubuntu20.04/intel:\u@\h] \W> "
+EOF
+    chmod 755 $CUSTOM_PROMPT_ENV # rwxr-xr-x: readable and executable by every user
+
+
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache: delete the package lists downloaded by "apt-get update"
+    rm -rf /var/lib/apt/lists/*
+
+
+%environment
+    LC_ALL=C # use the plain "C" locale inside the container
+    export LC_ALL
+    # tell OpenMPI to not try using Infiniband (a leading "^" excludes the listed BTL component, here openib)
+    OMPI_MCA_btl="^openib"
+    # do not warn about unused components as this messes up testing
+    OMPI_MCA_btl_base_warn_component_unused="0"
+    export OMPI_MCA_btl OMPI_MCA_btl_base_warn_component_unused # export both so processes started in the container see them
+
+%labels
+    Author rbberger, akohlmey
diff --git a/tools/singularity/ubuntu20.04_nvidia.def b/tools/singularity/ubuntu20.04_nvidia.def
new file mode 100644
index 0000000000..06fccc52d1
--- /dev/null
+++ b/tools/singularity/ubuntu20.04_nvidia.def
@@ -0,0 +1,147 @@
+BootStrap: docker
+From: nvidia/cuda:11.0-devel-ubuntu20.04
+
+%post
+    export DEBIAN_FRONTEND=noninteractive # keep apt/debconf from asking questions during the image build
+    apt-get update
+    apt-get install --no-install-recommends -y software-properties-common # provides add-apt-repository
+    add-apt-repository -y ppa:openkim/latest # -y: never wait for an interactive ENTER confirmation
+    apt-get update # refresh the package lists so the newly added PPA is included
+    apt-get upgrade --no-install-recommends -y
+    apt-get install --no-install-recommends -y \
+        bc \
+        build-essential \
+        ccache \
+        clang \
+        clinfo \
+        cmake \
+        cmake-curses-gui \
+        curl \
+        doxygen \
+        enchant \
+        g++ \
+        gcc \
+        gfortran \
+        git \
+        hdf5-tools \
+        less \
+        libblas-dev \
+        libeigen3-dev \
+        libenchant-dev \
+        libfftw3-dev \
+        libgsl-dev \
+        libhdf5-serial-dev \
+        libhwloc-dev \
+        libjpeg-dev \
+        liblapack-dev \
+        libomp-dev \
+        libopenblas-dev \
+        libnuma-dev \
+        libpng-dev \
+        libproj-dev \
+        libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
+        make \
+        mpi-default-bin \
+        mpi-default-dev \
+        ninja-build \
+        ocl-icd-libopencl1 \
+        ocl-icd-opencl-dev \
+        python3-dev \
+        python3-pip \
+        python3-pkg-resources \
+        python3-setuptools \
+        python3-virtualenv \
+        rsync \
+        ssh \
+        vim-nox \
+        virtualenv \
+        voro++-dev \
+        wget \
+        xxd \
+        valgrind \
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+
+    ###########################################################################
+    # NVIDIA OpenCL
+    ###########################################################################
+
+    mkdir -p /etc/OpenCL/vendors # -p: no error if the directory already exists
+    echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd # vendor file naming NVIDIA's OpenCL library (presumably read by the OpenCL ICD loader)
+
+
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround for installing files in /usr/share/doc inside of a container: comment out the dpkg path-exclude rule, install, then restore it
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes # disable the exclude rule
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes # re-enable the exclude rule
+
+    # install KIM models: the example drivers and models under $KIM_API_EXAMPLES go into the "system" collection
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz # decompress in place
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz # decompress in place
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1 # release to build; used in the download URL and the unpacked directory name
+
+    mkdir plumed # scratch directory for the download and build
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz # -L: follow HTTP redirects
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr # skip building the documentation; install under /usr
+    make
+    make install
+    cd ../../ # back to the directory that contains plumed/
+    rm -rvf plumed # delete the tarball and the build tree
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
+
+    # set custom prompt indicating the container name: write a script that defines PS1 into the container's env directory
+    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
+    cat >$CUSTOM_PROMPT_ENV <<EOF
+#!/bin/bash
+PS1="[ubuntu20.04/nvidia:\u@\h] \W> "
+EOF
+    chmod 755 $CUSTOM_PROMPT_ENV # rwxr-xr-x: readable and executable by every user
+
+
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache: delete the package lists downloaded by "apt-get update"
+    rm -rf /var/lib/apt/lists/*
+
+%environment
+    LC_ALL=C # use the plain "C" locale inside the container
+    export LC_ALL
+    # tell OpenMPI to not try using Infiniband (a leading "^" excludes the listed BTL component, here openib)
+    OMPI_MCA_btl="^openib"
+    # do not warn about unused components as this messes up testing
+    OMPI_MCA_btl_base_warn_component_unused="0"
+    export OMPI_MCA_btl OMPI_MCA_btl_base_warn_component_unused # export both so processes started in the container see them
+
+%labels
+    Author rbberger, akohlmey
diff --git a/unittest/c-library/test_library_open.cpp b/unittest/c-library/test_library_open.cpp
index 12d974d6c4..16d25eca13 100644
--- a/unittest/c-library/test_library_open.cpp
+++ b/unittest/c-library/test_library_open.cpp
@@ -164,7 +164,7 @@ TEST(lammps_open_fortran, no_args) {
     MPI_Comm_split(MPI_COMM_WORLD, 0, 1, &mycomm);
     int fcomm = MPI_Comm_c2f(mycomm);
     ::testing::internal::CaptureStdout();
-    void *handle = lammps_open_fortran(0, NULL, fcomm, NULL);
+    void *handle = lammps_open_fortran(0, NULL, fcomm);
     std::string output = ::testing::internal::GetCapturedStdout();
     EXPECT_STREQ(output.substr(0,6).c_str(),"LAMMPS");
     LAMMPS_NS::LAMMPS *lmp = (LAMMPS_NS::LAMMPS *)handle;
diff --git a/unittest/utils/CMakeLists.txt b/unittest/utils/CMakeLists.txt
index 67135deb11..5b5b931210 100644
--- a/unittest/utils/CMakeLists.txt
+++ b/unittest/utils/CMakeLists.txt
@@ -2,6 +2,10 @@ add_executable(test_tokenizer test_tokenizer.cpp)
 target_link_libraries(test_tokenizer PRIVATE lammps GTest::GMockMain GTest::GMock GTest::GTest)
 add_test(Tokenizer test_tokenizer)
 
+add_executable(test_mempool test_mempool.cpp) # unit tests for the MyPage and MyPoolChunk templates
+target_link_libraries(test_mempool PRIVATE lammps GTest::GMockMain GTest::GMock GTest::GTest)
+add_test(MemPool test_mempool) # registered with CTest under the name "MemPool"
+
 add_executable(test_utils test_utils.cpp)
 target_link_libraries(test_utils PRIVATE lammps GTest::GMockMain GTest::GMock GTest::GTest)
 add_test(Utils test_utils)
diff --git a/unittest/utils/test_mempool.cpp b/unittest/utils/test_mempool.cpp
new file mode 100644
index 0000000000..11c5be50ee
--- /dev/null
+++ b/unittest/utils/test_mempool.cpp
@@ -0,0 +1,347 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "my_page.h"
+#include "my_pool_chunk.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using namespace LAMMPS_NS;
+
+TEST(MyPage, int) { // exercises MyPage<int>: init(), vget()/vgot(), get(), reset(), status(), size() and the ndatum/nchunk counters
+    MyPage<int> p;
+
+    // default init. maxchunk=1, pagesize=1024
+    int rv = p.init();
+    ASSERT_EQ(rv,0);
+
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    int *iptr = p.vget();
+    // second call to vget() should give same pointer without vgot()
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(1); // claim 1 element; counters and the next vget() pointer must advance (asserted below)
+    ++iptr; // expected address returned by the next vget()
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,1);
+    ASSERT_EQ(p.nchunk,1);
+    ASSERT_EQ(iptr,p.vget());
+    // use too large chunk size: 2 is more than maxchunk=1, so status() must report 1
+    p.vgot(2);
+    ASSERT_EQ(1,p.status());
+
+    p.reset(); // counters must be back at zero afterwards
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    iptr = p.vget();
+    p.vgot(1);
+    ++iptr;
+    ASSERT_EQ(iptr,p.get()); // get() without argument hands out the next single element
+    ++iptr;
+    ASSERT_EQ(iptr,p.get(1));
+    ASSERT_EQ(p.ndatum,3); // 1 + 1 + 1
+    ASSERT_EQ(p.nchunk,3); // vgot(1), get(), get(1)
+
+    // restart with custom init. maxchunk=16, pagesize=64, pagedelta=2
+    rv = p.init(16,64,2);
+    ASSERT_EQ(rv,0);
+
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    iptr = p.vget();
+    // second call to vget() should give same pointer without vgot()
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(16);
+    iptr += 16; // expected address returned by the next vget()
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,16);
+    ASSERT_EQ(p.nchunk,1);
+
+    // use too large chunk size: 32 is more than maxchunk=16, so status() must report 1
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(32);
+    ASSERT_EQ(1,p.status());
+
+    p.reset();
+    ASSERT_EQ(0,p.status()); // reset() also clears the error status set above
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    iptr = p.vget();
+    p.vgot(16);
+    iptr = p.vget();
+    p.vgot(4);
+    iptr += 4;
+    ASSERT_EQ(iptr,p.get());
+    ++iptr;
+    ASSERT_EQ(iptr,p.get(16));
+    ASSERT_DOUBLE_EQ(p.size(),(double)sizeof(int)*128.0); // 128 elements = 64 * 2 from init(16,64,2)
+    ASSERT_EQ(p.ndatum,37); // 16 + 4 + 1 + 16
+    ASSERT_EQ(p.nchunk,4); // vgot(16), vgot(4), get(), get(16)
+    p.get(16);
+    p.get(16);
+    // allocation on the same page: consecutive chunks are contiguous
+    iptr = p.get(16);
+    iptr += 16;
+    ASSERT_EQ(iptr,p.get(16));
+    // allocation on different pages: the next chunk does not follow the previous one in memory
+    p.get(16);
+    iptr += 16;
+    ASSERT_NE(iptr,p.get(16));
+    ASSERT_DOUBLE_EQ(p.size(),(double)sizeof(int)*256.0); // storage has grown from 128 to 256 elements
+    ASSERT_EQ(p.ndatum,133); // 37 + 6 * 16
+    ASSERT_EQ(p.nchunk,10); // 4 + 6 further get(16) calls
+}
+
+TEST(MyPage, double) { // exercises MyPage<double>: init(), vget()/vgot(), get(), reset(), status(), size() and the ndatum/nchunk counters
+    MyPage<double> p;
+
+    // default init. maxchunk=1, pagesize=1024
+    int rv = p.init();
+    ASSERT_EQ(rv,0);
+
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    double *iptr = p.vget();
+    // second call to vget() should give same pointer without vgot()
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(1); // claim 1 element; counters and the next vget() pointer must advance (asserted below)
+    ++iptr; // expected address returned by the next vget()
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,1);
+    ASSERT_EQ(p.nchunk,1);
+    ASSERT_EQ(iptr,p.vget());
+    // use too large chunk size: 2 is more than maxchunk=1, so status() must report 1
+    p.vgot(2);
+    ASSERT_EQ(1,p.status());
+
+    p.reset(); // counters must be back at zero afterwards
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    iptr = p.vget();
+    p.vgot(1);
+    ++iptr;
+    ASSERT_EQ(iptr,p.get()); // get() without argument hands out the next single element
+    ++iptr;
+    ASSERT_EQ(iptr,p.get(1));
+    ASSERT_EQ(p.ndatum,3); // 1 + 1 + 1
+    ASSERT_EQ(p.nchunk,3); // vgot(1), get(), get(1)
+
+    // restart with custom init. maxchunk=16, pagesize=64, pagedelta=2
+    rv = p.init(16,64,2);
+    ASSERT_EQ(rv,0);
+
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    iptr = p.vget();
+    // second call to vget() should give same pointer without vgot()
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(16);
+    iptr += 16; // expected address returned by the next vget()
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,16);
+    ASSERT_EQ(p.nchunk,1);
+
+    // use too large chunk size: 32 is more than maxchunk=16, so status() must report 1
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(32);
+    ASSERT_EQ(1,p.status());
+
+    p.reset();
+    ASSERT_EQ(0,p.status()); // reset() also clears the error status set above
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    iptr = p.vget();
+    p.vgot(16);
+    iptr = p.vget();
+    p.vgot(4);
+    iptr += 4;
+    ASSERT_EQ(iptr,p.get());
+    ++iptr;
+    ASSERT_EQ(iptr,p.get(16));
+    ASSERT_DOUBLE_EQ(p.size(),(double)sizeof(double)*128.0); // 128 elements = 64 * 2 from init(16,64,2)
+    ASSERT_EQ(p.ndatum,37); // 16 + 4 + 1 + 16
+    ASSERT_EQ(p.nchunk,4); // vgot(16), vgot(4), get(), get(16)
+    p.get(16);
+    p.get(16);
+    // allocation on the same page: consecutive chunks are contiguous
+    iptr = p.get(16);
+    iptr += 16;
+    ASSERT_EQ(iptr,p.get(16));
+    // allocation on different pages: the next chunk does not follow the previous one in memory
+    p.get(16);
+    iptr += 16;
+    ASSERT_NE(iptr,p.get(16));
+    ASSERT_DOUBLE_EQ(p.size(),(double)sizeof(double)*256.0); // storage has grown from 128 to 256 elements
+    ASSERT_EQ(p.ndatum,133); // 37 + 6 * 16
+    ASSERT_EQ(p.nchunk,10); // 4 + 6 further get(16) calls
+}
+
+TEST(MyPage, bigint) { // exercises MyPage<bigint>: init(), vget()/vgot(), get(), reset(), status(), size() and the ndatum/nchunk counters
+    MyPage<bigint> p;
+
+    // default init. maxchunk=1, pagesize=1024
+    int rv = p.init();
+    ASSERT_EQ(rv,0);
+
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    bigint *iptr = p.vget();
+    // second call to vget() should give same pointer without vgot()
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(1); // claim 1 element; counters and the next vget() pointer must advance (asserted below)
+    ++iptr; // expected address returned by the next vget()
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,1);
+    ASSERT_EQ(p.nchunk,1);
+    ASSERT_EQ(iptr,p.vget());
+    // use too large chunk size: 2 is more than maxchunk=1, so status() must report 1
+    p.vgot(2);
+    ASSERT_EQ(1,p.status());
+
+    p.reset(); // counters must be back at zero afterwards
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    iptr = p.vget();
+    p.vgot(1);
+    ++iptr;
+    ASSERT_EQ(iptr,p.get()); // get() without argument hands out the next single element
+    ++iptr;
+    ASSERT_EQ(iptr,p.get(1));
+    ASSERT_EQ(p.ndatum,3); // 1 + 1 + 1
+    ASSERT_EQ(p.nchunk,3); // vgot(1), get(), get(1)
+
+    // restart with custom init. maxchunk=16, pagesize=64, pagedelta=2
+    rv = p.init(16,64,2);
+    ASSERT_EQ(rv,0);
+
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    iptr = p.vget();
+    // second call to vget() should give same pointer without vgot()
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(16);
+    iptr += 16; // expected address returned by the next vget()
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,16);
+    ASSERT_EQ(p.nchunk,1);
+
+    // use too large chunk size: 32 is more than maxchunk=16, so status() must report 1
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(32);
+    ASSERT_EQ(1,p.status());
+
+    p.reset();
+    ASSERT_EQ(0,p.status()); // reset() also clears the error status set above
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    iptr = p.vget();
+    p.vgot(16);
+    iptr = p.vget();
+    p.vgot(4);
+    iptr += 4;
+    ASSERT_EQ(iptr,p.get());
+    ++iptr;
+    ASSERT_EQ(iptr,p.get(16));
+    ASSERT_DOUBLE_EQ(p.size(),(double)sizeof(bigint)*128.0); // 128 elements = 64 * 2 from init(16,64,2)
+    ASSERT_EQ(p.ndatum,37); // 16 + 4 + 1 + 16
+    ASSERT_EQ(p.nchunk,4); // vgot(16), vgot(4), get(), get(16)
+    p.get(16);
+    p.get(16);
+    // allocation on the same page: consecutive chunks are contiguous
+    iptr = p.get(16);
+    iptr += 16;
+    ASSERT_EQ(iptr,p.get(16));
+    // allocation on different pages: the next chunk does not follow the previous one in memory
+    p.get(16);
+    iptr += 16;
+    ASSERT_NE(iptr,p.get(16));
+    ASSERT_DOUBLE_EQ(p.size(),(double)sizeof(bigint)*256.0); // storage has grown from 128 to 256 elements
+    ASSERT_EQ(p.ndatum,133); // 37 + 6 * 16
+    ASSERT_EQ(p.nchunk,10); // 4 + 6 further get(16) calls
+}
+
+TEST(MyPoolChunk, int) { // exercises a default-constructed MyPoolChunk<int>: get(), put(), status(), size() and the ndatum/nchunk counters
+    // defaults to minchunk=1, maxchunk=1, nbin=1, chunksperpage=1024, pagedelta=1
+    MyPoolChunk<int> p;
+
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+    ASSERT_EQ(p.size(),0.0); // nothing is allocated before the first get()
+
+    int idx=~0x0000; // start with all bits set so that the index written by get() is observable
+    int *iptr = p.get(idx);
+    ASSERT_NE(iptr,nullptr);
+    ASSERT_EQ(idx,0); // first chunk handed out has index 0
+
+    iptr = p.get(1,idx);
+    ASSERT_NE(iptr,nullptr);
+    ASSERT_EQ(idx,1); // second chunk handed out has index 1
+    // we have only one page allocated
+    ASSERT_EQ(p.size(),1024*sizeof(int)+1024*sizeof(int)+sizeof(void *)+sizeof(int));
+    ASSERT_EQ(p.ndatum,2);
+    ASSERT_EQ(p.nchunk,2);
+
+    p.put(0); // give back the chunk with index 0; both counters must drop by one
+    ASSERT_EQ(p.ndatum,1);
+    ASSERT_EQ(p.nchunk,1);
+
+    iptr = p.get(2,idx); // request of size 2 must fail (presumably because it exceeds maxchunk=1)
+    ASSERT_EQ(iptr,nullptr);
+    ASSERT_EQ(p.status(),3);
+    ASSERT_EQ(p.ndatum,1); // a failed request leaves the counters unchanged
+    ASSERT_EQ(p.nchunk,1);
+}
+
+TEST(MyPoolChunk, double) { // exercises a default-constructed MyPoolChunk<double>: get(), put(), status(), size() and the ndatum/nchunk counters
+    // defaults to minchunk=1, maxchunk=1, nbin=1, chunksperpage=1024, pagedelta=1
+    MyPoolChunk<double> p;
+
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+    ASSERT_EQ(p.size(),0.0); // nothing is allocated before the first get()
+
+    int idx=~0x0000; // start with all bits set so that the index written by get() is observable
+    double *dptr = p.get(idx);
+    ASSERT_NE(dptr,nullptr);
+    ASSERT_EQ(idx,0); // first chunk handed out has index 0
+
+    dptr = p.get(1,idx);
+    ASSERT_NE(dptr,nullptr);
+    ASSERT_EQ(idx,1); // second chunk handed out has index 1
+    // we have only one page allocated
+    ASSERT_EQ(p.size(),1024*sizeof(int)+1024*sizeof(double)+sizeof(void *)+sizeof(int));
+
+    p.put(0); // give back the chunk with index 0; one chunk remains in use
+    ASSERT_EQ(p.ndatum,1);
+    ASSERT_EQ(p.nchunk,1);
+
+    dptr = p.get(2,idx); // request of size 2 must fail (presumably because it exceeds maxchunk=1)
+    ASSERT_EQ(dptr,nullptr);
+    ASSERT_EQ(p.status(),3);
+    ASSERT_EQ(p.ndatum,1); // a failed request leaves the counters unchanged
+    ASSERT_EQ(p.nchunk,1);
+}
+