diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 04ec037184..a639c1d993 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -131,7 +131,9 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_CXX_COMPILER_ID STREQUAL "
 endif()
 
 # silence nvcc warnings
-if((PKG_KOKKOS) AND (Kokkos_ENABLE_CUDA) AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang"))
+if((PKG_KOKKOS) AND (Kokkos_ENABLE_CUDA) AND NOT  # flags below are skipped for the compiler IDs listed next
+    ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM")
+    OR (CMAKE_CXX_COMPILER_ID STREQUAL "XLClang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "CrayClang")))
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xcudafe --diag_suppress=unrecognized_pragma,--diag_suppress=128")
 endif()
 
diff --git a/cmake/Modules/Packages/COLVARS.cmake b/cmake/Modules/Packages/COLVARS.cmake
index 8fa0d84f01..b4dc738626 100644
--- a/cmake/Modules/Packages/COLVARS.cmake
+++ b/cmake/Modules/Packages/COLVARS.cmake
@@ -26,6 +26,11 @@ if(BUILD_OMP)
   target_link_libraries(colvars PRIVATE OpenMP::OpenMP_CXX)
 endif()
 
+if(BUILD_MPI)  # MPI-enabled build: compile and link colvars with MPI
+  target_compile_definitions(colvars PUBLIC -DCOLVARS_MPI)  # PUBLIC: define is also propagated to targets linking colvars
+  target_link_libraries(colvars PUBLIC MPI::MPI_CXX)
+endif()
+
 if(COLVARS_DEBUG)
   # Need to export the define publicly to be valid in interface code
   target_compile_definitions(colvars PUBLIC -DCOLVARS_DEBUG)
diff --git a/cmake/Modules/Packages/MC.cmake b/cmake/Modules/Packages/MC.cmake
index f162254558..2a72a895cf 100644
--- a/cmake/Modules/Packages/MC.cmake
+++ b/cmake/Modules/Packages/MC.cmake
@@ -7,3 +7,13 @@ if(NOT PKG_MANYBODY)
   list(REMOVE_ITEM LAMMPS_SOURCES ${LAMMPS_SOURCE_DIR}/MC/fix_sgcmc.cpp)
   set_property(TARGET lammps PROPERTY SOURCES "${LAMMPS_SOURCES}")
 endif()
+
+# fix neighbor/swap can only be built if the VORONOI package is also installed
+if(NOT PKG_VORONOI)
+  get_property(LAMMPS_FIX_HEADERS GLOBAL PROPERTY FIX)  # remove the header from the global FIX property list
+  list(REMOVE_ITEM LAMMPS_FIX_HEADERS ${LAMMPS_SOURCE_DIR}/MC/fix_neighbor_swap.h)
+  set_property(GLOBAL PROPERTY FIX "${LAMMPS_FIX_HEADERS}")
+  get_target_property(LAMMPS_SOURCES lammps SOURCES)  # remove the implementation from the lammps target sources
+  list(REMOVE_ITEM LAMMPS_SOURCES ${LAMMPS_SOURCE_DIR}/MC/fix_neighbor_swap.cpp)
+  set_property(TARGET lammps PROPERTY SOURCES "${LAMMPS_SOURCES}")
+endif()
diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst
index 0c8d04c97a..cdc7a4a89d 100644
--- a/doc/src/Commands_fix.rst
+++ b/doc/src/Commands_fix.rst
@@ -78,6 +78,7 @@ OPT.
    * :doc:`flow/gauss <fix_flow_gauss>`
    * :doc:`freeze (k) <fix_freeze>`
    * :doc:`gcmc <fix_gcmc>`
+   * :doc:`gjf <fix_gjf>`
    * :doc:`gld <fix_gld>`
    * :doc:`gle <fix_gle>`
    * :doc:`gravity (ko) <fix_gravity>`
@@ -112,6 +113,7 @@ OPT.
    * :doc:`mvv/tdpd <fix_mvv_dpd>`
    * :doc:`neb <fix_neb>`
    * :doc:`neb/spin <fix_neb_spin>`
+   * :doc:`neighbor/swap <fix_neighbor_swap>`
    * :doc:`nonaffine/displacement <fix_nonaffine_displacement>`
    * :doc:`nph (ko) <fix_nh>`
    * :doc:`nph/asphere (o) <fix_nph_asphere>`
diff --git a/doc/src/Commands_removed.rst b/doc/src/Commands_removed.rst
index 847fe76456..c92d3d9064 100644
--- a/doc/src/Commands_removed.rst
+++ b/doc/src/Commands_removed.rst
@@ -12,6 +12,17 @@ stop LAMMPS and print a suitable error message in most cases, when a
 style/command is used that has been removed or will replace the command
 with the direct alternative (if available) and print a warning.
 
+GJF formulation in fix langevin
+-------------------------------
+
+.. deprecated:: TBD
+
+The *gjf* keyword in fix langevin is deprecated and will be removed
+soon.  The GJF functionality has been moved to its own fix style
+:doc:`fix gjf <fix_gjf>` and it is strongly recommended to use that
+fix instead.
+
+
 LAMMPS shell
 ------------
 
diff --git a/doc/src/Howto.rst b/doc/src/Howto.rst
index ec90472c27..cdc4efd737 100644
--- a/doc/src/Howto.rst
+++ b/doc/src/Howto.rst
@@ -66,6 +66,7 @@ Force fields howto
    :name: force_howto
    :maxdepth: 1
 
+   Howto_FFgeneral
    Howto_bioFF
    Howto_amoeba
    Howto_tip3p
diff --git a/doc/src/Howto_FFgeneral.rst b/doc/src/Howto_FFgeneral.rst
new file mode 100644
index 0000000000..1b96ae1119
--- /dev/null
+++ b/doc/src/Howto_FFgeneral.rst
@@ -0,0 +1,55 @@
+Some general force field considerations
+=======================================
+
+A compact summary of the concepts, definitions, and properties of force
+fields with explicit bonded interactions (like the ones discussed in
+this HowTo) is given in :ref:`(Gissinger) <Typelabel2>`.
+
+A force field has 2 parts: the formulas that define its potential
+functions and the coefficients used for a particular system.  To assign
+parameters it is first required to assign atom types.  Those are not
+only based on the elements, but also on the chemical environment due to
+the atoms bound to them.  This often follows the chemical concept of
+*functional groups*.  Example: a carbon atom bound with a single bond to
+a single OH-group (alcohol) would be a different atom type than a carbon
+atom bound to a methyl CH3 group (aliphatic carbon).  The atom types
+usually then determine the non-bonded Lennard-Jones parameters and the
+parameters for bonds, angles, dihedrals, and impropers.  On top of that,
+partial charges have to be applied.  Those are usually independent of
+the atom types and are determined either for groups of atoms called
+residues with some fitting procedure based on quantum mechanical
+calculations, or based on some increment system that adds or subtracts
+increments from the partial charge of an atom based on the types of
+the neighboring atoms.
+
+Force fields differ in the strategies they employ to determine the
+parameters and charge distribution and in how generic or specific they
+are, which in turn has an impact on the accuracy (compare for example
+CGenFF to CHARMM and GAFF to Amber).  Because of the different
+strategies, it is not a good idea to use a mix of parameters from
+different force field *families* (like CHARMM, Amber, or GROMOS)
+and that extends to the parameters for the solvent, especially
+water.  The publication describing the parameterization of a force
+field will describe which water model to use.  Changing the water
+model usually leads to overall worse results (even if it may improve
+on the water itself).
+
+In addition, one has to consider that *families* of force fields like
+CHARMM, Amber, OPLS, or GROMOS have evolved over time and thus provide
+different *revisions* of the force field parameters.  These often
+correspond to changes in the functional form or the parameterization
+strategies.  This may also result in changes required for simulation
+settings like the preferred cutoff or how Coulomb interactions are
+computed (cutoff, smoothed/shifted cutoff, or long-range with Ewald
+summation or equivalent).  Unless explicitly stated in the publication
+describing the force field, the Coulomb interaction cannot be chosen at
+will but must match the revision of the force field.  That said,
+liberties may be taken during the initial equilibration of a system to
+speed up the process, but not for production simulations.
+
+----------
+
+.. _Typelabel2:
+
+**(Gissinger)** J. R. Gissinger, I. Nikiforov, Y. Afshar, B. Waters, M. Choi, D. S. Karls, A. Stukowski, W. Im, H. Heinz, A. Kohlmeyer, and E. B. Tadmor, J Phys Chem B, 128, 3282-3297 (2024).
+
diff --git a/doc/src/Howto_bioFF.rst b/doc/src/Howto_bioFF.rst
index 92dd45b9b6..cf8e4ab13e 100644
--- a/doc/src/Howto_bioFF.rst
+++ b/doc/src/Howto_bioFF.rst
@@ -1,22 +1,16 @@
 CHARMM, AMBER, COMPASS, DREIDING, and OPLS force fields
 =======================================================
 
-A compact summary of the concepts, definitions, and properties of
-force fields with explicit bonded interactions (like the ones discussed
-in this HowTo) is given in :ref:`(Gissinger) <Typelabel2>`.
-
-A force field has 2 parts: the formulas that define it and the
-coefficients used for a particular system.  Here we only discuss
-formulas implemented in LAMMPS that correspond to formulas commonly used
-in the CHARMM, AMBER, COMPASS, and DREIDING force fields.  Setting
-coefficients is done either from special sections in an input data file
-via the :doc:`read_data <read_data>` command or in the input script with
-commands like :doc:`pair_coeff <pair_coeff>` or :doc:`bond_coeff
-<bond_coeff>` and so on.  See the :doc:`Tools <Tools>` doc page for
-additional tools that can use CHARMM, AMBER, or Materials Studio
-generated files to assign force field coefficients and convert their
-output into LAMMPS input. LAMMPS input scripts can also be generated by
-`charmm-gui.org <https://charmm-gui.org/>`_.
+Here we only discuss formulas implemented in LAMMPS that correspond to
+formulas commonly used in the CHARMM, AMBER, COMPASS, and DREIDING force
+fields.  Setting coefficients is done either from special sections in an
+input data file via the :doc:`read_data <read_data>` command or in the
+input script with commands like :doc:`pair_coeff <pair_coeff>` or
+:doc:`bond_coeff <bond_coeff>` and so on.  See the :doc:`Tools <Tools>`
+doc page for additional tools that can use CHARMM, AMBER, or Materials
+Studio generated files to assign force field coefficients and convert
+their output into LAMMPS input. LAMMPS input scripts can also be
+generated by `charmm-gui.org <https://charmm-gui.org/>`_.
 
 CHARMM and AMBER
 ----------------
@@ -203,9 +197,11 @@ rather than individual force constants and geometric parameters that
 depend on the particular combinations of atoms involved in the bond,
 angle, or torsion terms.  DREIDING has an :doc:`explicit hydrogen bond
 term <pair_hbond_dreiding>` to describe interactions involving a
-hydrogen atom on very electronegative atoms (N, O, F).  Unlike CHARMM
-or AMBER, the DREIDING force field has not been parameterized for
-considering solvents (like water).
+hydrogen atom on very electronegative atoms (N, O, F).  Unlike CHARMM or
+AMBER, the DREIDING force field has not been parameterized for
+considering solvents (like water) and has no rules for assigning
+(partial) charges.  That will seriously limit its accuracy when used for
+simulating systems where those matter.
 
 See :ref:`(Mayo) <howto-Mayo>` for a description of the DREIDING force field
 
@@ -272,10 +268,6 @@ compatible with a subset of OPLS interactions.
 
 ----------
 
-.. _Typelabel2:
-
-**(Gissinger)** J. R. Gissinger, I. Nikiforov, Y. Afshar, B. Waters, M. Choi, D. S. Karls, A. Stukowski, W. Im, H. Heinz, A. Kohlmeyer, and E. B. Tadmor, J Phys Chem B, 128, 3282-3297 (2024).
-
 .. _howto-MacKerell:
 
 **(MacKerell)** MacKerell, Bashford, Bellott, Dunbrack, Evanseck, Field, Fischer, Gao, Guo, Ha, et al (1998).  J Phys Chem, 102, 3586 . https://doi.org/10.1021/jp973084f
diff --git a/doc/src/Howto_spc.rst b/doc/src/Howto_spc.rst
index 00bd8a1b10..f84d7797d2 100644
--- a/doc/src/Howto_spc.rst
+++ b/doc/src/Howto_spc.rst
@@ -1,5 +1,5 @@
-SPC water model
-===============
+SPC and SPC/E water models
+==========================
 
 The SPC water model specifies a 3-site rigid water molecule with
 charges and Lennard-Jones parameters assigned to each of the three atoms.
diff --git a/doc/src/Howto_thermostat.rst b/doc/src/Howto_thermostat.rst
index fe53fff540..bda3bc6cb4 100644
--- a/doc/src/Howto_thermostat.rst
+++ b/doc/src/Howto_thermostat.rst
@@ -21,9 +21,14 @@ can be invoked via the *dpd/tstat* pair style:
 * :doc:`fix nvt/sllod <fix_nvt_sllod>`
 * :doc:`fix temp/berendsen <fix_temp_berendsen>`
 * :doc:`fix temp/csvr <fix_temp_csvr>`
+* :doc:`fix ffl <fix_ffl>`
+* :doc:`fix gjf <fix_gjf>`
+* :doc:`fix gld <fix_gld>`
+* :doc:`fix gle <fix_gle>`
 * :doc:`fix langevin <fix_langevin>`
 * :doc:`fix temp/rescale <fix_temp_rescale>`
 * :doc:`pair_style dpd/tstat <pair_dpd>`
+* :doc:`pair_style dpd/ext/tstat <pair_dpd_ext>`
 
 :doc:`Fix nvt <fix_nh>` only thermostats the translational velocity of
 particles.  :doc:`Fix nvt/sllod <fix_nvt_sllod>` also does this,
@@ -82,10 +87,10 @@ that:
 
 .. note::
 
-   Only the nvt fixes perform time integration, meaning they update
+   Not all thermostat fixes perform time integration, meaning they update
    the velocities and positions of particles due to forces and velocities
    respectively.  The other thermostat fixes only adjust velocities; they
-   do NOT perform time integration updates.  Thus they should be used in
+   do NOT perform time integration updates.  Thus, they should be used in
    conjunction with a constant NVE integration fix such as these:
 
 * :doc:`fix nve <fix_nve>`
diff --git a/doc/src/Howto_tip4p.rst b/doc/src/Howto_tip4p.rst
index 47a1b9b578..76c470d615 100644
--- a/doc/src/Howto_tip4p.rst
+++ b/doc/src/Howto_tip4p.rst
@@ -1,5 +1,5 @@
-TIP4P water model
-=================
+TIP4P and OPC water models
+==========================
 
 The four-point TIP4P rigid water model extends the traditional
 :doc:`three-point TIP3P <Howto_tip3p>` model by adding an additional
@@ -9,9 +9,11 @@ the oxygen along the bisector of the HOH bond angle.  A bond style of
 :doc:`harmonic <bond_harmonic>` and an angle style of :doc:`harmonic
 <angle_harmonic>` or :doc:`charmm <angle_charmm>` should also be used.
 In case of rigid bonds also bond style :doc:`zero <bond_zero>` and angle
-style :doc:`zero <angle_zero>` can be used.
+style :doc:`zero <angle_zero>` can be used.  Very similar to the TIP4P
+model is the OPC water model.  It can be realized the same way as TIP4P
+but has different geometry and force field parameters.
 
-There are two ways to implement TIP4P water in LAMMPS:
+There are two ways to implement TIP4P-like water in LAMMPS:
 
 #. Use a specially written pair style that uses the :ref:`TIP3P geometry
    <tip3p_molecule>` without the point M. The point M location is then
@@ -21,7 +23,10 @@ There are two ways to implement TIP4P water in LAMMPS:
    computationally very efficient, but the charge distribution in space
    is only correct within the tip4p labeled styles.  So all other
    computations using charges will "see" the negative charge incorrectly
-   on the oxygen atom.
+   located on the oxygen atom unless they are specially written for using
+   the TIP4P geometry internally as well, e.g. :doc:`compute dipole/tip4p
+   <compute_dipole>`, :doc:`fix efield/tip4p <fix_efield>`, or
+   :doc:`kspace_style pppm/tip4p <kspace_style>`.
 
    This can be done with the following pair styles for Coulomb with a cutoff:
 
@@ -68,77 +73,90 @@ TIP4P/2005 model :ref:`(Abascal2) <Abascal2>` and a version of TIP4P
 parameters adjusted for use with a long-range Coulombic solver
 (e.g. Ewald or PPPM in LAMMPS).  Note that for implicit TIP4P models the
 OM distance is specified in the :doc:`pair_style <pair_style>` command,
-not as part of the pair coefficients.
+not as part of the pair coefficients. Also parameters for the OPC
+model (:ref:`Izadi <Izadi>`) are provided.
 
 .. list-table::
       :header-rows: 1
-      :widths: 36 19 13 15 17
+      :widths: 40 12 12 14 11 11
 
       * - Parameter
         - TIP4P (original)
         - TIP4P/Ice
         - TIP4P/2005
         - TIP4P (Ewald)
+        - OPC
       * - O mass (amu)
         - 15.9994
         - 15.9994
         - 15.9994
         - 15.9994
+        - 15.9994
       * - H mass (amu)
         - 1.008
         - 1.008
         - 1.008
         - 1.008
+        - 1.008
       * - O or M charge (:math:`e`)
         - -1.040
         - -1.1794
         - -1.1128
         - -1.04844
+        - -1.3582
       * - H charge (:math:`e`)
         - 0.520
         - 0.5897
         - 0.5564
         - 0.52422
+        - 0.6791
       * - LJ :math:`\epsilon` of OO (kcal/mole)
         - 0.1550
         - 0.21084
         - 0.1852
         - 0.16275
+        - 0.21280
       * - LJ :math:`\sigma` of OO (:math:`\AA`)
         - 3.1536
         - 3.1668
         - 3.1589
         - 3.16435
+        - 3.1660
       * - LJ :math:`\epsilon` of HH, MM, OH, OM, HM (kcal/mole)
         - 0.0
         - 0.0
         - 0.0
         - 0.0
+        - 0.0
       * - LJ :math:`\sigma` of HH, MM, OH, OM, HM (:math:`\AA`)
         - 1.0
         - 1.0
         - 1.0
         - 1.0
+        - 1.0
       * - :math:`r_0` of OH bond (:math:`\AA`)
         - 0.9572
         - 0.9572
         - 0.9572
         - 0.9572
+        - 0.8724
       * - :math:`\theta_0` of HOH angle
         - 104.52\ :math:`^{\circ}`
         - 104.52\ :math:`^{\circ}`
         - 104.52\ :math:`^{\circ}`
         - 104.52\ :math:`^{\circ}`
+        - 103.60\ :math:`^{\circ}`
       * - OM distance (:math:`\AA`)
         - 0.15
         - 0.1577
         - 0.1546
         - 0.1250
+        - 0.1594
 
-Note that the when using the TIP4P pair style, the neighbor list cutoff
+Note that when using a TIP4P pair style, the neighbor list cutoff
 for Coulomb interactions is effectively extended by a distance 2 \* (OM
 distance), to account for the offset distance of the fictitious charges
-on O atoms in water molecules.  Thus it is typically best in an
+on O atoms in water molecules.  Thus, it is typically best in an
 efficiency sense to use a LJ cutoff >= Coulomb cutoff + 2\*(OM
 distance), to shrink the size of the neighbor list.  This leads to
 slightly larger cost for the long-range calculation, so you can test the
@@ -192,6 +210,94 @@ file changed):
     run 20000
     write_data tip4p-implicit.data nocoeff
 
+When constructing an OPC model, we cannot use the ``tip3p.mol`` file due
+to the different geometry.  Below is a molecule file providing the 3
+sites of an implicit OPC geometry for use with TIP4P styles.  Note that
+the "Shake" and "Special" sections are missing here.  Those will be
+auto-generated by LAMMPS when the molecule file is loaded *after* the
+simulation box has been created.  These sections are required only when
+the molecule file is loaded *before*.
+
+.. _opc3p_molecule:
+.. code-block::
+
+   # Water molecule. 3 point geometry for OPC model
+
+   3 atoms
+   2 bonds
+   1 angles
+
+   Coords
+
+   1    0.00000  -0.06037   0.00000
+   2    0.68558   0.50250   0.00000
+   3   -0.68558   0.50250   0.00000
+
+   Types
+
+   1        1   # O
+   2        2   # H
+   3        2   # H
+
+   Charges
+
+   1       -1.3582
+   2        0.6791
+   3        0.6791
+
+   Bonds
+
+   1   1      1      2
+   2   1      1      3
+
+   Angles
+
+   1   1      2      1      3
+
+Below is a LAMMPS input file using the implicit method to implement
+the OPC model using the molecule file from above and including the
+PPPM long-range Coulomb solver.
+
+.. code-block:: LAMMPS
+
+    units real
+    atom_style full
+    region box block -5 5 -5 5 -5 5
+    create_box 2 box bond/types 1 angle/types 1 &
+                extra/bond/per/atom 2 extra/angle/per/atom 1 extra/special/per/atom 2
+
+    mass 1 15.9994
+    mass 2 1.008
+
+    pair_style lj/cut/tip4p/long 1 2 1 1 0.1594 12.0
+    pair_coeff 1 1 0.2128 3.166
+    pair_coeff 2 2 0.0    1.0
+
+    bond_style zero
+    bond_coeff 1 0.8724
+
+    angle_style zero
+    angle_coeff 1 103.6
+
+    kspace_style pppm/tip4p 1.0e-5
+
+    molecule water opc3p.mol  # this file has the OPC geometry but is without M
+    create_atoms 0 random 33 34564 NULL mol water 25367 overlap 1.33
+
+    fix rigid all shake 0.001 10 10000 b 1 a 1
+    minimize 0.0 0.0 1000 10000
+
+    reset_timestep 0
+    timestep 1.0
+    velocity all create 300.0 5463576
+    fix integrate all nvt temp 300 300 100.0
+
+    thermo_style custom step temp press etotal pe
+
+    thermo 1000
+    run 20000
+    write_data opc-implicit.data nocoeff
+
 Below is the code for a LAMMPS input file using the explicit method and
 a TIP4P molecule file.  Because of using :doc:`fix rigid/small
 <fix_rigid>` no bonds need to be defined and thus no extra storage needs
@@ -279,3 +385,8 @@ Phys, 79, 926 (1983).
 
 **(Abascal2)** Abascal, J Chem Phys, 123, 234505 (2005)
    https://doi.org/10.1063/1.2121687
+
+.. _Izadi:
+
+**(Izadi)** Izadi, Anandakrishnan, Onufriev, J. Phys. Chem. Lett., 5, 21, 3863 (2014)
+   https://doi.org/10.1021/jz501780a
diff --git a/doc/src/Intro_authors.rst b/doc/src/Intro_authors.rst
index 38f1102595..730cd2e336 100644
--- a/doc/src/Intro_authors.rst
+++ b/doc/src/Intro_authors.rst
@@ -84,8 +84,9 @@ lammps.org".  General questions about LAMMPS should be posted in the
 
    \normalsize
 
-Past developers include Paul Crozier and Mark Stevens, both at SNL,
-and Ray Shan, now at Materials Design.
+Past core developers include Paul Crozier and Mark Stevens, both at SNL,
+and Ray Shan while at SNL and later at Materials Design, now at Thermo
+Fisher Scientific.
 
 ----------
 
diff --git a/doc/src/Intro_portability.rst b/doc/src/Intro_portability.rst
index 63ae147b8c..564cdc47f4 100644
--- a/doc/src/Intro_portability.rst
+++ b/doc/src/Intro_portability.rst
@@ -28,8 +28,9 @@ Build systems
 LAMMPS can be compiled from source code using a (traditional) build
 system based on shell scripts, a few shell utilities (grep, sed, cat,
 tr) and the GNU make program. This requires running within a Bourne
-shell (``/bin/sh``).  Alternatively, a build system with different back
-ends can be created using CMake.  CMake must be at least version 3.16.
+shell (``/bin/sh`` or ``/bin/bash``).  Alternatively, a build system
+with different back ends can be created using CMake.  CMake must be
+at least version 3.16.
 
 Operating systems
 ^^^^^^^^^^^^^^^^^
@@ -40,11 +41,18 @@ Also, compilation and correct execution on macOS and Windows (using
 Microsoft Visual C++) is checked automatically for the largest part of
 the source code.  Some (optional) features are not compatible with all
 operating systems, either through limitations of the corresponding
-LAMMPS source code or through incompatibilities of source code or
-build system of required external libraries or packages.
+LAMMPS source code or through incompatibilities or build system
+limitations of required external libraries or packages.
 
-Executables for Windows may be created natively using either Cygwin or
-Visual Studio or with a Linux to Windows MinGW cross-compiler.
+Executables for Windows may be created either natively using Cygwin,
+MinGW, Intel, Clang, or Microsoft Visual C++ compilers, or with a Linux
+to Windows MinGW cross-compiler.  Native compilation is supported using
+Microsoft Visual Studio or a terminal window (using the CMake build
+system).
+
+Executables for macOS may be created either using Xcode or GNU compilers
+installed with Homebrew.  In the latter case, building of LAMMPS through
+Homebrew instead of a manual compile is also possible.
 
 Additionally, FreeBSD and Solaris have been tested successfully to
 run LAMMPS and produce results consistent with those on Linux.
@@ -61,8 +69,9 @@ CPU architectures
 ^^^^^^^^^^^^^^^^^
 
 The primary CPU architecture for running LAMMPS is 64-bit x86, but also
-32-bit x86, and 64-bit ARM and PowerPC (64-bit, Little Endian) are
-regularly tested.
+64-bit ARM and PowerPC (64-bit, Little Endian) are currently regularly
+tested.  Further architectures are tested by Linux distributions that
+bundle LAMMPS.
 
 Portability compliance
 ^^^^^^^^^^^^^^^^^^^^^^
diff --git a/doc/src/JPG/lammps-gui-screen.png b/doc/src/JPG/lammps-gui-screen.png
new file mode 100644
index 0000000000..cae0ae9fe9
Binary files /dev/null and b/doc/src/JPG/lammps-gui-screen.png differ
diff --git a/doc/src/PDF/colvars-refman-lammps.pdf b/doc/src/PDF/colvars-refman-lammps.pdf
index b8f049ce01..76d94f8e3a 100644
Binary files a/doc/src/PDF/colvars-refman-lammps.pdf and b/doc/src/PDF/colvars-refman-lammps.pdf differ
diff --git a/doc/src/Tools.rst b/doc/src/Tools.rst
index 488de848bf..521cfe5f16 100644
--- a/doc/src/Tools.rst
+++ b/doc/src/Tools.rst
@@ -475,9 +475,13 @@ beginners to start with LAMMPS, it is also the expectation that
 LAMMPS-GUI users will eventually transition to workflows that most
 experienced LAMMPS users employ.
 
-All features have been extensively exposed to keyboard shortcuts, so
-that there is also appeal for experienced LAMMPS users for prototyping
-and testing simulation setups.
+.. image:: JPG/lammps-gui-screen.png
+   :align: center
+   :scale: 50%
+
+Features have been extensively exposed to keyboard shortcuts, so that
+there is also appeal for experienced LAMMPS users for prototyping and
+testing simulation setups.
 
 Features
 ^^^^^^^^
@@ -502,7 +506,7 @@ Here are a few highlights of LAMMPS-GUI
 - Visualization of current state in Image Viewer (via calling :doc:`write_dump image <dump_image>`)
 - Capture of images created via :doc:`dump image <dump_image>` in Slide show window
 - Dialog to set variables, similar to the LAMMPS command-line flag '-v' / '-var'
-- Support for GPU, INTEL, KOKKOS/OpenMP, OPENMAP, and OPT and accelerator packages
+- Support for GPU, INTEL, KOKKOS/OpenMP, OPENMP, and OPT accelerator packages
 
 Parallelization
 ^^^^^^^^^^^^^^^
@@ -523,8 +527,8 @@ with CMake is required.
 The LAMMPS-GUI has been successfully compiled and tested on:
 
 - Ubuntu Linux 20.04LTS x86_64 using GCC 9, Qt version 5.12
-- Fedora Linux 40 x86\_64 using GCC 14 and Clang 17, Qt version 5.15LTS
-- Fedora Linux 40 x86\_64 using GCC 14, Qt version 6.7
+- Fedora Linux 41 x86\_64 using GCC 14 and Clang 17, Qt version 5.15LTS
+- Fedora Linux 41 x86\_64 using GCC 14, Qt version 6.8
 - Apple macOS 12 (Monterey) and macOS 13 (Ventura) with Xcode on arm64 and x86\_64, Qt version 5.15LTS
 - Windows 10 and 11 x86_64 with Visual Studio 2022 and Visual C++ 14.36, Qt version 5.15LTS
 - Windows 10 and 11 x86_64 with Visual Studio 2022 and Visual C++ 14.40, Qt version 6.7
diff --git a/doc/src/compute_bond_local.rst b/doc/src/compute_bond_local.rst
index e070d507b1..7ce6f9b15a 100644
--- a/doc/src/compute_bond_local.rst
+++ b/doc/src/compute_bond_local.rst
@@ -64,20 +64,32 @@ All these properties are computed for the pair of atoms in a bond,
 whether the two atoms represent a simple diatomic molecule, or are part
 of some larger molecule.
 
-The value *dist* is the current length of the bond.
-The values *dx*, *dy*, and *dz* are the xyz components of the
-*distance* between the pair of atoms. This value is always the
-distance from the atom of lower to the one with the higher id.
+.. versionchanged:: TBD
+
+   The sign of *dx*, *dy*, *dz* is no longer determined by the atom IDs
+   of the bonded atoms but by their order in the bond list to be
+   consistent with *fx*, *fy*, and *fz*.
+
+The value *dist* is the current length of the bond.  The values *dx*,
+*dy*, and *dz* are the :math:`(x,y,z)` components of the distance vector
+:math:`\vec{x_i} - \vec{x_j}` between the atoms in the bond.  The order
+of the atoms is determined by the bond list and the respective atom-IDs
+can be output with :doc:`compute property/local
+<compute_property_local>`.
 
 The value *engpot* is the potential energy for the bond,
 based on the current separation of the pair of atoms in the bond.
 
-The value *force* is the magnitude of the force acting between the
-pair of atoms in the bond.
+The value *force* is the magnitude of the force acting between the pair
+of atoms in the bond, which is positive for a repulsive force and
+negative for an attractive force.
 
-The values *fx*, *fy*, and *fz* are the xyz components of
-*force* between the pair of atoms in the bond. For bond styles that apply
-non-central forces, such as :doc:`bond_style bpm/rotational
+The values *fx*, *fy*, and *fz* are the :math:`(x,y,z)` components of
+the force on the first atom *i* in the bond due to the second atom *j*.
+Mathematically, they are obtained by multiplying the value of *force*
+from above with a unit vector created from the *dx*, *dy*, and *dz*
+components of the distance vector also described above.  For bond styles
+that apply non-central forces, such as :doc:`bond_style bpm/rotational
 <bond_bpm_rotational>`, these values only include the :math:`(x,y,z)`
 components of the normal force component.
 
diff --git a/doc/src/compute_pair_local.rst b/doc/src/compute_pair_local.rst
index 31209f63f4..605bfc8e9e 100644
--- a/doc/src/compute_pair_local.rst
+++ b/doc/src/compute_pair_local.rst
@@ -56,19 +56,33 @@ force cutoff distance for that interaction, as defined by the
 :doc:`pair_style <pair_style>` and :doc:`pair_coeff <pair_coeff>`
 commands.
 
-The value *dist* is the distance between the pair of atoms.
-The values *dx*, *dy*, and *dz* are the :math:`(x,y,z)` components of the
-*distance* between the pair of atoms. This value is always the
-distance from the atom of higher to the one with the lower atom ID.
+.. versionchanged:: TBD
+
+   The sign of *dx*, *dy*, *dz* is no longer determined by the atom-IDs
+   of the pair of atoms but by their order in the neighbor list to be
+   consistent with *fx*, *fy*, and *fz*.
+
+The value *dist* is the distance between the pair of atoms.  The values
+*dx*, *dy*, and *dz* are the :math:`(x,y,z)` components of the distance
+vector :math:`\vec{x_i} - \vec{x_j}` between the pair of atoms.  The
+order of the atoms is determined by the neighbor list and the respective
+atom-IDs can be output with :doc:`compute property/local
+<compute_property_local>`.
 
 The value *eng* is the interaction energy for the pair of atoms.
 
 The value *force* is the force acting between the pair of atoms, which
 is positive for a repulsive force and negative for an attractive
-force.  The values *fx*, *fy*, and *fz* are the :math:`(x,y,z)` components of
-*force* on atom I. For pair styles that apply non-central forces,
-such as :doc:`granular pair styles <pair_gran>`, these values only include
-the :math:`(x,y,z)` components of the normal force component.
+force.
+
+The values *fx*, *fy*, and *fz* are the :math:`(x,y,z)` components of
+the force vector on the first atom *i* of a pair in the neighbor list
+due to the second atom *j*.  Mathematically, they are obtained by
+multiplying the value of *force* from above with a unit vector created
+from the *dx*, *dy*, and *dz* components of the distance vector also
+described above.  For pair styles that apply non-central forces, such as
+:doc:`granular pair styles <pair_gran>`, these values only include the
+:math:`(x,y,z)` components of the normal force component.
 
 A pair style may define additional pairwise quantities which can be
 accessed as *p1* to *pN*, where :math:`N` is defined by the pair style.
diff --git a/doc/src/fix.rst b/doc/src/fix.rst
index 27cc9525e5..935cd842c9 100644
--- a/doc/src/fix.rst
+++ b/doc/src/fix.rst
@@ -257,6 +257,7 @@ accelerated styles exist.
 * :doc:`flow/gauss <fix_flow_gauss>` - Gaussian dynamics for constant mass flux
 * :doc:`freeze <fix_freeze>` - freeze atoms in a granular simulation
 * :doc:`gcmc <fix_gcmc>` - grand canonical insertions/deletions
+* :doc:`gjf <fix_gjf>` - statistically correct Langevin temperature control using the GJ methods
 * :doc:`gld <fix_gld>` - generalized Langevin dynamics integrator
 * :doc:`gle <fix_gle>` - generalized Langevin equation thermostat
 * :doc:`gravity <fix_gravity>` - add gravity to atoms in a granular simulation
@@ -291,6 +292,7 @@ accelerated styles exist.
 * :doc:`mvv/tdpd <fix_mvv_dpd>` - constant temperature DPD using the modified velocity-Verlet algorithm
 * :doc:`neb <fix_neb>` - nudged elastic band (NEB) spring forces
 * :doc:`neb/spin <fix_neb_spin>` - nudged elastic band (NEB) spring forces for spins
+* :doc:`neighbor/swap <fix_neighbor_swap>` - kinetic Monte Carlo (kMC) atom swapping
 * :doc:`nonaffine/displacement <fix_nonaffine_displacement>` - calculate nonaffine displacement of atoms
 * :doc:`nph <fix_nh>` - constant NPH time integration via Nose/Hoover
 * :doc:`nph/asphere <fix_nph_asphere>` - NPH for aspherical particles
diff --git a/doc/src/fix_gjf.rst b/doc/src/fix_gjf.rst
new file mode 100644
index 0000000000..82d283db34
--- /dev/null
+++ b/doc/src/fix_gjf.rst
@@ -0,0 +1,208 @@
+.. index:: fix gjf
+
+fix gjf command
+========================
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   fix ID group-ID gjf Tstart Tstop damp seed keyword values ...
+
+* ID, group-ID are documented in :doc:`fix <fix>` command
+* gjf = style name of this fix command
+* Tstart,Tstop = desired temperature at start/end of run (temperature units)
+* Tstart can be a variable (see below)
+* damp = damping parameter (time units)
+* seed = random number seed to use for white noise (positive integer)
+* zero or more keyword/value pairs may be appended
+* keyword = *vel* or *method*
+
+  .. parsed-literal::
+
+       *vel* value = *vfull* or *vhalf*
+         *vfull* = use on-site velocity
+         *vhalf* = use half-step velocity
+       *method* value = *1-8*
+         *1-8* = choose one of the many GJ formulations
+         *7*   = requires input of additional scalar between 0 and 1
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   fix 3 boundary gjf 10.0 10.0 1.0 699483
+   fix 1 all gjf 10.0 100.0 100.0 48279 vel vfull method 4
+   fix 2 all gjf 10.0 10.0 1.0 26488 method 7 0.95
+
+Description
+"""""""""""
+.. versionadded:: TBD
+
+Apply a Langevin thermostat as described in :ref:`(Gronbech-Jensen-2020) <Gronbech-Jensen-2020>`
+to a group of atoms which models an interaction with a background
+implicit solvent.  As described in the papers cited below, the GJ methods
+provide exact diffusion, drift, and Boltzmann sampling for linear systems for
+any time step within the stability limit. The purpose of this set of methods
+is therefore to significantly improve statistical accuracy at longer time steps
+compared to other thermostats.
+
+The current implementation provides the user with the option to output
+the velocity in one of two forms: *vfull* or *vhalf*. The option *vhalf*
+outputs the 2GJ half-step velocity given in :ref:`Gronbech Jensen/Gronbech-Jensen
+<Gronbech-Jensen-2019>`; for linear systems, this velocity is shown to not
+have any statistical errors for any stable time step. The option *vfull*
+outputs the on-site velocity given in :ref:`Gronbech-Jensen/Farago
+<Gronbech-Jensen-Farago>`; this velocity is shown to be systematically lower
+than the target temperature by a small amount, which grows
+quadratically with the timestep. An overview of statistically correct Boltzmann
+and Maxwell-Boltzmann sampling of true on-site and true half-step velocities is
+given in :ref:`Gronbech-Jensen-2020 <Gronbech-Jensen-2020>`.
+
+This fix allows the use of several GJ methods as listed in :ref:`Gronbech-Jensen-2020 <Gronbech-Jensen-2020>`.
+The GJ-VII method is described in :ref:`Finkelstein <Finkelstein>` and GJ-VIII
+is described in :ref:`Gronbech-Jensen-2024 <Gronbech-Jensen-2024>`.
+The implementation follows the splitting form provided in Eqs. (24) and (25)
+in :ref:`Gronbech-Jensen-2024 <Gronbech-Jensen-2024>`, including the application
+of Gaussian noise values, per the description in
+:ref:`Gronbech-Jensen-2023 <Gronbech-Jensen-2023>`.
+
+
+.. note::
+
+   Unlike the :doc:`fix langevin <fix_langevin>` command which performs force
+   modifications only, this fix performs thermostatting and time integration.
+   Thus you no longer need a separate time integration fix, like :doc:`fix nve <fix_nve>`.
+
+See the :doc:`Howto thermostat <Howto_thermostat>` page for
+a discussion of different ways to compute temperature and perform
+thermostatting.
+
+The desired temperature at each timestep is a ramped value during the
+run from *Tstart* to *Tstop*\ .
+
+*Tstart* can be specified as an equal-style or atom-style
+:doc:`variable <variable>`.  In this case, the *Tstop* setting is
+ignored.  If the value is a variable, it should be specified as
+v_name, where name is the variable name.  In this case, the variable
+will be evaluated each timestep, and its value used to determine the
+target temperature.
+
+Equal-style variables can specify formulas with various mathematical
+functions, and include :doc:`thermo_style <thermo_style>` command
+keywords for the simulation box parameters and timestep and elapsed
+time.  Thus it is easy to specify a time-dependent temperature.
+
+Atom-style variables can specify the same formulas as equal-style
+variables but can also include per-atom values, such as atom
+coordinates.  Thus it is easy to specify a spatially-dependent
+temperature with optional time-dependence as well.
+
+Like other fixes that perform thermostatting, this fix can be used
+with :doc:`compute commands <compute>` that remove a "bias" from the
+atom velocities.  E.g. to apply the thermostat only to atoms within a
+spatial :doc:`region <region>`, or to remove the center-of-mass
+velocity from a group of atoms, or to remove the x-component of
+velocity from the calculation.
+
+This is not done by default, but only if the :doc:`fix_modify
+<fix_modify>` command is used to assign a temperature compute to this
+fix that includes such a bias term.  See the doc pages for individual
+:doc:`compute temp commands <compute>` to determine which ones include
+a bias.
+
+The *damp* parameter is specified in time units and determines how
+rapidly the temperature is relaxed.  For example, a value of 100.0 means
+to relax the temperature in a timespan of (roughly) 100 time units
+(:math:`\tau` or fs or ps - see the :doc:`units <units>` command).  The
+damp factor can be thought of as inversely related to the viscosity of
+the solvent.  I.e. a small relaxation time implies a high-viscosity
+solvent and vice versa.  See the discussion about :math:`\gamma` and
+viscosity in the documentation for the :doc:`fix viscous <fix_viscous>`
+command for more details.
+
+The random # *seed* must be a positive integer.  A Marsaglia random
+number generator is used.  Each processor uses the input seed to
+generate its own unique seed and its own stream of random numbers.
+Thus the dynamics of the system will not be identical on two runs on
+different numbers of processors.
+
+----------
+
+The keyword/value option pairs are used in the following ways.
+
+The keyword *vel* determines which velocity is used to determine
+quantities of interest in the simulation.
+
+The keyword *method* selects one of the eight GJ-methods implemented in LAMMPS.
+
+----------
+
+Restart, fix_modify, output, run start/stop, minimize info
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+No information about this fix is written to :doc:`binary restart files <restart>`.
+Because the state of the random number generator is not saved in restart files,
+this means you cannot do "exact" restarts with this fix, where the simulation
+continues on the same as if no restart had taken place.  However, in a
+statistical sense, a restarted simulation should produce the same behavior.
+Additionally, the GJ methods implement noise exclusively within each time step
+(unlike the BBK thermostat of :doc:`fix langevin <fix_langevin>`). The restart is done with
+either vfull or vhalf velocity output for as long as the choice of vfull/vhalf
+is the same for the simulation as it is in the restart file.
+
+The :doc:`fix_modify <fix_modify>` *temp* option is supported by this
+fix.  You can use it to assign a temperature :doc:`compute <compute>`
+you have defined to this fix which will be used in its thermostatting
+procedure, as described above.  For consistency, the group used by
+this fix and by the compute should be the same.
+
+This fix can ramp its target temperature over multiple runs, using the
+*start* and *stop* keywords of the :doc:`run <run>` command.  See the
+:doc:`run <run>` command for details of how to do this.
+
+This fix is not invoked during :doc:`energy minimization <minimize>`.
+
+Restrictions
+""""""""""""
+
+This fix is not compatible with run_style respa. It is not compatible with
+accelerated packages such as KOKKOS.
+
+Related commands
+""""""""""""""""
+
+:doc:`fix langevin <fix_langevin>`, :doc:`fix nvt <fix_nh>`
+
+Default
+"""""""
+
+The option defaults are vel = vhalf, method = 1.
+
+----------
+
+.. _Gronbech-Jensen-2020:
+
+**(Gronbech-Jensen-2020)** Gronbech-Jensen, Mol Phys 118, e1662506 (2020).
+
+.. _Gronbech-Jensen-2019:
+
+**(Gronbech Jensen/Gronbech-Jensen)** Gronbech Jensen and Gronbech-Jensen, Mol Phys, 117, 2511 (2019)
+
+.. _Gronbech-Jensen-Farago:
+
+**(Gronbech-Jensen/Farago)** Gronbech-Jensen and Farago, Mol Phys, 111, 983 (2013).
+
+.. _Finkelstein:
+
+**(Finkelstein)** Finkelstein, Cheng, Florin, Seibold, Gronbech-Jensen, J. Chem. Phys., 155, 18 (2021)
+
+.. _Gronbech-Jensen-2024:
+
+**(Gronbech-Jensen-2024)** Gronbech-Jensen, J. Stat. Phys. 191, 137 (2024).
+
+.. _Gronbech-Jensen-2023:
+
+**(Gronbech-Jensen-2023)** Gronbech-Jensen, J. Stat. Phys. 190, 96 (2023).
diff --git a/doc/src/fix_langevin.rst b/doc/src/fix_langevin.rst
index 30e4c48270..b8be16e16d 100644
--- a/doc/src/fix_langevin.rst
+++ b/doc/src/fix_langevin.rst
@@ -56,7 +56,7 @@ Examples
 Description
 """""""""""
 
-Apply a Langevin thermostat as described in :ref:`(Schneider) <Schneider1>`
+Apply a Langevin thermostat as described in :ref:`(Bruenger) <Bruenger1>`
 to a group of atoms which models an interaction with a background
 implicit solvent.  Used with :doc:`fix nve <fix_nve>`, this command
 performs Brownian dynamics (BD), since the total force on each atom
@@ -241,6 +241,13 @@ to zero by subtracting off an equal part of it from each atom in the
 group.  As a result, the center-of-mass of a system with zero initial
 momentum will not drift over time.
 
+.. deprecated:: TBD
+
+The *gjf* keyword in fix langevin is deprecated and will be removed
+soon.  The GJF functionality has been moved to its own fix style
+:doc:`fix gjf <fix_gjf>` and it is strongly recommended to use that
+fix instead.
+
 The keyword *gjf* can be used to run the :ref:`Gronbech-Jensen/Farago
 <Gronbech-Jensen>` time-discretization of the Langevin model.  As
 described in the papers cited below, the purpose of this method is to
@@ -324,14 +331,16 @@ types, tally = no, zero = no, gjf = no.
 
 ----------
 
+.. _Bruenger1:
+
+**(Bruenger)** Bruenger, Brooks, and Karplus, Chem. Phys. Lett. 105, 495 (1982).
+[Previously attributed to Schneider and Stoll, Phys. Rev. B 17, 1302 (1978).
+Implementation remains unchanged.]
+
 .. _Dunweg1:
 
 **(Dunweg)** Dunweg and Paul, Int J of Modern Physics C, 2, 817-27 (1991).
 
-.. _Schneider1:
-
-**(Schneider)** Schneider and Stoll, Phys Rev B, 17, 1302 (1978).
-
 .. _Gronbech-Jensen:
 
 **(Gronbech-Jensen)** Gronbech-Jensen and Farago, Mol Phys, 111, 983
diff --git a/doc/src/fix_neighbor_swap.rst b/doc/src/fix_neighbor_swap.rst
new file mode 100644
index 0000000000..dffbc93217
--- /dev/null
+++ b/doc/src/fix_neighbor_swap.rst
@@ -0,0 +1,264 @@
+.. index:: fix neighbor/swap
+
+fix neighbor/swap command
+=========================
+
+Syntax
+""""""
+
+.. code-block:: LAMMPS
+
+   fix ID group-ID neighbor/swap N X seed T R0 voro-ID keyword values ...
+
+* ID, group-ID are documented in :doc:`fix <fix>` command
+* neighbor/swap = style name of this fix command
+* N = invoke this fix every N steps
+* X = number of swaps to attempt every N steps
+* seed = random # seed (positive integer)
+* T = scaling temperature of the MC swaps (temperature units)
+* R0 = scaling swap probability of the MC swaps (distance units)
+* voro-ID = valid voronoi compute id (compute voronoi/atom)
+* one or more keyword/value pairs may be appended to args
+* keywords *types* and *diff* are mutually exclusive, but one must be specified
+* keyword = *types* or *diff* or *ke* or *region* or *rates*
+
+  .. parsed-literal::
+
+       *types* values = two or more atom types (Integers in range [1,Ntypes] or type labels)
+       *diff* values = one atom type
+       *ke* value = *yes* or *no*
+         *yes* = kinetic energy is conserved after atom swaps
+         *no* = no conservation of kinetic energy after atom swaps
+       *region* value = region-ID
+         region-ID = ID of region to use as an exchange/move volume
+       *rates* values = V1 V2 . . . Vntypes values to conduct variable diffusion for different atom types (unitless)
+
+Examples
+""""""""
+
+.. code-block:: LAMMPS
+
+   compute voroN all voronoi/atom neighbors yes
+   fix mc all neighbor/swap 10 160 15238 1000.0 3.0 voroN diff 2
+   fix myFix all neighbor/swap 100 1 12345 298.0 3.0 voroN region my_swap_region types 5 6
+   fix kmc all neighbor/swap 1 100 345 1.0 3.0 voroN diff 3 rates 3 1 6
+
+Description
+"""""""""""
+
+.. versionadded:: TBD
+
+This fix performs Monte-Carlo (MC) evaluations to enable kinetic
+Monte Carlo (kMC)-type behavior during MD simulation by allowing
+neighboring atoms to swap their positions. In contrast to the :doc:`fix
+atom/swap <fix_atom_swap>` command which swaps pairs of atoms anywhere
+in the simulation domain, the restriction of the MC swapping to
+neighbors enables a hybrid MD/kMC-like simulation.
+
+Neighboring atoms are defined by using a Voronoi tessellation performed
+by the :doc:`compute voronoi/atom <compute_voronoi_atom>` command.
+Two atoms are neighbors if their Voronoi cells share a common face
+(3d) or edge (2d).
+
+The selection of a swap neighbor is made using a distance-based
+criterion for weighting the selection probability of each swap, in the
+same manner as kMC selects a next event using relative probabilities.
+The acceptance or rejection of each swap is determined via the
+Metropolis criterion after evaluating the change in system energy due
+to the swap.
+
+A detailed explanation of the original implementation of this
+algorithm can be found in :ref:`(Tavenner 2023) <TavennerMDkMC>`
+where it was used to simulate accelerated diffusion in an MD context.
+
+Simulating inherently kinetically-limited behaviors which rely on rare
+events (such as atomic diffusion in a solid) is challenging for
+traditional MD since its relatively short timescale will not naturally
+sample many events. This fix addresses this challenge by allowing rare
+neighbor hopping events to be sampled in a kMC-like fashion at a much
+faster rate (set by the specified *N* and *X* parameters).  This enables
+the processes of atomic diffusion to be approximated during an MD
+simulation, effectively decoupling the MD atomic vibrational timescale
+and the atomic hopping (kMC event) timescale.
+
+The algorithm implemented by this fix is as follows:
+
+   - The MD simulation is paused every *N* steps
+   - A Voronoi tessellation is performed for the current atom configuration.
+   - Then *X* atom swaps are attempted, one after the other.
+   - For each swap, an atom *I* is selected randomly from the list of
+     atom types specified by either the *types* or *diff* keywords.
+   - One of *I*'s Voronoi neighbors *J* is selected using the
+     distance-weighted probability for each neighbor detailed below.
+   - The *I,J* atom IDs are communicated to all processors so that a
+     global energy evaluation can be performed for the post-swap state
+     of the system.
+   - The swap is accepted or rejected based on the Metropolis criterion
+     using the energy change of the system and the specified temperature
+     *T*.
+
+Here are a few comments on the computational cost of the swapping
+algorithm.
+
+   1. The cost of a global energy evaluation is similar to that of an MD
+      timestep.
+
+   2. Similar to other MC algorithms in LAMMPS, improved parallel
+      efficiency is achieved with a smaller number of atoms per
+      processor than would typically be used in a standard MD
+      simulation. This is because the per-energy evaluation cost
+      increases relative to the balance of MD/MC steps as indicated by
+      1., but the communication cost remains relatively constant for a
+      given number of MD steps.
+
+   3. The MC portion of the simulation will run dramatically slower if
+      the pair style uses different cutoffs for different atom types (or
+      type pairs).  This is because each atom swap then requires a
+      rebuild of the neighbor list to ensure the post-swap global energy
+      can be computed correctly.
+
+Limitations are imposed on selection of *I,J* atom pairs to avoid
+swapping of atoms which are outside of a reasonable cutoff (e.g. due to
+a Voronoi tessellation near free surfaces) through the use of a
+distance-weighted probability scaling.
+
+----------
+
+This section gives more details on other arguments and keywords.
+
+The random number generator (RNG) used by all the processors for MC
+operations is initialized with the specified *seed*.
+
+The distance-based probability is weighted by the specified *R0* which
+sets the radius :math:`r_0` in this formula
+
+.. math::
+
+    p_{ij} = e^{-(\frac{r_{ij}}{r_0})^2}
+
+where :math:`p_{ij}` is the probability of selecting atom :math:`j` to
+swap with atom :math:`i`.  Typically, a value for *R0* around the
+average nearest-neighbor spacing is appropriate.  Since this is simply a
+probability weighting, the swapping behavior is not very sensitive to
+the exact value of *R0*.
+
+The required *voro-ID* value is the compute-ID of a
+:doc:`compute voronoi/atom <compute_voronoi_atom>` command like
+this:
+
+.. code-block:: LAMMPS
+
+    compute compute-ID group-ID voronoi/atom neighbors yes
+
+It must return a per-atom list of valid neighbor IDs as in the
+:doc:`compute voronoi/atom <compute_voronoi_atom>` command.
+
+The keyword *types* takes two or more atom types as its values.  Only
+atoms *I* of the first atom type will be selected.  Only atoms *J* of the
+remaining atom types will be considered as potential swap partners.
+
+The keyword *diff* takes a single atom type as its value.  Only atoms
+*I* of that atom type will be selected.  Atoms *J* of all
+remaining atom types will be considered as potential swap partners.
+This includes the atom type specified with the *diff* keyword to
+account for self-diffusive hops between two atoms of the same type.
+
+Note that the *neighbors yes* option must be enabled for use with this
+fix. The group-ID should include all the atoms which this fix will
+potentially select. I.e. the group-ID used in the voronoi compute should
+include the same atoms as that indicated by the *types* keyword. If the
+*diff* keyword is used, the group-ID should include atoms of all types
+in the simulation.
+
+The keyword *ke* takes *yes* (default) or *no* as its value.  If two
+atoms with different masses are swapped, then a value of *yes* will
+rescale their respective velocities to conserve the kinetic energy of
+the system.  A value of *no* will perform no rescaling, so that
+kinetic energy is not conserved.  See the restriction on this keyword
+below.
+
+The *region* keyword takes a *region-ID* as its value.  If specified,
+then only atoms *I* and *J* within the geometric region will be
+considered as swap partners.  See the :doc:`region <region>` command
+for details.  This means the group-ID for the :doc:`compute
+voronoi/atom <compute_voronoi_atom>` command also need only contain
+atoms within the region.
+
+The keyword *rates* can modify the swap rate based on the type of atom
+*J*.  Ntype values must be specified, where Ntype = the number of atom
+types in the system.  Each value is used to scale the probability
+weighting given by the equation above.  In the third example command
+above, a simulation has 3 atom types.  Atoms *I* of type 3 are
+eligible for swapping.  Swaps may occur with atoms *J* of all 3 types.
+Assuming all *J* atoms are equidistant from an atom *I*, *J* atoms of
+type 1 will be 3x more likely to be selected as a swap partner than
+atoms of type 2.  And *J* atoms of type 3 will be 6x more likely to
+be selected than atoms of type 2.  If the *rates* keyword is not used,
+all atom types will be treated with the same probability during selection
+of swap attempts.
+
+
+Restart, fix_modify, output, run start/stop, minimize info
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+This fix writes the state of the fix to :doc:`binary restart files
+<restart>`.  This includes information about the random number generator
+seed, the next timestep for MC exchanges, and the number of exchange
+attempts and successes.  See the :doc:`read_restart <read_restart>`
+command for info on how to re-specify a fix in an input script that
+reads a restart file, so that the operation of the fix continues in an
+uninterrupted fashion.
+
+None of the :doc:`fix_modify <fix_modify>` options are relevant to this
+fix.
+
+This fix computes a global vector of length 2, which can be accessed
+by various :doc:`output commands <Howto_output>`.  The vector values are
+the following global cumulative quantities:
+
+  #. swap attempts
+  #. swap accepts
+
+The vector values calculated by this fix are "intensive".
+
+No parameter of this fix can be used with the *start/stop* keywords of
+the :doc:`run <run>` command.  This fix is not invoked during
+:doc:`energy minimization <minimize>`.
+
+Restrictions
+""""""""""""
+
+This fix is part of the MC package.  It is only enabled if LAMMPS was
+built with that package.  See the :doc:`Build package <Build_package>`
+doc page for more info.  Also this fix requires that the :ref:`VORONOI
+package <PKG-VORONOI>` is installed, otherwise the fix will not be
+compiled.
+
+The :doc:`compute voronoi/atom <compute_voronoi_atom>` command
+referenced by the required voro-ID must return neighboring atoms as
+illustrated in the examples above.
+
+If this fix is used with systems that do not have per-type masses
+(e.g. atom style sphere), the *ke* keyword must be set to *no* since
+the implemented algorithm will not be able to re-scale velocities
+properly.
+
+Related commands
+""""""""""""""""
+
+:doc:`fix nvt <fix_nh>`, :doc:`compute voronoi/atom <compute_voronoi_atom>`,
+:doc:`delete_atoms <delete_atoms>`, :doc:`fix gcmc <fix_gcmc>`,
+:doc:`fix atom/swap <fix_atom_swap>`, :doc:`fix mol/swap <fix_mol_swap>`,
+:doc:`fix sgcmc <fix_sgcmc>`
+
+Default
+"""""""
+
+The option defaults are *ke* = yes and *rates* = 1 for all atom types.
+
+----------
+
+.. _TavennerMDkMC:
+
+**(Tavenner 2023)** J Tavenner, M Mendelev, J Lawson, Computational
+ Materials Science, 218, 111929 (2023).
diff --git a/doc/src/fix_qeq_rel_reaxff.rst b/doc/src/fix_qeq_rel_reaxff.rst
index 012980e230..98023ab51a 100644
--- a/doc/src/fix_qeq_rel_reaxff.rst
+++ b/doc/src/fix_qeq_rel_reaxff.rst
@@ -37,18 +37,18 @@ Examples
 Description
 """""""""""
 
-.. versionadded:: 19Nov2024
+.. versionadded:: 2Apr2025
 
-This fix implements the QEqR method for charge equilibration, which
-differs from the QEq charge equilibration method :ref:`(Rappe and
-Goddard) <Rappe4>` only in how external electric fields are accounted
-for.  This fix therefore raises a warning when used without :doc:`fix
-efield <fix_efield>` since :doc:`fix qeq/reaxff <fix_qeq_reaxff>` should
-be used without an external electric field.  Charges are computed with
-the QEqR method by minimizing the electrostatic energy of the system in
-the same way as the QEq method but where the absolute electronegativity,
-:math:`\chi_i`, of each atom in the QEq method is replaced with an
-effective electronegativity given by
+This fix implements the QEqR method :ref:`(Lalli) <lalli2>` for charge
+equilibration, which differs from the QEq charge equilibration method
+:ref:`(Rappe and Goddard) <Rappe4>` only in how external electric fields
+are accounted for. This fix therefore raises a warning when used without
+:doc:`fix efield <fix_efield>` since :doc:`fix qeq/reaxff <fix_qeq_reaxff>`
+should be used when no external electric field is present.  Charges are
+computed with the QEqR method by minimizing the electrostatic energy of
+the system in the same way as the QEq method but where the absolute
+electronegativity, :math:`\chi_i`, of each atom in the QEq method is
+replaced with an effective electronegativity given by
 
 .. math::
    \chi_{\mathrm{r}i} = \chi_i + \frac{\sum_{j=1}^{N} \beta(\phi_i - \phi_j) S_{ij}}
@@ -61,8 +61,9 @@ external electric field and :math:`S_{ij}` is the overlap integral
 between atoms :math:`i` and :math:`j`.  This formulation is advantageous
 over the method used by :doc:`fix qeq/reaxff <fix_qeq_reaxff>` to
 account for an external electric field in that it permits periodic
-boundaries in the direction of an external electric field and in that it
-does not worsen long-range charge transfer seen with QEq.
+boundaries in the direction of an external electric field and in
+that it does not worsen long-range charge transfer seen with
+QEq. See :ref:`Lalli <lalli2>` for further details.
 
 This fix is typically used in conjunction with the ReaxFF force field
 model as implemented in the :doc:`pair_style reaxff <pair_reaxff>`
@@ -184,6 +185,10 @@ scale = 1.0 and maxiter = 200
 
 ----------
 
+.. _lalli2:
+
+**(Lalli)** Lalli and Giusti, Journal of Chemical Physics, 162, 174311 (2025).
+
 .. _Rappe4:
 
 **(Rappe)** Rappe and Goddard III, Journal of Physical Chemistry, 95,
diff --git a/doc/src/fix_qtpie_reaxff.rst b/doc/src/fix_qtpie_reaxff.rst
index 08ae24e164..643295dba4 100644
--- a/doc/src/fix_qtpie_reaxff.rst
+++ b/doc/src/fix_qtpie_reaxff.rst
@@ -59,8 +59,7 @@ and atom :math:`j`.
 The effect of an external electric field can be incorporated into the QTPIE
 method by modifying the absolute or effective electronegativities of each
 atom :ref:`(Chen) <qtpie-Chen>`. This fix models the effect of an external
-electric field by using the effective electronegativity given in
-:ref:`(Gergs) <Gergs>`:
+electric field by using the effective electronegativity :ref:`(Lalli) <lalli>`:
 
 .. math::
    \tilde{\chi}_{\mathrm{r}i} = \frac{\sum_{j=1}^{N} (\chi_i - \chi_j + \beta(\phi_i - \phi_j)) S_{ij}}
@@ -68,7 +67,8 @@ electric field by using the effective electronegativity given in
 
 where :math:`\beta` is a scaling factor and :math:`\phi_i` and :math:`\phi_j`
 are the electric potentials at the positions of atoms :math:`i` and :math:`j`
-due to the external electric field.
+due to the external electric field. Additional details regarding the
+implementation and performance of this fix are provided in :ref:`Lalli <lalli>`.
 
 This fix is typically used in conjunction with the ReaxFF force
 field model as implemented in the :doc:`pair_style reaxff <pair_reaxff>`
@@ -206,10 +206,9 @@ scale = 1.0 and maxiter = 200
 **(Chen)** Chen, Jiahao. Theory and applications of fluctuating-charge models.
 University of Illinois at Urbana-Champaign, 2009.
 
-.. _Gergs:
+.. _lalli:
 
-**(Gergs)** Gergs, Dirkmann and Mussenbrock.
-Journal of Applied Physics 123.24 (2018).
+**(Lalli)** Lalli and Giusti, Journal of Chemical Physics, 162, 174311 (2025).
 
 .. _qeq-Aktulga2:
 
diff --git a/doc/src/pair_granular.rst b/doc/src/pair_granular.rst
index 4ae59a587f..4d8753a7a1 100644
--- a/doc/src/pair_granular.rst
+++ b/doc/src/pair_granular.rst
@@ -44,7 +44,7 @@ Examples
    pair_coeff * * hertz 1000.0 50.0 tangential mindlin 1000.0 1.0 0.4 heat area 0.1
 
    pair_style granular
-   pair_coeff * * mdr 5e6 0.4 1.9e5 2.0 0.5 0.5 tangential linear_history 940.0 0.0 0.7 rolling sds 2.7e5 0.0 0.6 damping none
+   pair_coeff * * mdr 5e6 0.4 1.9e5 2.0 0.5 0.5 tangential linear_history 940.0 1.0 0.7 rolling sds 2.7e5 0.0 0.6 damping mdr 1
 
 Description
 """""""""""
@@ -88,7 +88,8 @@ and their required arguments are:
 3. *hertz/material* : E, :math:`\eta_{n0}` (or :math:`e`), :math:`\nu`
 4. *dmt* : E, :math:`\eta_{n0}` (or :math:`e`), :math:`\nu`, :math:`\gamma`
 5. *jkr* : E, :math:`\eta_{n0}` (or :math:`e`), :math:`\nu`, :math:`\gamma`
-6. *mdr* : :math:`E`, :math:`\nu`, :math:`Y`, :math:`\Delta\gamma`, :math:`\psi_b`, :math:`e`
+6. *mdr* : :math:`E`, :math:`\nu`, :math:`Y`, :math:`\Delta\gamma`,
+   :math:`\psi_b`, :math:`\eta_{n0}`
 
 Here, :math:`k_n` is spring stiffness (with units that depend on model
 choice, see below); :math:`\eta_{n0}` is a damping prefactor (or, in its
@@ -177,6 +178,8 @@ two-part series :ref:`Zunker and Kamrin Part I <Zunker2024I>` and
 :ref:`Zunker and Kamrin Part II <Zunker2024II>`. Further development
 and demonstrations of its application to industrially relevant powder
 compaction processes are presented in :ref:`Zunker et al. <Zunker2025>`.
+If you use the *mdr* normal model the only supported damping option is
+the *mdr* damping class described below.
 
 The model requires the following inputs:
 
@@ -200,8 +203,8 @@ The model requires the following inputs:
    triggered. Lower values of :math:`\psi_b` delay the onset of the bulk elastic
    response.
 
-   6. *Coefficient of restitution* :math:`0 \le e \le 1` : The coefficient of
-   restitution is a tunable parameter that controls damping in the normal direction.
+   6. *Damping coefficient* :math:`\eta_{n0} \ge 0` : The damping coefficient
+   is a tunable parameter that controls damping in the normal direction.
 
 .. note::
 
@@ -213,18 +216,12 @@ The *mdr* model produces a nonlinear force-displacement response, therefore the
 critical timestep :math:`\Delta t` depends on the inputs and level of
 deformation. As a conservative starting point the timestep can be assumed to be
 dictated by the bulk elastic response such that
-:math:`\Delta t = 0.35\sqrt{m/k_\textrm{bulk}}`, where :math:`m` is the mass of
+:math:`\Delta t = 0.08\sqrt{m/k_\textrm{bulk}}`, where :math:`m` is the mass of
 the smallest particle and :math:`k_\textrm{bulk} = \kappa R_\textrm{min}` is an
 effective stiffness related to the bulk elastic response.
 Here, :math:`\kappa = E/(3(1-2\nu))` is the bulk modulus and
 :math:`R_\textrm{min}` is the radius of the smallest particle.
 
-.. note::
-
-   The *mdr* model requires some specific settings to function properly,
-   please read the following text carefully to ensure all requirements are
-   followed.
-
 The *atom_style* must be set to *sphere 1* to enable dynamic particle
 radii. The *mdr* model is designed to respect the incompressibility of
 plastic deformation and inherently tracks free surface displacements
@@ -253,13 +250,6 @@ algorithm see :ref:`Zunker et al. <Zunker2025>`.
 
    newton off
 
-The damping model must be set to *none*. The *mdr* model already has a built
-in damping model.
-
-.. code-block:: LAMMPS
-
-   pair_coeff * * mdr 5e6 0.4 1.9e5 2 0.5 0.5 damping none
-
 The definition of multiple *mdr* models in the *pair_style* is currently not
 supported. Similarly, the *mdr* model cannot be combined with a different normal
 model in the *pair_style*. Physically this means that only one homogeneous
@@ -270,7 +260,7 @@ The *mdr* model currently only supports *fix wall/gran/region*, not
 any *fix wall/gran/region* commands must also use the *mdr* model.
 Additionally, the following *mdr* inputs must match between the
 *pair_style* and *fix wall/gran/region* definitions: :math:`E`,
-:math:`\nu`, :math:`Y`, :math:`\psi_b`, and :math:`e`. The exception
+:math:`\nu`, :math:`Y`, :math:`\psi_b`, and :math:`\eta_{n0}`. The exception
 is :math:`\Delta\gamma`, which may vary, permitting different
 adhesive behaviors between particle-particle and particle-wall interactions.
 
@@ -336,6 +326,7 @@ for the damping model currently supported are:
 3. *viscoelastic*
 4. *tsuji*
 5. *coeff_restitution*
+6. *mdr* (class) : :math:`d_{type}`
 
 If the *damping* keyword is not specified, the *viscoelastic* model is
 used by default.
@@ -425,6 +416,37 @@ the damping coefficient, it accurately reproduces the specified coefficient of
 restitution for both monodisperse and polydisperse particle pairs.  This damping
 model is not compatible with cohesive normal models such as *JKR* or *DMT*.
 
+The *mdr* damping class contains multiple damping models that can be selected
+by specifying different integer values for the :math:`d_{type}` input parameter. This
+damping option is only compatible with the normal *mdr* contact model.
+
+Setting :math:`d_{type} = 1` is the suggested damping option. This specifies a damping
+model that takes into account the contact stiffness :math:`k_{mdr}` calculated
+by the normal *mdr* contact model to determine the damping coefficient:
+
+.. math::
+
+   \eta_n = \eta_{n0} (m_{eff}k_{mdr})^{1/2},
+
+where :math:`k_{mdr}` is proportional to the contact radius :math:`a_{mdr}` tracked by the
+normal *mdr* contact model:
+
+.. math::
+
+   k_{mdr} = 2 E_{eff} a_{mdr}.
+
+In this case, :math:`\eta_{n0}` is simply a dimensionless coefficient that scales the
+overall damping coefficient.
+
+The other supported option is :math:`d_{type} = 2`, which defines a simple damping model
+similar to the *velocity* option
+
+.. math::
+
+   \eta_n = \eta_{n0},
+
+but has additional checks to avoid non-physical damping after plastic deformation.
+
 The total normal force is computed as the sum of the elastic and
 damping components:
 
@@ -1068,8 +1090,8 @@ a bulk elastic response. Journal of the Mechanics and Physics of Solids,
 
 **(Zunker et al, 2025)** Zunker, W., Dunatunga, S., Thakur, S.,
 Tang, P., & Kamrin, K. (2025). Experimentally validated DEM for large
-deformation powder compaction: mechanically-derived contact model and
-screening of non-physical contacts.
+deformation powder compaction: Mechanically-derived contact model and
+screening of non-physical contacts. Powder Technology, 120972.
 
 .. _Luding2008:
 
diff --git a/doc/src/pair_lj_smooth.rst b/doc/src/pair_lj_smooth.rst
index 4799ca3277..d6b266d932 100644
--- a/doc/src/pair_lj_smooth.rst
+++ b/doc/src/pair_lj_smooth.rst
@@ -48,13 +48,19 @@ At the inner cutoff the force and its first derivative
 will match the non-smoothed LJ formula.  At the outer cutoff the force
 and its first derivative will be 0.0.  The inner cutoff cannot be 0.0.
 
+Explicit expressions for the coefficients C1, C2, C3, C4, as well as the
+energy discontinuity at the cutoff, can be found in :ref:`(Leoni_1) <Leoni_1>`
+and :ref:`(Leoni_2) <Leoni_2>`.
+
 .. note::
 
    this force smoothing causes the energy to be discontinuous both
    in its values and first derivative.  This can lead to poor energy
-   conservation and may require the use of a thermostat.  Plot the energy
-   and force resulting from this formula via the
-   :doc:`pair_write <pair_write>` command to see the effect.
+   conservation and may require the use of a thermostat.  The energy
+   value discontinuity can be eliminated by shifting the potential
+   energy to be zero at the outer cutoff using the pair_modify shift
+   option. With or without shifting, you can plot the resulting energy
+   and force via the :doc:`pair_write <pair_write>` command to see the effect.
 
 The following coefficients must be defined for each pair of atoms
 types via the :doc:`pair_coeff <pair_coeff>` command as in the examples
@@ -122,3 +128,14 @@ Default
 """""""
 
 none
+
+----------
+
+.. _Leoni_1:
+
+**(Leoni_1)** F. Leoni et al., Phys Rev Lett, 134, 128201 (2025).
+
+.. _Leoni_2:
+
+**(Leoni_2)** F. Leoni et al., Phys Rev Lett, 134, Supplementary Material (2025).
+
diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt
index cdad93d55e..773e51bf60 100644
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@@ -103,6 +103,7 @@ Amit
 amsmath
 amu
 Amzallag
+Anandakrishnan
 analytical
 Anders
 Andric
@@ -110,6 +111,7 @@ Andrienko
 Andzelm
 Ang
 anglegrad
+anglelist
 angleoffset
 angletangrad
 angmom
@@ -347,6 +349,7 @@ Bomont
 BondAngle
 BondBond
 bondchk
+bondlist
 bondmax
 bondscreened
 bondscreenedspin
@@ -395,6 +398,7 @@ Broglie
 brownian
 brownw
 Broyden
+Bruenger
 Bruskin
 Brusselle
 Bryantsev
@@ -821,6 +825,7 @@ diffusively
 diffusivities
 diffusivity
 dihedral
+dihedrallist
 dihedrals
 Dihedrals
 dihydride
@@ -1199,6 +1204,7 @@ filesystem
 filesystems
 Fily
 Fincham
+Finkelstein
 Fint
 fingerprintconstants
 fingerprintsperelement
@@ -1325,7 +1331,6 @@ Geocomputing
 georg
 Georg
 Geotechnica
-Gergs
 germain
 Germann
 Germano
@@ -1353,6 +1358,8 @@ Gillan
 Gingold
 Gissinger
 github
+Giusti
+GJ
 gjf
 gjwagne
 gl
@@ -1611,6 +1618,7 @@ imagename
 imd
 Impey
 impl
+improperlist
 impropers
 Impropers
 imulator
@@ -1727,6 +1735,7 @@ Iyz
 iz
 izcm
 ized
+Izadi
 Izrailev
 Izumi
 Izvekov
@@ -1856,6 +1865,7 @@ Kloss
 Kloza
 kmax
 Kmax
+kMC
 KMP
 kmu
 Knizhnik
@@ -1920,6 +1930,7 @@ Lachet
 Lackmann
 Ladd
 lagrangian
+Lalli
 lambdai
 LambdaLanczos
 Lambrecht
@@ -1982,6 +1993,7 @@ lennard
 Lennard
 Lenosky
 Lenz
+Leoni
 Lett
 Leuven
 Leven
@@ -2480,6 +2492,7 @@ namespaces
 nan
 NaN
 Nandor
+nanglelist
 nangles
 Nangletype
 nangletypes
@@ -2516,6 +2529,7 @@ nbodies
 nbody
 Nbody
 nbond
+nbondlist
 nbonds
 nbondtype
 Nbondtype
@@ -2537,6 +2551,7 @@ ncount
 nd
 ndactrung
 ndescriptors
+ndihedrallist
 ndihedrals
 Ndihedraltype
 ndihedraltypes
@@ -2594,6 +2609,7 @@ NiAlH
 Nicklas
 Niklasson
 Nikolskiy
+nimproperlist
 nimpropers
 Nimpropertype
 nimpropertypes
@@ -2798,6 +2814,7 @@ oneMKL
 oneway
 onlysalt
 ons
+Onufriev
 OO
 Oord
 opencl
@@ -3466,6 +3483,7 @@ sectoring
 sed
 Seddon
 segmental
+Seibold
 Seifert
 Seleson
 sellerio
@@ -4085,9 +4103,11 @@ versa
 Verstraelen
 ves
 vf
+vfull
 vflag
 vflow
 vfrac
+vhalf
 vhi
 vibrational
 Vij
@@ -4119,6 +4139,7 @@ volpress
 volumetric
 von
 Voro
+voro
 Vorobyov
 voronoi
 Voronoi
diff --git a/examples/PACKAGES/imd/in.bucky-plus-cnt b/examples/PACKAGES/imd/in.bucky-plus-cnt
index b3eeff3cc1..af511fe11f 100644
--- a/examples/PACKAGES/imd/in.bucky-plus-cnt
+++ b/examples/PACKAGES/imd/in.bucky-plus-cnt
@@ -46,8 +46,8 @@ fix     integrate   mobile nve
 fix     thermostat  mobile langevin 300.0 300.0 2000.0 234624
 
 # IMD setup.
-fix  comm       all imd 6789 unwrap on trate 10
-#fix  comm       all imd 6789 unwrap on trate 10 nowait on
+#fix  comm       all imd 6789 unwrap on trate 10
+fix  comm       all imd 6789 unwrap on trate 10 nowait on
 
 # temperature is based on mobile atoms only
 compute mobtemp mobile temp
diff --git a/examples/PACKAGES/imd/in.bucky-plus-cnt-gpu b/examples/PACKAGES/imd/in.bucky-plus-cnt-gpu
index 5762ec68c8..f3e4b32cdc 100644
--- a/examples/PACKAGES/imd/in.bucky-plus-cnt-gpu
+++ b/examples/PACKAGES/imd/in.bucky-plus-cnt-gpu
@@ -1,16 +1,20 @@
 # stick a buckyball into a nanotube
+
+# enable GPU package from within the input:
+package gpu 0 pair/only on
+suffix gpu
+
 units           real
 dimension       3
 boundary       f f f
 atom_style      molecular
-newton          off
 
 processors * * 1
 
 # read topology 
 read_data       data.bucky-plus-cnt
 
-pair_style  lj/cut/gpu  10.0
+pair_style  lj/cut  10.0
 bond_style  harmonic
 angle_style charmm
 dihedral_style charmm
@@ -29,9 +33,6 @@ neigh_modify    delay 0 every 1 check yes
 
 timestep        2.0
 
-# required for GPU acceleration
-fix   gpu  all      gpu  force 0 0 1.0
-
 # we only move some atoms.
 group mobile type 1
 
@@ -49,8 +50,8 @@ fix     integrate   mobile nve
 fix     thermostat  mobile langevin 300.0 300.0 2000.0 234624
 
 # IMD setup.
-fix  comm       all imd 6789 unwrap on trate 10
-#fix  comm       all imd 6789 unwrap on trate 10 nowait on
+#fix  comm       all imd 6789 unwrap on trate 10
+fix  comm       all imd 6789 unwrap on trate 10 nowait on
 
 # temperature is based on mobile atoms only
 compute mobtemp mobile temp
diff --git a/examples/PACKAGES/imd/in.deca-ala_imd-gpu b/examples/PACKAGES/imd/in.deca-ala_imd-gpu
index 72c3f4aae9..9470f7c213 100644
--- a/examples/PACKAGES/imd/in.deca-ala_imd-gpu
+++ b/examples/PACKAGES/imd/in.deca-ala_imd-gpu
@@ -1,8 +1,12 @@
-# 
+#
+
+# enable GPU package from within the input:
+package gpu 0 pair/only on
+suffix gpu
+
 units           real
 neighbor        2.5 bin
 neigh_modify    delay 1 every 1 
-newton          off
 
 atom_style      full
 bond_style      harmonic
@@ -10,20 +14,18 @@ angle_style     charmm
 dihedral_style  charmm
 improper_style  harmonic
 
-pair_style      lj/charmm/coul/long/gpu 8 10
+pair_style      lj/charmm/coul/long 8 10
 pair_modify     mix arithmetic
 special_bonds   charmm
 read_data       data.deca-ala-solv
 
-fix             0 all gpu force/neigh 0 0 1.0    
-
 group peptide   id <= 103
 fix             rigidh all shake 1e-6 100 1000 t 1 2 3 4 5 a 23
 
 thermo          100
 thermo_style    multi
 timestep        2.0
-kspace_style    pppm/gpu 1e-5
+kspace_style    pppm 1e-5
 
 fix             ensemble all npt temp 300.0 300.0 100.0 iso 1.0 1.0 1000.0 drag 0.2
 
diff --git a/examples/PACKAGES/imd/in.melt_imd-gpu b/examples/PACKAGES/imd/in.melt_imd-gpu
index 24904eb832..f1406befa6 100644
--- a/examples/PACKAGES/imd/in.melt_imd-gpu
+++ b/examples/PACKAGES/imd/in.melt_imd-gpu
@@ -1,30 +1,32 @@
-# 3d Lennard-Jones melt
+# 3d Lennard-Jones melt with GPU package acceleration
 
-units		lj
-atom_style	atomic
-newton      off
+# enable GPU package from within the input:
+package gpu 0
+suffix gpu
 
-lattice		fcc 0.8442
-region		box block 0 10 0 10 0 10
-create_box	1 box
-create_atoms	1 box
-mass		1 1.0
+units           lj
+atom_style      atomic
 
-velocity	all create 3.0 87287
+lattice         fcc 0.8442
+region          box block 0 10 0 10 0 10
+create_box      1 box
+create_atoms    1 box
+mass            1 1.0
 
-pair_style	lj/cut/gpu 2.5
-pair_coeff	1 1 1.0 1.0 2.5
+velocity        all create 3.0 87287
 
-neighbor	0.3 bin
-neigh_modify	every 5 delay 10 check yes
+pair_style      lj/cut 2.5
+pair_coeff      1 1 1.0 1.0 2.5
+
+neighbor        0.3 bin
+neigh_modify    every 5 delay 10 check yes
 
 thermo_style    custom step pe ke spcpu
 
-fix     0 all gpu force/neigh 0 0 1.0
-fix		1 all nve
+fix             1 all nve
 
 # IMD setup.
 fix  comm       all imd 5678 unwrap off fscale 20.0 trate 20 nowait on
 
-thermo		500
-run		5000000
+thermo          500
+run             5000000
diff --git a/examples/gjf/README b/examples/gjf/README
new file mode 100644
index 0000000000..f7c955080f
--- /dev/null
+++ b/examples/gjf/README
@@ -0,0 +1,47 @@
+LAMMPS GJ THERMOSTAT EXAMPLE
+
+Required LAMMPS packages: EXTRA-FIX, MOLECULE, EXTRA-PAIR
+
+This directory contains the ingredients to run an NVT simulation using the 
+GJ thermostats.
+
+Example:
+
+NP=4 #number of processors
+mpirun -np $NP lmp_mpi -in in.gjf.vhalf
+
+Compared to other thermostats, the GJ thermostat allows for larger timesteps 
+with the correct Boltzmann statistics. A comparison using averaged properties
+from this example's input file is shown below. 'X' denotes a failed simulation.
+The theoretical value for KE is 1.1168 eV.
+
+POTENTIAL ENERGY (eV)
+| Δt                ||  0.01  |  0.05  |  0.10  |  0.11  |  0.12  |  0.13  |  0.14  |
+|===================||========|========|========|========|========|========|========|
+| gjf half          || -55.11 | -55.11 | -55.11 | -55.11 | -55.11 | -55.10 | -55.07 |
+| gjf full          || -55.11 | -55.11 | -55.11 | -55.11 | -55.11 | -55.10 | -55.07 |
+| langevin          || -55.11 | -55.07 | -54.87 | -54.79 | -54.65 |    X   |    X   |
+| nvt (Nose-Hoover) || -55.14 | -55.07 | -54.90 | -54.84 | -54.76 |    X   |    X   |
+|-------------------||--------|--------|--------|--------|--------|--------|--------|
+
+KINETIC ENERGY (eV)
+| Δt                ||  0.01  |  0.05  |  0.10  |  0.11  |  0.12  |  0.13  |  0.14  |
+|===================||========|========|========|========|========|========|========|
+| gjf half          ||  1.117 |  1.116 |  1.119 |  1.119 |  1.123 |  1.136 |  1.170 |
+| gjf full          ||  1.116 |  1.071 |  0.938 |  0.898 |  0.858 |  0.817 |  0.780 |
+| langevin          ||  1.110 |  1.113 |  1.121 |  1.129 |  1.157 |    X   |    X   |
+| nvt (Nose-Hoover) ||  1.083 |  1.109 |  1.112 |  1.113 |  1.114 |    X   |    X   |
+|-------------------||--------|--------|--------|--------|--------|--------|--------|
+
+
+Script Commands:
+--
+fix lang all gjf 10 10 1 26488
+--
+fix lang all gjf 10 10 1 26488 vel vfull
+--
+fix nve all nve
+fix lang all langevin 10 10 1 26488
+--
+fix noho all nvt temp 10 10 1
+--
\ No newline at end of file
diff --git a/examples/gjf/README.md b/examples/gjf/README.md
deleted file mode 100644
index e6886cb2dd..0000000000
--- a/examples/gjf/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# LAMMPS GJF-2GJ THERMOSTAT EXAMPLE
-
-## GJF-2GJ THERMOSTAT
-
-This directory contains the ingredients to run an NVT simulation using the GJF-2GJ thermostat.
-
-Example:
-```
-NP=4 #number of processors
-mpirun -np $NP lmp_mpi -in.gjf.vhalf
-```
-
-## Required LAMMPS packages: MOLECULE package
diff --git a/examples/gjf/in.gjf.vfull b/examples/gjf/in.gjf.vfull
index 40512ac37a..fad6df4a9b 100644
--- a/examples/gjf/in.gjf.vfull
+++ b/examples/gjf/in.gjf.vfull
@@ -1,23 +1,25 @@
-# GJF-2GJ thermostat
+# GJ thermostat
 
 units   	metal
 atom_style      full
 
 boundary 	p p p
 read_data argon.lmp
-
 include ff-argon.lmp
 
 velocity all create 10 2357 mom yes dist gaussian
-
 neighbor 1 bin
 
 timestep 0.1
 
-fix lang all langevin 10 10 1 26488 gjf vfull
-fix nve all nve
+compute myKE all ke
+compute myPE all pe
+
+fix lang all gjf 10 10 1 26488 vel vfull method 1
 
-thermo 200
 run 5000
 
+fix energies all ave/time 1 20000 20000 c_myKE c_myPE #file ave.out
 
+thermo 2000
+run 35000
\ No newline at end of file
diff --git a/examples/gjf/in.gjf.vhalf b/examples/gjf/in.gjf.vhalf
index 63fb8bd467..2f5c482928 100644
--- a/examples/gjf/in.gjf.vhalf
+++ b/examples/gjf/in.gjf.vhalf
@@ -1,23 +1,25 @@
-# GJF-2GJ thermostat
+# GJ thermostat
 
 units   	metal
 atom_style      full
 
 boundary 	p p p
 read_data argon.lmp
-
 include ff-argon.lmp
 
 velocity all create 10 2357 mom yes dist gaussian
-
 neighbor 1 bin
 
 timestep 0.1
 
-fix lang all langevin 10 10 1 26488 gjf vhalf
-fix nve all nve
+compute myKE all ke
+compute myPE all pe
+
+fix lang all gjf 10 10 1 26488
 
-thermo 200
 run 5000
 
+fix energies all ave/time 1 20000 20000 c_myKE c_myPE #file ave.out
 
+thermo 2000
+run 35000
\ No newline at end of file
diff --git a/examples/gjf/log.15Oct19.gjf.vfull.g++.1 b/examples/gjf/log.15Oct19.gjf.vfull.g++.1
deleted file mode 100644
index e3e9cce124..0000000000
--- a/examples/gjf/log.15Oct19.gjf.vfull.g++.1
+++ /dev/null
@@ -1,125 +0,0 @@
-LAMMPS (19 Sep 2019)
-  using 1 OpenMP thread(s) per MPI task
-# GJF-2GJ thermostat
-
-units   	metal
-atom_style      full
-
-boundary 	p p p
-read_data argon.lmp
-  orthogonal box = (0 0 0) to (32.146 32.146 32.146)
-  1 by 1 by 1 MPI processor grid
-  reading atoms ...
-  864 atoms
-  0 = max # of 1-2 neighbors
-  0 = max # of 1-3 neighbors
-  0 = max # of 1-4 neighbors
-  1 = max # of special neighbors
-  special bonds CPU = 0.000150019 secs
-  read_data CPU = 0.001946 secs
-
-include ff-argon.lmp
-#############################
-#Atoms types - mass - charge#
-#############################
-#@ 1 atom types #!THIS LINE IS NECESSARY DON'T SPEND HOURS FINDING THAT OUT!#
-
-variable Ar equal 1
-
-#############
-#Atom Masses#
-#############
-
-mass ${Ar}      39.903
-mass 1      39.903
-
-###########################
-#Pair Potentials - Tersoff#
-###########################
-
-pair_style      lj/cubic
-pair_coeff      * * 0.0102701 3.42
-
-
-velocity all create 10 2357 mom yes dist gaussian
-
-neighbor 1 bin
-
-timestep 0.1
-
-fix lang all langevin 10 10 1 26488 gjf vfull
-fix nve all nve
-
-thermo 200
-run 5000
-Neighbor list info ...
-  update every 1 steps, delay 10 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 6.94072
-  ghost atom cutoff = 6.94072
-  binsize = 3.47036, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair lj/cubic, perpetual
-      attributes: half, newton on
-      pair build: half/bin/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 6.875 | 6.875 | 6.875 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0    11.080223   -56.207655            0    -54.97164    37.215524 
-     200    8.2588471   -55.073602            0   -54.152316    339.80416 
-     400    8.1427292   -55.072244            0    -54.16391    338.91883 
-     600    8.7595618   -55.066739            0   -54.089596    344.25426 
-     800     8.550633   -55.148315            0   -54.194479     318.9385 
-    1000    8.5394337   -55.125709            0   -54.173122    326.59471 
-    1200     8.565973   -55.114892            0   -54.159345     328.5193 
-    1400    8.2092914   -55.109233            0   -54.193475    329.56161 
-    1600     8.209495   -55.138161            0    -54.22238    321.39971 
-    1800    8.4039924    -55.13355            0   -54.196072    322.64214 
-    2000    8.4548937   -55.062994            0   -54.119838    343.29888 
-    2200    8.3775139    -55.13364            0   -54.199116    323.63744 
-    2400     8.537332   -55.163702            0    -54.21135    315.62864 
-    2600     8.672488   -55.112054            0   -54.144625     330.1106 
-    2800    8.3000218   -55.147275            0   -54.221396    318.73112 
-    3000    8.3552421   -55.135164            0   -54.203124    323.53075 
-    3200    8.4126798   -55.135753            0   -54.197306    321.48817 
-    3400    8.4986413   -55.135408            0   -54.187372    323.42951 
-    3600      8.38431   -55.103932            0    -54.16865    330.68929 
-    3800    8.8262454   -55.103648            0   -54.119067    332.97779 
-    4000    7.9658136   -55.120402            0   -54.231803     324.9595 
-    4200    8.2265544   -55.129011            0   -54.211327    323.87069 
-    4400    8.1253738   -55.153089            0   -54.246691      316.304 
-    4600    8.2010823   -55.124053            0    -54.20921    325.98402 
-    4800    8.5512149   -55.075877            0   -54.121976    338.30137 
-    5000    8.4737659   -55.158604            0   -54.213343    316.22418 
-Loop time of 2.73236 on 1 procs for 5000 steps with 864 atoms
-
-Performance: 15810.507 ns/day, 0.002 hours/ns, 1829.920 timesteps/s
-99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 1.4262     | 1.4262     | 1.4262     |   0.0 | 52.20
-Bond    | 0.00042836 | 0.00042836 | 0.00042836 |   0.0 |  0.02
-Neigh   | 0.12819    | 0.12819    | 0.12819    |   0.0 |  4.69
-Comm    | 0.058611   | 0.058611   | 0.058611   |   0.0 |  2.15
-Output  | 0.00047283 | 0.00047283 | 0.00047283 |   0.0 |  0.02
-Modify  | 1.0924     | 1.0924     | 1.0924     |   0.0 | 39.98
-Other   |            | 0.02605    |            |       |  0.95
-
-Nlocal:    864 ave 864 max 864 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    1593 ave 1593 max 1593 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    18143 ave 18143 max 18143 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 18143
-Ave neighs/atom = 20.9988
-Ave special neighs/atom = 0
-Neighbor list builds = 158
-Dangerous builds = 5
-
-
-Total wall time: 0:00:02
diff --git a/examples/gjf/log.15Oct19.gjf.vfull.g++.4 b/examples/gjf/log.15Oct19.gjf.vfull.g++.4
deleted file mode 100644
index 95caed5dc9..0000000000
--- a/examples/gjf/log.15Oct19.gjf.vfull.g++.4
+++ /dev/null
@@ -1,125 +0,0 @@
-LAMMPS (19 Sep 2019)
-  using 1 OpenMP thread(s) per MPI task
-# GJF-2GJ thermostat
-
-units   	metal
-atom_style      full
-
-boundary 	p p p
-read_data argon.lmp
-  orthogonal box = (0 0 0) to (32.146 32.146 32.146)
-  1 by 2 by 2 MPI processor grid
-  reading atoms ...
-  864 atoms
-  0 = max # of 1-2 neighbors
-  0 = max # of 1-3 neighbors
-  0 = max # of 1-4 neighbors
-  1 = max # of special neighbors
-  special bonds CPU = 0.000556268 secs
-  read_data CPU = 0.003817 secs
-
-include ff-argon.lmp
-#############################
-#Atoms types - mass - charge#
-#############################
-#@ 1 atom types #!THIS LINE IS NECESSARY DON'T SPEND HOURS FINDING THAT OUT!#
-
-variable Ar equal 1
-
-#############
-#Atom Masses#
-#############
-
-mass ${Ar}      39.903
-mass 1      39.903
-
-###########################
-#Pair Potentials - Tersoff#
-###########################
-
-pair_style      lj/cubic
-pair_coeff      * * 0.0102701 3.42
-
-
-velocity all create 10 2357 mom yes dist gaussian
-
-neighbor 1 bin
-
-timestep 0.1
-
-fix lang all langevin 10 10 1 26488 gjf vfull
-fix nve all nve
-
-thermo 200
-run 5000
-Neighbor list info ...
-  update every 1 steps, delay 10 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 6.94072
-  ghost atom cutoff = 6.94072
-  binsize = 3.47036, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair lj/cubic, perpetual
-      attributes: half, newton on
-      pair build: half/bin/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 6.808 | 6.808 | 6.808 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0    11.080228   -56.207655            0   -54.971639    37.215541 
-     200    8.4818184   -55.127334            0   -54.181174    324.96159 
-     400    8.5960916    -55.09236            0   -54.133453    334.83136 
-     600    8.1607556   -55.073136            0   -54.162791      339.035 
-     800    8.8350489   -55.133382            0   -54.147819    324.48149 
-    1000    8.5692704   -55.118463            0   -54.162548    327.26328 
-    1200    8.4174147   -55.126297            0   -54.187322     324.4248 
-    1400    8.6362603   -55.123075            0   -54.159688     326.7798 
-    1600     8.222512   -55.153799            0   -54.236565     317.8147 
-    1800     8.324523   -55.116698            0   -54.188085    327.35373 
-    2000    7.9615959   -55.155825            0   -54.267697    315.37215 
-    2200     8.495968   -55.083943            0   -54.136205    336.67775 
-    2400    7.7926986   -55.044816            0   -54.175529    344.87758 
-    2600    8.1551351   -55.069404            0   -54.159687    339.60901 
-    2800    8.2593599   -55.084151            0   -54.162807    336.54935 
-    3000    8.2860869   -55.110296            0   -54.185971    328.99074 
-    3200    8.4074534   -55.123576            0   -54.185712    326.06823 
-    3400    8.6694364   -55.128925            0   -54.161836    324.67512 
-    3600    8.5718984   -55.129861            0   -54.173653    325.20586 
-    3800     8.508102   -55.099093            0   -54.150001    333.91437 
-    4000    8.2966658   -55.117782            0   -54.192276    327.13516 
-    4200    8.7641728   -55.135792            0   -54.158136    324.00844 
-    4400    8.8827909   -55.096369            0    -54.10548    335.08467 
-    4600    8.7666577   -55.127213            0   -54.149279    326.15539 
-    4800    8.6670762   -55.163395            0    -54.19657    316.48383 
-    5000    8.1893094   -55.073756            0   -54.160226    337.95271 
-Loop time of 0.870594 on 4 procs for 5000 steps with 864 atoms
-
-Performance: 49621.267 ns/day, 0.000 hours/ns, 5743.202 timesteps/s
-96.5% CPU use with 4 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.33582    | 0.35125    | 0.3724     |   2.3 | 40.35
-Bond    | 0.00030267 | 0.00031316 | 0.00033538 |   0.0 |  0.04
-Neigh   | 0.034246   | 0.03479    | 0.035904   |   0.4 |  4.00
-Comm    | 0.15068    | 0.17419    | 0.19191    |   3.6 | 20.01
-Output  | 0.00044776 | 0.00054703 | 0.00083177 |   0.0 |  0.06
-Modify  | 0.27679    | 0.28079    | 0.28849    |   0.9 | 32.25
-Other   |            | 0.02871    |            |       |  3.30
-
-Nlocal:    216 ave 216 max 216 min
-Histogram: 4 0 0 0 0 0 0 0 0 0
-Nghost:    888.75 ave 899 max 876 min
-Histogram: 1 0 1 0 0 0 0 0 0 2
-Neighs:    4536 ave 4737 max 4335 min
-Histogram: 2 0 0 0 0 0 0 0 0 2
-
-Total # of neighbors = 18144
-Ave neighs/atom = 21
-Ave special neighs/atom = 0
-Neighbor list builds = 178
-Dangerous builds = 11
-
-
-Total wall time: 0:00:00
diff --git a/examples/gjf/log.15Oct19.gjf.vhalf.g++.1 b/examples/gjf/log.15Oct19.gjf.vhalf.g++.1
deleted file mode 100644
index a87b20a887..0000000000
--- a/examples/gjf/log.15Oct19.gjf.vhalf.g++.1
+++ /dev/null
@@ -1,125 +0,0 @@
-LAMMPS (19 Sep 2019)
-  using 1 OpenMP thread(s) per MPI task
-# GJF-2GJ thermostat
-
-units   	metal
-atom_style      full
-
-boundary 	p p p
-read_data argon.lmp
-  orthogonal box = (0 0 0) to (32.146 32.146 32.146)
-  1 by 1 by 1 MPI processor grid
-  reading atoms ...
-  864 atoms
-  0 = max # of 1-2 neighbors
-  0 = max # of 1-3 neighbors
-  0 = max # of 1-4 neighbors
-  1 = max # of special neighbors
-  special bonds CPU = 0.000147804 secs
-  read_data CPU = 0.00194898 secs
-
-include ff-argon.lmp
-#############################
-#Atoms types - mass - charge#
-#############################
-#@ 1 atom types #!THIS LINE IS NECESSARY DON'T SPEND HOURS FINDING THAT OUT!#
-
-variable Ar equal 1
-
-#############
-#Atom Masses#
-#############
-
-mass ${Ar}      39.903
-mass 1      39.903
-
-###########################
-#Pair Potentials - Tersoff#
-###########################
-
-pair_style      lj/cubic
-pair_coeff      * * 0.0102701 3.42
-
-
-velocity all create 10 2357 mom yes dist gaussian
-
-neighbor 1 bin
-
-timestep 0.1
-
-fix lang all langevin 10 10 1 26488 gjf vhalf
-fix nve all nve
-
-thermo 200
-run 5000
-Neighbor list info ...
-  update every 1 steps, delay 10 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 6.94072
-  ghost atom cutoff = 6.94072
-  binsize = 3.47036, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair lj/cubic, perpetual
-      attributes: half, newton on
-      pair build: half/bin/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 6.5 | 6.5 | 6.5 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0    11.080223   -56.207655            0    -54.97164    37.215524 
-     200    9.8808568   -55.073602            0   -53.971378    345.62207 
-     400    9.8712816   -55.072244            0   -53.971088    345.11889 
-     600    10.528988   -55.066739            0   -53.892214    350.60093 
-     800    10.167171   -55.148315            0   -54.014152    324.73679 
-    1000    10.029026   -55.125709            0   -54.006956    331.93766 
-    1200    10.194424   -55.114892            0   -53.977688    334.36032 
-    1400    9.3473846   -55.109233            0   -54.066518    333.64378 
-    1600    9.7774071   -55.138161            0   -54.047477    327.02358 
-    1800    9.9814275    -55.13355            0   -54.020107    328.30017 
-    2000      10.2515   -55.062994            0   -53.919424    349.74304 
-    2200    9.8126922    -55.13364            0   -54.039019    328.78521 
-    2400    10.044314   -55.163702            0   -54.043244    321.03397 
-    2600    10.543316   -55.112054            0   -53.935932    336.82099 
-    2800    9.7874375   -55.147275            0   -54.055472    324.06626 
-    3000    9.7703821   -55.135164            0   -54.045263    328.60665 
-    3200    10.141958   -55.135753            0   -54.004402    327.69084 
-    3400    10.160576   -55.135408            0    -54.00198    329.39063 
-    3600    10.044652   -55.103932            0   -53.983436    336.64469 
-    3800    10.662403   -55.103648            0   -53.914241    339.56382 
-    4000    9.2921047   -55.120402            0   -54.083853    329.71671 
-    4200    9.8744553   -55.129011            0   -54.027501    329.78147 
-    4400    9.4085964   -55.153089            0   -54.103546    320.90673 
-    4600    9.5463801   -55.124053            0    -54.05914    330.80941 
-    4800    10.223884   -55.075877            0   -53.935387    344.30099 
-    5000    9.6243338   -55.158604            0   -54.084996     320.3511 
-Loop time of 2.29551 on 1 procs for 5000 steps with 864 atoms
-
-Performance: 18819.358 ns/day, 0.001 hours/ns, 2178.166 timesteps/s
-99.7% CPU use with 1 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 1.4393     | 1.4393     | 1.4393     |   0.0 | 62.70
-Bond    | 0.0004441  | 0.0004441  | 0.0004441  |   0.0 |  0.02
-Neigh   | 0.12136    | 0.12136    | 0.12136    |   0.0 |  5.29
-Comm    | 0.059342   | 0.059342   | 0.059342   |   0.0 |  2.59
-Output  | 0.00046968 | 0.00046968 | 0.00046968 |   0.0 |  0.02
-Modify  | 0.64937    | 0.64937    | 0.64937    |   0.0 | 28.29
-Other   |            | 0.02522    |            |       |  1.10
-
-Nlocal:    864 ave 864 max 864 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    1593 ave 1593 max 1593 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    18143 ave 18143 max 18143 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 18143
-Ave neighs/atom = 20.9988
-Ave special neighs/atom = 0
-Neighbor list builds = 158
-Dangerous builds = 5
-
-
-Total wall time: 0:00:02
diff --git a/examples/gjf/log.15Oct19.gjf.vhalf.g++.4 b/examples/gjf/log.15Oct19.gjf.vhalf.g++.4
deleted file mode 100644
index a70a67a89c..0000000000
--- a/examples/gjf/log.15Oct19.gjf.vhalf.g++.4
+++ /dev/null
@@ -1,125 +0,0 @@
-LAMMPS (19 Sep 2019)
-  using 1 OpenMP thread(s) per MPI task
-# GJF-2GJ thermostat
-
-units   	metal
-atom_style      full
-
-boundary 	p p p
-read_data argon.lmp
-  orthogonal box = (0 0 0) to (32.146 32.146 32.146)
-  1 by 2 by 2 MPI processor grid
-  reading atoms ...
-  864 atoms
-  0 = max # of 1-2 neighbors
-  0 = max # of 1-3 neighbors
-  0 = max # of 1-4 neighbors
-  1 = max # of special neighbors
-  special bonds CPU = 0.000315903 secs
-  read_data CPU = 0.0653752 secs
-
-include ff-argon.lmp
-#############################
-#Atoms types - mass - charge#
-#############################
-#@ 1 atom types #!THIS LINE IS NECESSARY DON'T SPEND HOURS FINDING THAT OUT!#
-
-variable Ar equal 1
-
-#############
-#Atom Masses#
-#############
-
-mass ${Ar}      39.903
-mass 1      39.903
-
-###########################
-#Pair Potentials - Tersoff#
-###########################
-
-pair_style      lj/cubic
-pair_coeff      * * 0.0102701 3.42
-
-
-velocity all create 10 2357 mom yes dist gaussian
-
-neighbor 1 bin
-
-timestep 0.1
-
-fix lang all langevin 10 10 1 26488 gjf vhalf
-fix nve all nve
-
-thermo 200
-run 5000
-Neighbor list info ...
-  update every 1 steps, delay 10 steps, check yes
-  max neighbors/atom: 2000, page size: 100000
-  master list distance cutoff = 6.94072
-  ghost atom cutoff = 6.94072
-  binsize = 3.47036, bins = 10 10 10
-  1 neighbor lists, perpetual/occasional/extra = 1 0 0
-  (1) pair lj/cubic, perpetual
-      attributes: half, newton on
-      pair build: half/bin/newton
-      stencil: half/bin/3d/newton
-      bin: standard
-Per MPI rank memory allocation (min/avg/max) = 6.433 | 6.433 | 6.433 Mbytes
-Step Temp E_pair E_mol TotEng Press 
-       0    11.080228   -56.207655            0   -54.971639    37.215541 
-     200    9.8046716   -55.127334            0   -54.033608    329.70647 
-     400    10.174622    -55.09236            0   -53.957366    340.49331 
-     600    9.9812299   -55.073136            0   -53.959714    345.56477 
-     800    10.512874   -55.133382            0   -53.960655     330.4996 
-    1000    9.9587885   -55.118463            0   -54.007545    332.24728 
-    1200    10.236607   -55.126297            0   -53.984388    330.94998 
-    1400    10.134679   -55.123075            0   -53.992537    332.15441 
-    1600    9.8934078   -55.153799            0   -54.050174    323.80795 
-    1800    10.064966   -55.116698            0   -53.993936    333.59644 
-    2000    9.6736107   -55.155825            0   -54.076719     321.5129 
-    2200    10.264537   -55.083943            0   -53.938918    343.02135 
-    2400    9.5640032   -55.044816            0   -53.977937    351.23099 
-    2600    9.6581077   -55.069404            0   -53.992028    344.99996 
-    2800    9.9622575   -55.084151            0   -53.972846     342.6574 
-    3000    9.8724909   -55.110296            0   -54.009005    334.68094 
-    3200    10.032027   -55.123576            0   -54.004488    331.89534 
-    3400    10.221132   -55.128925            0   -53.988742    330.24082 
-    3600    10.085802   -55.129861            0   -54.004774    330.63601 
-    3800    10.098545   -55.099093            0   -53.972585    339.61905 
-    4000    10.000257   -55.117782            0   -54.002238    333.24569 
-    4200     10.20477   -55.135792            0   -53.997435    329.17565 
-    4400    10.545132   -55.096369            0   -53.920044    341.04725 
-    4600    10.376108   -55.127213            0   -53.969743    331.92825 
-    4800    10.247392   -55.163395            0   -54.020283    322.15219 
-    5000    9.7753102   -55.073756            0   -53.983305    343.64146 
-Loop time of 1.19785 on 4 procs for 5000 steps with 864 atoms
-
-Performance: 36064.674 ns/day, 0.001 hours/ns, 4174.152 timesteps/s
-88.6% CPU use with 4 MPI tasks x 1 OpenMP threads
-
-MPI task timing breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
----------------------------------------------------------------
-Pair    | 0.36387    | 0.38652    | 0.44086    |   5.1 | 32.27
-Bond    | 0.00028847 | 0.00030833 | 0.000338   |   0.0 |  0.03
-Neigh   | 0.033934   | 0.034959   | 0.036917   |   0.6 |  2.92
-Comm    | 0.39292    | 0.47821    | 0.52198    |   7.3 | 39.92
-Output  | 0.00050343 | 0.0012343  | 0.0023338  |   1.9 |  0.10
-Modify  | 0.1605     | 0.17963    | 0.19457    |   2.9 | 15.00
-Other   |            | 0.117      |            |       |  9.77
-
-Nlocal:    216 ave 216 max 216 min
-Histogram: 4 0 0 0 0 0 0 0 0 0
-Nghost:    888.75 ave 899 max 876 min
-Histogram: 1 0 1 0 0 0 0 0 0 2
-Neighs:    4536 ave 4737 max 4335 min
-Histogram: 2 0 0 0 0 0 0 0 0 2
-
-Total # of neighbors = 18144
-Ave neighs/atom = 21
-Ave special neighs/atom = 0
-Neighbor list builds = 178
-Dangerous builds = 11
-
-
-Total wall time: 0:00:01
diff --git a/examples/gjf/log.2Apr25.gjf.vfull.g++.1 b/examples/gjf/log.2Apr25.gjf.vfull.g++.1
new file mode 100644
index 0000000000..ca739b3a22
--- /dev/null
+++ b/examples/gjf/log.2Apr25.gjf.vfull.g++.1
@@ -0,0 +1,193 @@
+LAMMPS (2 Apr 2025 - Development - d4867ab55e-modified)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:99)
+  using 1 OpenMP thread(s) per MPI task
+# GJ thermostat
+
+units   	metal
+atom_style      full
+
+boundary 	p p p
+read_data argon.lmp
+Reading data file ...
+  orthogonal box = (0 0 0) to (32.146 32.146 32.146)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  864 atoms
+Finding 1-2 1-3 1-4 neighbors ...
+  special bond factors lj:    0        0        0       
+  special bond factors coul:  0        0        0       
+     0 = max # of 1-2 neighbors
+     0 = max # of 1-3 neighbors
+     0 = max # of 1-4 neighbors
+     1 = max # of special neighbors
+  special bonds CPU = 0.000 seconds
+  read_data CPU = 0.007 seconds
+include ff-argon.lmp
+#############################
+#Atoms types - mass - charge#
+#############################
+#@ 1 atom types #!THIS LINE IS NECESSARY DON'T SPEND HOURS FINDING THAT OUT!#
+
+variable Ar equal 1
+
+#############
+#Atom Masses#
+#############
+
+mass ${Ar}      39.903
+mass 1      39.903
+
+###########################
+#Pair Potentials - Tersoff#
+###########################
+
+pair_style      lj/cubic
+pair_coeff      * * 0.0102701 3.42
+
+
+velocity all create 10 2357 mom yes dist gaussian
+neighbor 1 bin
+
+timestep 0.1
+
+compute myKE all ke
+compute myPE all pe
+
+fix lang all gjf 10 10 1 26488 vel vfull method 1
+
+run 5000
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- GJ methods: doi:10.1080/00268976.2019.1662506
+
+@Article{gronbech-jensen_complete_2020,
+title = {Complete set of stochastic Verlet-type thermostats for correct Langevin simulations},
+volume = {118},
+number = {8},
+url = {https://www.tandfonline.com/doi/full/10.1080/00268976.2019.1662506},
+doi = {10.1080/00268976.2019.1662506},
+journal = {Molecular Physics},
+author = {Grønbech-Jensen, Niels},
+year = {2020}
+}
+
+- GJ-I vfull method: doi:10.1080/00268976.2012.760055
+
+@Article{gronbech-jensen_simple_2013,
+title = {A simple and effective Verlet-type algorithm for simulating Langevin dynamics},
+volume = {111},
+url = {http://www.tandfonline.com/doi/abs/10.1080/00268976.2012.760055},
+doi = {10.1080/00268976.2012.760055},
+pages = {983-991},
+number = {8},
+journal = {Molecular Physics},
+author = {Grønbech-Jensen, Niels and Farago, Oded},
+year = {2013}
+}
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 6.9407173
+  ghost atom cutoff = 6.9407173
+  binsize = 3.4703587, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cubic, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 6.481 | 6.481 | 6.481 Mbytes
+   Step          Temp          E_pair         E_mol          TotEng         Press     
+         0   10            -56.207652      0             -55.092137      33.341103    
+      5000   8.4535562     -55.150518      0             -54.207511      318.20862    
+Loop time of 2.26831 on 1 procs for 5000 steps with 864 atoms
+
+Performance: 19044.977 ns/day, 0.001 hours/ns, 2204.280 timesteps/s, 1.904 Matom-step/s
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.2802     | 1.2802     | 1.2802     |   0.0 | 56.44
+Bond    | 0.00051213 | 0.00051213 | 0.00051213 |   0.0 |  0.02
+Neigh   | 0.27007    | 0.27007    | 0.27007    |   0.0 | 11.91
+Comm    | 0.057527   | 0.057527   | 0.057527   |   0.0 |  2.54
+Output  | 6.3876e-05 | 6.3876e-05 | 6.3876e-05 |   0.0 |  0.00
+Modify  | 0.63364    | 0.63364    | 0.63364    |   0.0 | 27.93
+Other   |            | 0.02635    |            |       |  1.16
+
+Nlocal:            864 ave         864 max         864 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:           1593 ave        1593 max        1593 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:          18143 ave       18143 max       18143 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 18143
+Ave neighs/atom = 20.998843
+Ave special neighs/atom = 0
+Neighbor list builds = 258
+Dangerous builds = 0
+
+fix energies all ave/time 1 20000 20000 c_myKE c_myPE #file ave.out
+
+thermo 2000
+run 35000
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Per MPI rank memory allocation (min/avg/max) = 6.481 | 6.481 | 6.481 Mbytes
+   Step          Temp          E_pair         E_mol          TotEng         Press     
+      5000   8.4535562     -55.150518      0             -54.207511      318.20862    
+      6000   8.4899401     -55.108242      0             -54.161176      331.10703    
+      8000   8.3618893     -55.092171      0             -54.15939       334.11831    
+     10000   8.8684311     -55.100316      0             -54.111029      334.09931    
+     12000   8.4339192     -55.07343       0             -54.132614      340.00487    
+     14000   8.072393      -55.115121      0             -54.214633      327.98965    
+     16000   8.3420289     -55.077813      0             -54.147247      337.74926    
+     18000   8.3803911     -55.12201       0             -54.187164      326.10485    
+     20000   8.4676985     -55.176339      0             -54.231754      311.57092    
+     22000   8.8560138     -55.110505      0             -54.122603      330.66179    
+     24000   8.3187826     -55.120592      0             -54.192619      327.01148    
+     26000   8.0327666     -55.116664      0             -54.220596      326.25179    
+     28000   8.3672169     -55.130413      0             -54.197037      324.2368     
+     30000   8.1669275     -55.057678      0             -54.146645      344.9168     
+     32000   8.3819314     -55.08989       0             -54.154873      335.45317    
+     34000   8.109088      -55.17222       0             -54.267639      310.83717    
+     36000   8.3048574     -55.079475      0             -54.153056      338.04291    
+     38000   8.8708544     -55.108991      0             -54.119434      330.70097    
+     40000   8.4012779     -55.080817      0             -54.143642      338.54326    
+Loop time of 18.9699 on 1 procs for 35000 steps with 864 atoms
+
+Performance: 15941.040 ns/day, 0.002 hours/ns, 1845.028 timesteps/s, 1.594 Matom-step/s
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 11.593     | 11.593     | 11.593     |   0.0 | 61.11
+Bond    | 0.0041801  | 0.0041801  | 0.0041801  |   0.0 |  0.02
+Neigh   | 2.2671     | 2.2671     | 2.2671     |   0.0 | 11.95
+Comm    | 0.42339    | 0.42339    | 0.42339    |   0.0 |  2.23
+Output  | 0.00062204 | 0.00062204 | 0.00062204 |   0.0 |  0.00
+Modify  | 4.4976     | 4.4976     | 4.4976     |   0.0 | 23.71
+Other   |            | 0.1839     |            |       |  0.97
+
+Nlocal:            864 ave         864 max         864 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:           1592 ave        1592 max        1592 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:          18144 ave       18144 max       18144 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 18144
+Ave neighs/atom = 21
+Ave special neighs/atom = 0
+Neighbor list builds = 2122
+Dangerous builds = 0
+Total wall time: 0:00:21
diff --git a/examples/gjf/log.2Apr25.gjf.vfull.g++.4 b/examples/gjf/log.2Apr25.gjf.vfull.g++.4
new file mode 100644
index 0000000000..93466e8dfe
--- /dev/null
+++ b/examples/gjf/log.2Apr25.gjf.vfull.g++.4
@@ -0,0 +1,193 @@
+LAMMPS (2 Apr 2025 - Development - d4867ab55e-modified)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:99)
+  using 1 OpenMP thread(s) per MPI task
+# GJ thermostat
+
+units   	metal
+atom_style      full
+
+boundary 	p p p
+read_data argon.lmp
+Reading data file ...
+  orthogonal box = (0 0 0) to (32.146 32.146 32.146)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  864 atoms
+Finding 1-2 1-3 1-4 neighbors ...
+  special bond factors lj:    0        0        0       
+  special bond factors coul:  0        0        0       
+     0 = max # of 1-2 neighbors
+     0 = max # of 1-3 neighbors
+     0 = max # of 1-4 neighbors
+     1 = max # of special neighbors
+  special bonds CPU = 0.002 seconds
+  read_data CPU = 0.015 seconds
+include ff-argon.lmp
+#############################
+#Atoms types - mass - charge#
+#############################
+#@ 1 atom types #!THIS LINE IS NECESSARY DON'T SPEND HOURS FINDING THAT OUT!#
+
+variable Ar equal 1
+
+#############
+#Atom Masses#
+#############
+
+mass ${Ar}      39.903
+mass 1      39.903
+
+###########################
+#Pair Potentials - Tersoff#
+###########################
+
+pair_style      lj/cubic
+pair_coeff      * * 0.0102701 3.42
+
+
+velocity all create 10 2357 mom yes dist gaussian
+neighbor 1 bin
+
+timestep 0.1
+
+compute myKE all ke
+compute myPE all pe
+
+fix lang all gjf 10 10 1 26488 vel vfull method 1
+
+run 5000
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- GJ methods: doi:10.1080/00268976.2019.1662506
+
+@Article{gronbech-jensen_complete_2020,
+title = {Complete set of stochastic Verlet-type thermostats for correct Langevin simulations},
+volume = {118},
+number = {8},
+url = {https://www.tandfonline.com/doi/full/10.1080/00268976.2019.1662506},
+doi = {10.1080/00268976.2019.1662506},
+journal = {Molecular Physics},
+author = {Grønbech-Jensen, Niels},
+year = {2020}
+}
+
+- GJ-I vfull method: doi:10.1080/00268976.2012.760055
+
+@Article{gronbech-jensen_simple_2013,
+title = {A simple and effective Verlet-type algorithm for simulating Langevin dynamics},
+volume = {111},
+url = {http://www.tandfonline.com/doi/abs/10.1080/00268976.2012.760055},
+doi = {10.1080/00268976.2012.760055},
+pages = {983-991},
+number = {8},
+journal = {Molecular Physics},
+author = {Grønbech-Jensen, Niels and Farago, Oded},
+year = {2013}
+}
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 6.9407173
+  ghost atom cutoff = 6.9407173
+  binsize = 3.4703587, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cubic, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 6.427 | 6.427 | 6.427 Mbytes
+   Step          Temp          E_pair         E_mol          TotEng         Press     
+         0   10            -56.207652      0             -55.092137      33.341103    
+      5000   7.946377      -55.076514      0             -54.190084      337.31999    
+Loop time of 2.0998 on 4 procs for 5000 steps with 864 atoms
+
+Performance: 20573.405 ns/day, 0.001 hours/ns, 2381.181 timesteps/s, 2.057 Matom-step/s
+65.2% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.53641    | 0.54389    | 0.54721    |   0.6 | 25.90
+Bond    | 0.00056487 | 0.0006195  | 0.00068462 |   0.0 |  0.03
+Neigh   | 0.10567    | 0.1086     | 0.11128    |   0.7 |  5.17
+Comm    | 0.96913    | 0.97758    | 0.98191    |   0.5 | 46.56
+Output  | 0.00025213 | 0.00025642 | 0.00026405 |   0.0 |  0.01
+Modify  | 0.25061    | 0.25105    | 0.25172    |   0.1 | 11.96
+Other   |            | 0.2178     |            |       | 10.37
+
+Nlocal:            216 ave         216 max         216 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Nghost:         884.75 ave         885 max         884 min
+Histogram: 1 0 0 0 0 0 0 0 0 3
+Neighs:           4536 ave        4737 max        4335 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+
+Total # of neighbors = 18144
+Ave neighs/atom = 21
+Ave special neighs/atom = 0
+Neighbor list builds = 273
+Dangerous builds = 0
+
+fix energies all ave/time 1 20000 20000 c_myKE c_myPE #file ave.out
+
+thermo 2000
+run 35000
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Per MPI rank memory allocation (min/avg/max) = 6.428 | 6.428 | 6.428 Mbytes
+   Step          Temp          E_pair         E_mol          TotEng         Press     
+      5000   7.946377      -55.076514      0             -54.190084      337.31999    
+      6000   8.2565866     -55.129244      0             -54.208209      324.57967    
+      8000   7.9942397     -55.101417      0             -54.209648      331.24127    
+     10000   8.5413968     -55.083292      0             -54.130486      337.82599    
+     12000   8.3682078     -55.090905      0             -54.157419      335.08066    
+     14000   8.5082065     -55.085051      0             -54.135948      336.2765     
+     16000   8.1944037     -55.090733      0             -54.176635      334.03786    
+     18000   8.2607106     -55.030131      0             -54.108637      352.49892    
+     20000   8.1154691     -55.104072      0             -54.198779      330.14203    
+     22000   8.5592601     -55.152019      0             -54.197221      318.03507    
+     24000   8.3182914     -55.115242      0             -54.187324      328.46084    
+     26000   8.3691375     -55.125275      0             -54.191685      325.43673    
+     28000   8.531632      -55.107097      0             -54.155381      331.42771    
+     30000   8.1102222     -55.099011      0             -54.194304      332.04678    
+     32000   8.5558571     -55.077016      0             -54.122598      339.87746    
+     34000   8.4213946     -55.097068      0             -54.157649      333.34935    
+     36000   8.0936615     -55.152202      0             -54.249342      316.20169    
+     38000   7.999652      -55.048407      0             -54.156034      345.07945    
+     40000   8.6699753     -55.087634      0             -54.120485      337.23709    
+Loop time of 17.6726 on 4 procs for 35000 steps with 864 atoms
+
+Performance: 17111.263 ns/day, 0.001 hours/ns, 1980.470 timesteps/s, 1.711 Matom-step/s
+65.4% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 5.0739     | 5.1178     | 5.1689     |   1.5 | 28.96
+Bond    | 0.0043764  | 0.004688   | 0.0051706  |   0.4 |  0.03
+Neigh   | 0.83797    | 0.85506    | 0.87554    |   1.8 |  4.84
+Comm    | 6.816      | 6.8932     | 6.9215     |   1.7 | 39.00
+Output  | 0.0043624  | 0.0045336  | 0.004998   |   0.4 |  0.03
+Modify  | 3.3008     | 3.3033     | 3.3066     |   0.1 | 18.69
+Other   |            | 1.494      |            |       |  8.45
+
+Nlocal:            216 ave         222 max         210 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+Nghost:          905.5 ave         911 max         899 min
+Histogram: 1 1 0 0 0 0 0 0 0 2
+Neighs:        4535.75 ave        4837 max        4218 min
+Histogram: 1 0 0 1 0 0 1 0 0 1
+
+Total # of neighbors = 18143
+Ave neighs/atom = 20.998843
+Ave special neighs/atom = 0
+Neighbor list builds = 2140
+Dangerous builds = 0
+Total wall time: 0:00:19
diff --git a/examples/gjf/log.2Apr25.gjf.vhalf.g++.1 b/examples/gjf/log.2Apr25.gjf.vhalf.g++.1
new file mode 100644
index 0000000000..569b13bad6
--- /dev/null
+++ b/examples/gjf/log.2Apr25.gjf.vhalf.g++.1
@@ -0,0 +1,192 @@
+LAMMPS (2 Apr 2025 - Development - d4867ab55e-modified)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:99)
+  using 1 OpenMP thread(s) per MPI task
+# GJ thermostat
+
+units   	metal
+atom_style      full
+
+boundary 	p p p
+read_data argon.lmp
+Reading data file ...
+  orthogonal box = (0 0 0) to (32.146 32.146 32.146)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  864 atoms
+Finding 1-2 1-3 1-4 neighbors ...
+  special bond factors lj:    0        0        0       
+  special bond factors coul:  0        0        0       
+     0 = max # of 1-2 neighbors
+     0 = max # of 1-3 neighbors
+     0 = max # of 1-4 neighbors
+     1 = max # of special neighbors
+  special bonds CPU = 0.000 seconds
+  read_data CPU = 0.010 seconds
+include ff-argon.lmp
+#############################
+#Atoms types - mass - charge#
+#############################
+#@ 1 atom types #!THIS LINE IS NECESSARY DON'T SPEND HOURS FINDING THAT OUT!#
+
+variable Ar equal 1
+
+#############
+#Atom Masses#
+#############
+
+mass ${Ar}      39.903
+mass 1      39.903
+
+###########################
+#Pair Potentials - Tersoff#
+###########################
+
+pair_style      lj/cubic
+pair_coeff      * * 0.0102701 3.42
+
+
+velocity all create 10 2357 mom yes dist gaussian
+neighbor 1 bin
+
+timestep 0.1
+
+compute myKE all ke
+compute myPE all pe
+
+fix lang all gjf 10 10 1 26488
+
+run 5000
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- GJ methods: doi:10.1080/00268976.2019.1662506
+
+@Article{gronbech-jensen_complete_2020,
+title = {Complete set of stochastic Verlet-type thermostats for correct Langevin simulations},
+volume = {118},
+number = {8},
+url = {https://www.tandfonline.com/doi/full/10.1080/00268976.2019.1662506},
+doi = {10.1080/00268976.2019.1662506},
+journal = {Molecular Physics},
+author = {Grønbech-Jensen, Niels},
+year = {2020}
+}
+
+- GJ-I vhalf method: doi:10.1080/00268976.2019.1570369
+
+@Article{jensen_accurate_2019,
+title = {Accurate configurational and kinetic statistics in discrete-time Langevin systems},
+volume = {117},
+url = {https://www.tandfonline.com/doi/full/10.1080/00268976.2019.1570369},
+doi = {10.1080/00268976.2019.1570369},
+number = {18},
+journal = {Molecular Physics},
+author = {Jensen, Lucas Frese Grønbech and Grønbech-Jensen, Niels},
+year = {2019}
+}
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 6.9407173
+  ghost atom cutoff = 6.9407173
+  binsize = 3.4703587, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cubic, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 6.481 | 6.481 | 6.481 Mbytes
+   Step          Temp          E_pair         E_mol          TotEng         Press     
+         0   10            -56.207652      0             -55.092137      33.341103    
+      5000   9.7731898     -55.150518      0             -54.060304      322.94195    
+Loop time of 2.28421 on 1 procs for 5000 steps with 864 atoms
+
+Performance: 18912.438 ns/day, 0.001 hours/ns, 2188.940 timesteps/s, 1.891 Matom-step/s
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.2715     | 1.2715     | 1.2715     |   0.0 | 55.66
+Bond    | 0.00057126 | 0.00057126 | 0.00057126 |   0.0 |  0.03
+Neigh   | 0.27008    | 0.27008    | 0.27008    |   0.0 | 11.82
+Comm    | 0.057938   | 0.057938   | 0.057938   |   0.0 |  2.54
+Output  | 6.1954e-05 | 6.1954e-05 | 6.1954e-05 |   0.0 |  0.00
+Modify  | 0.658      | 0.658      | 0.658      |   0.0 | 28.81
+Other   |            | 0.0261     |            |       |  1.14
+
+Nlocal:            864 ave         864 max         864 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:           1593 ave        1593 max        1593 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:          18143 ave       18143 max       18143 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 18143
+Ave neighs/atom = 20.998843
+Ave special neighs/atom = 0
+Neighbor list builds = 258
+Dangerous builds = 0
+
+fix energies all ave/time 1 20000 20000 c_myKE c_myPE #file ave.out
+
+thermo 2000
+run 35000
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Per MPI rank memory allocation (min/avg/max) = 6.481 | 6.481 | 6.481 Mbytes
+   Step          Temp          E_pair         E_mol          TotEng         Press     
+      5000   9.7731898     -55.150518      0             -54.060304      322.94195    
+      6000   10.024842     -55.108242      0             -53.989956      336.6125     
+      8000   10.118994     -55.092171      0             -53.963382      340.42078    
+     10000   10.541359     -55.100316      0             -53.924412      340.09986    
+     12000   10.023234     -55.07343       0             -53.955323      345.70551    
+     14000   9.5912018     -55.115121      0             -54.045208      333.43739    
+     16000   9.9450498     -55.077813      0             -53.968428      343.49906    
+     18000   10.113744     -55.12201       0             -53.993806      332.32214    
+     20000   9.9345204     -55.176339      0             -54.068128      316.83219    
+     22000   10.585719     -55.110505      0             -53.929652      336.86599    
+     24000   10.024757     -55.120592      0             -54.002315      333.13056    
+     26000   9.7787474     -55.116664      0             -54.02583       332.51437    
+     28000   9.6092087     -55.130413      0             -54.058491      328.69165    
+     30000   9.8245787     -55.057678      0             -53.961731      350.86255    
+     32000   10.066994     -55.08989       0             -53.966902      341.49724    
+     34000   9.5677059     -55.17222       0             -54.104928      316.06902    
+     36000   9.7252627     -55.079475      0             -53.994608      343.13769    
+     38000   10.438984     -55.108991      0             -53.944506      336.32562    
+     40000   10.238268     -55.080817      0             -53.938723      345.13228    
+Loop time of 19.138 on 1 procs for 35000 steps with 864 atoms
+
+Performance: 15801.041 ns/day, 0.002 hours/ns, 1828.824 timesteps/s, 1.580 Matom-step/s
+99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 11.568     | 11.568     | 11.568     |   0.0 | 60.44
+Bond    | 0.0042372  | 0.0042372  | 0.0042372  |   0.0 |  0.02
+Neigh   | 2.2577     | 2.2577     | 2.2577     |   0.0 | 11.80
+Comm    | 0.42841    | 0.42841    | 0.42841    |   0.0 |  2.24
+Output  | 0.00060128 | 0.00060128 | 0.00060128 |   0.0 |  0.00
+Modify  | 4.694      | 4.694      | 4.694      |   0.0 | 24.53
+Other   |            | 0.1852     |            |       |  0.97
+
+Nlocal:            864 ave         864 max         864 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:           1592 ave        1592 max        1592 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:          18144 ave       18144 max       18144 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 18144
+Ave neighs/atom = 21
+Ave special neighs/atom = 0
+Neighbor list builds = 2122
+Dangerous builds = 0
+Total wall time: 0:00:21
diff --git a/examples/gjf/log.2Apr25.gjf.vhalf.g++.4 b/examples/gjf/log.2Apr25.gjf.vhalf.g++.4
new file mode 100644
index 0000000000..2b5e19e634
--- /dev/null
+++ b/examples/gjf/log.2Apr25.gjf.vhalf.g++.4
@@ -0,0 +1,192 @@
+LAMMPS (2 Apr 2025 - Development - d4867ab55e-modified)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:99)
+  using 1 OpenMP thread(s) per MPI task
+# GJ thermostat
+
+units   	metal
+atom_style      full
+
+boundary 	p p p
+read_data argon.lmp
+Reading data file ...
+  orthogonal box = (0 0 0) to (32.146 32.146 32.146)
+  1 by 2 by 2 MPI processor grid
+  reading atoms ...
+  864 atoms
+Finding 1-2 1-3 1-4 neighbors ...
+  special bond factors lj:    0        0        0       
+  special bond factors coul:  0        0        0       
+     0 = max # of 1-2 neighbors
+     0 = max # of 1-3 neighbors
+     0 = max # of 1-4 neighbors
+     1 = max # of special neighbors
+  special bonds CPU = 0.002 seconds
+  read_data CPU = 0.015 seconds
+include ff-argon.lmp
+#############################
+#Atoms types - mass - charge#
+#############################
+#@ 1 atom types #!THIS LINE IS NECESSARY DON'T SPEND HOURS FINDING THAT OUT!#
+
+variable Ar equal 1
+
+#############
+#Atom Masses#
+#############
+
+mass ${Ar}      39.903
+mass 1      39.903
+
+###########################
+#Pair Potentials - Tersoff#
+###########################
+
+pair_style      lj/cubic
+pair_coeff      * * 0.0102701 3.42
+
+
+velocity all create 10 2357 mom yes dist gaussian
+neighbor 1 bin
+
+timestep 0.1
+
+compute myKE all ke
+compute myPE all pe
+
+fix lang all gjf 10 10 1 26488
+
+run 5000
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- GJ methods: doi:10.1080/00268976.2019.1662506
+
+@Article{gronbech-jensen_complete_2020,
+title = {Complete set of stochastic Verlet-type thermostats for correct Langevin simulations},
+volume = {118},
+number = {8},
+url = {https://www.tandfonline.com/doi/full/10.1080/00268976.2019.1662506},
+doi = {10.1080/00268976.2019.1662506},
+journal = {Molecular Physics},
+author = {Grønbech-Jensen, Niels},
+year = {2020}
+}
+
+- GJ-I vhalf method: doi:10.1080/00268976.2019.1570369
+
+@Article{jensen_accurate_2019,
+title = {Accurate configurational and kinetic statistics in discrete-time Langevin systems},
+volume = {117},
+url = {https://www.tandfonline.com/doi/full/10.1080/00268976.2019.1570369},
+doi = {10.1080/00268976.2019.1570369},
+number = {18},
+journal = {Molecular Physics},
+author = {Jensen, Lucas Frese Grønbech and Grønbech-Jensen, Niels},
+year = {2019}
+}
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 6.9407173
+  ghost atom cutoff = 6.9407173
+  binsize = 3.4703587, bins = 10 10 10
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cubic, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 6.427 | 6.427 | 6.427 Mbytes
+   Step          Temp          E_pair         E_mol          TotEng         Press     
+         0   10            -56.207652      0             -55.092137      33.341103    
+      5000   9.3726166     -55.076514      0             -54.030985      342.43571    
+Loop time of 2.11818 on 4 procs for 5000 steps with 864 atoms
+
+Performance: 20394.822 ns/day, 0.001 hours/ns, 2360.512 timesteps/s, 2.039 Matom-step/s
+63.1% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.53987    | 0.54922    | 0.56044    |   1.2 | 25.93
+Bond    | 0.00058281 | 0.00063674 | 0.00075153 |   0.0 |  0.03
+Neigh   | 0.10821    | 0.10912    | 0.11017    |   0.2 |  5.15
+Comm    | 0.96075    | 0.97484    | 0.98645    |   1.1 | 46.02
+Output  | 0.00026318 | 0.00026575 | 0.00027192 |   0.0 |  0.01
+Modify  | 0.26142    | 0.2634     | 0.26465    |   0.2 | 12.44
+Other   |            | 0.2207     |            |       | 10.42
+
+Nlocal:            216 ave         216 max         216 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+Nghost:         884.75 ave         885 max         884 min
+Histogram: 1 0 0 0 0 0 0 0 0 3
+Neighs:           4536 ave        4737 max        4335 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+
+Total # of neighbors = 18144
+Ave neighs/atom = 21
+Ave special neighs/atom = 0
+Neighbor list builds = 273
+Dangerous builds = 0
+
+fix energies all ave/time 1 20000 20000 c_myKE c_myPE #file ave.out
+
+thermo 2000
+run 35000
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Per MPI rank memory allocation (min/avg/max) = 6.428 | 6.428 | 6.428 Mbytes
+   Step          Temp          E_pair         E_mol          TotEng         Press     
+      5000   9.3726166     -55.076514      0             -54.030985      342.43571    
+      6000   9.6911866     -55.129244      0             -54.048177      329.72537    
+      8000   9.7296551     -55.101417      0             -54.016059      337.46595    
+     10000   10.098808     -55.083292      0             -53.956755      343.4122     
+     12000   10.114344     -55.090905      0             -53.962635      341.3438     
+     14000   10.230012     -55.085051      0             -53.943878      342.45237    
+     16000   9.5989709     -55.090733      0             -54.019954      339.07584    
+     18000   10.016071     -55.030131      0             -53.912824      358.79514    
+     20000   9.7197057     -55.104072      0             -54.019824      335.89619    
+     22000   9.959647      -55.152019      0             -54.041005      323.05805    
+     24000   10.075138     -55.115242      0             -53.991345      334.76239    
+     26000   10.227192     -55.125275      0             -53.984416      332.10131    
+     28000   10.177109     -55.107097      0             -53.971825      337.32979    
+     30000   9.521036      -55.099011      0             -54.036925      337.10716    
+     32000   10.265633     -55.077016      0             -53.93187       346.01018    
+     34000   10.173978     -55.097068      0             -53.962146      339.63562    
+     36000   9.6032778     -55.152202      0             -54.080942      321.61646    
+     38000   9.8802995     -55.048407      0             -53.946245      351.82506    
+     40000   10.372288     -55.087634      0             -53.93059       343.34304    
+Loop time of 17.867 on 4 procs for 35000 steps with 864 atoms
+
+Performance: 16925.013 ns/day, 0.001 hours/ns, 1958.914 timesteps/s, 1.693 Matom-step/s
+65.3% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 5.0932     | 5.1683     | 5.2256     |   2.5 | 28.93
+Bond    | 0.0044473  | 0.0048347  | 0.0058137  |   0.8 |  0.03
+Neigh   | 0.85262    | 0.8601     | 0.87438    |   0.9 |  4.81
+Comm    | 6.8164     | 6.8981     | 6.9859     |   2.6 | 38.61
+Output  | 0.0046884  | 0.0047093  | 0.0047322  |   0.0 |  0.03
+Modify  | 3.4107     | 3.4186     | 3.4248     |   0.3 | 19.13
+Other   |            | 1.512      |            |       |  8.47
+
+Nlocal:            216 ave         222 max         210 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+Nghost:          905.5 ave         911 max         899 min
+Histogram: 1 1 0 0 0 0 0 0 0 2
+Neighs:        4535.75 ave        4837 max        4218 min
+Histogram: 1 0 0 1 0 0 1 0 0 1
+
+Total # of neighbors = 18143
+Ave neighs/atom = 20.998843
+Ave special neighs/atom = 0
+Neighbor list builds = 2140
+Dangerous builds = 0
+Total wall time: 0:00:21
diff --git a/examples/granular/in.tableting.200 b/examples/granular/in.tableting.200
index 41723c72af..58fc8dde3e 100644
--- a/examples/granular/in.tableting.200
+++ b/examples/granular/in.tableting.200
@@ -28,19 +28,20 @@ variable dieHeight     equal 1e-2
 
 pair_style granular
 
-# mdr = E, nu, Y, gamma, psi_b, CoR
+# mdr = E, nu, Y, gamma, psi_b, damp
 variable YoungsModulus      equal 5e6
 variable YieldStress        equal 1.9e5
 variable PoissonsRatio      equal 0.4
 variable SurfaceEnergy      equal 2
 variable SurfaceEnergyWall  equal 0.0
-variable CoR                equal 0.5
 variable psi_b              equal 0.5
+variable damp               equal 0.2
+variable damp_type          equal 1
 
 # linear_history = k_t, x_gammat, mu_s
 variable kt         equal 2/7*${YoungsModulus}*${atomRadius}
 variable kt_wall    equal 2/7*${YoungsModulus}*${atomRadius}
-variable xgammat    equal 0.0 
+variable xgammat    equal 1.0
 variable mu_s       equal 0.7
 variable mu_s_wall  equal 0.1
 
@@ -49,14 +50,17 @@ variable mu_roll     equal 0.6
 variable k_roll      equal 2.25*${mu_roll}*${mu_roll}*${YoungsModulus}*${atomRadius}
 variable gamma_roll  equal 0.0
 
-pair_coeff * * mdr ${YoungsModulus} ${PoissonsRatio} ${YieldStress} ${SurfaceEnergy} ${psi_b} ${CoR} tangential linear_history ${kt} ${xgammat} ${mu_s} rolling sds ${k_roll} ${gamma_roll} ${mu_roll} damping none
+pair_coeff * * mdr ${YoungsModulus} ${PoissonsRatio} ${YieldStress} ${SurfaceEnergy} ${psi_b} ${damp} &
+    damping mdr ${damp_type} &
+    tangential linear_history ${kt} ${xgammat} ${mu_s} &
+    rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
 
 ######################################### ADD DIE AND PUNCH WALLS ############################################
 
 variable disp_upper      equal 0.0
 variable disp_lower      equal 0.0
 
-variable wall_contact_string string "granular mdr ${YoungsModulus} ${PoissonsRatio} ${YieldStress} ${SurfaceEnergyWall} ${psi_b} ${CoR} tangential linear_history ${kt_wall} ${xgammat} ${mu_s_wall} rolling sds ${k_roll} ${gamma_roll} ${mu_roll} damping none"
+variable wall_contact_string string "granular mdr ${YoungsModulus} ${PoissonsRatio} ${YieldStress} ${SurfaceEnergyWall} ${psi_b} ${damp} damping mdr ${damp_type} tangential linear_history ${kt_wall} ${xgammat} ${mu_s_wall} rolling sds ${k_roll} ${gamma_roll} ${mu_roll}"
 
 variable dieHeight2 equal 2*${dieHeight}
 
@@ -73,7 +77,7 @@ variable avgUpperPunchForce equal c_avgUpperPunchForce
 compute avgLowerPunchForce all reduce sum f_lowerPunch[4]
 variable avgLowerPunchForce equal c_avgLowerPunchForce
 
-fix printFD all print 1 "${disp_upper} ${avgUpperPunchForce} ${avgLowerPunchForce}" file punch_force_disp_tableting200.csv screen no
+#fix printFD all print 1 "${disp_upper} ${avgUpperPunchForce} ${avgLowerPunchForce}" file punch_force_disp_tableting200.csv screen no
 
 ##################################### INTEGRATION AND GRAVITY #################################################
 
@@ -109,13 +113,13 @@ variable syy_ave equal c_sigmayy_ave
 variable szz_ave equal c_sigmazz_ave
 variable Vparticles equal c_Velas_sum
 
-fix log all print 1 "${sxx_ave} ${syy_ave} ${szz_ave} ${Vparticles}" file average_normal_stresses_tableting200.csv screen no
-dump dumpParticles all custom ${output_rate} tableting200.dump id type mass diameter x y z vx vy vz fx fy fz c_ke c_sigmaxx c_sigmayy c_sigmazz
+#fix log all print 1 "${sxx_ave} ${syy_ave} ${szz_ave} ${Vparticles}" file average_normal_stresses_tableting200.csv screen no
+#dump dumpParticles all custom ${output_rate} tableting200.dump id type mass diameter x y z vx vy vz fx fy fz c_ke c_sigmaxx c_sigmayy c_sigmazz
 #dump dumpParticlesVTK all vtk ${output_rate} post/particles_*.vtk id x y z fx fy fz vx vy vz c_ke radius c_sigmaxx c_sigmayy c_sigmazz
 
 ############################################## RUN SIMULATION #################################################
 
-variable upper_punch_stroke  equal 0.6733*${dieHeight}
+variable upper_punch_stroke  equal 0.7*${dieHeight}
 variable vel_upper           equal 0.25
 
 variable settling_steps      equal round(0.02/dt)
@@ -146,4 +150,4 @@ run ${ejection_steps}
 variable disp_lower  equal ${dieHeight}
 variable disp_upper  equal ${dieHeight}*0.9
 variable max_disp    equal ${dieRadius}*0.75
-run ${free_float_steps}
\ No newline at end of file
+run ${free_float_steps}
diff --git a/examples/granular/in.triaxial.compaction.12 b/examples/granular/in.triaxial.compaction.12
index eee9a2bfd8..0a35ab3c0a 100644
--- a/examples/granular/in.triaxial.compaction.12
+++ b/examples/granular/in.triaxial.compaction.12
@@ -1,7 +1,7 @@
 ############################### SIMULATION SETTINGS ###################################################
 
 atom_style      sphere 1
-atom_modify     map array 
+atom_modify     map array
 comm_modify vel yes
 units           si
 newton          off
@@ -24,20 +24,23 @@ variable atomRadius equal 0.5
 
 pair_style granular
 
-# mdr = E, nu, Y, gamma, psi_b, CoR 
+# mdr = E, nu, Y, gamma, psi_b, damp
 variable YoungsModulus  equal 1e9
 variable PoissonsRatio  equal 0.3
 variable YieldStress    equal 50e6
 variable SurfaceEnergy  equal 0.0
 variable psi_b          equal 0.5
-variable CoR            equal 0.5
+variable damp           equal 0.2
+variable damp_type      equal 1
 
 # linear_history = k_t, x_gamma,t, mu_s
 variable kt       equal 2/7*${YoungsModulus}*${atomRadius}
 variable xgammat  equal 0.0
 variable mu_s     equal 0.5
 
-pair_coeff * * mdr ${YoungsModulus} ${PoissonsRatio} ${YieldStress} ${SurfaceEnergy} ${psi_b} ${CoR} tangential linear_history ${kt} ${xgammat} ${mu_s} damping none
+pair_coeff * * mdr ${YoungsModulus} ${PoissonsRatio} ${YieldStress} ${SurfaceEnergy} ${psi_b} ${damp} &
+   damping mdr ${damp_type} &
+   tangential linear_history ${kt} ${xgammat} ${mu_s}
 
 ######################################### ADD IN PLANES ################################################
 
@@ -54,7 +57,7 @@ region plane_xz_neg plane 0 -${halfBoxWidth} 0 0 1 0 side in move NULL v_plane_d
 region plane_xy_pos plane 0 0 ${halfBoxWidth} 0 0 -1  side in move NULL NULL v_plane_disp_neg units box
 region plane_xy_neg plane 0 0 -${halfBoxWidth} 0 0 1 side in move NULL NULL v_plane_disp units box
 
-variable wall_contact_string string "granular mdr ${YoungsModulus} ${PoissonsRatio} ${YieldStress} ${SurfaceEnergy} ${psi_b} ${CoR} tangential linear_history ${kt} ${xgammat} ${mu_s} damping none"
+variable wall_contact_string string "granular mdr ${YoungsModulus} ${PoissonsRatio} ${YieldStress} ${SurfaceEnergy} ${psi_b} ${damp} damping mdr ${damp_type} tangential linear_history ${kt} ${xgammat} ${mu_s} "
 
 fix plane_yz_pos all wall/gran/region ${wall_contact_string} region plane_yz_pos contacts
 fix plane_yz_neg all wall/gran/region ${wall_contact_string} region plane_yz_neg contacts
@@ -72,12 +75,12 @@ variable plane_xz_neg_force equal c_plane_xz_neg_force
 compute plane_yz_neg_force all reduce sum f_plane_yz_neg[2]
 variable plane_yz_neg_force equal c_plane_yz_neg_force
 
-fix print1 all print 1 "${plane_disp} ${plane_xy_neg_force} ${plane_xz_neg_force} ${plane_yz_neg_force}" file force_disp_triaxial12.csv screen no
+#fix print1 all print 1 "${plane_disp} ${plane_xy_neg_force} ${plane_xz_neg_force} ${plane_yz_neg_force}" file force_disp_triaxial12.csv screen no
 
-######################################## SCREEN OUTPUT  #################################################### 
+######################################## SCREEN OUTPUT  ####################################################
 
 compute       1 all erotate/sphere
-thermo_style  custom dt step atoms ke c_1 vol 
+thermo_style  custom dt step atoms ke c_1 vol
 thermo        100
 thermo_modify lost ignore norm no
 
@@ -89,8 +92,8 @@ variable compression_steps equal round(${disp_max}/${ddisp})
 variable output_rate equal round(${compression_steps}/100)
 
 ##################################### SET UP DUMP OUTPUTS  ####################################################
- 
-dump dumpParticles all custom ${output_rate} triaxial_compaction_12.dump id type mass x y z vx vy vz fx fy fz radius
+
+#dump dumpParticles all custom ${output_rate} triaxial_compaction_12.dump id type mass x y z vx vy vz fx fy fz radius
 #dump   dmp all vtk ${output_rate} post/triaxial12particles_*.vtk id type mass x y z vx vy vz fx fy fz radius
 
 #################################### COMPRESS THE PARTICLES  ##################################################
@@ -101,7 +104,7 @@ run 0
 compute Ac_1_12 particles_1_12 pair/local p13 cutoff radius
 compute Ac_1_12_sum particles_1_12 reduce sum c_Ac_1_12 inputs local
 variable Ac_1_12 equal c_Ac_1_12_sum
-fix logArea all print 100 "${plane_disp} ${Ac_1_12}" file pair_1_12_contact_area_triaxial12.csv screen no
+#fix logArea all print 100 "${plane_disp} ${Ac_1_12}" file pair_1_12_contact_area_triaxial12.csv screen no
 
 variable plane_disp equal ${ddisp}*elapsed
 variable plane_disp_neg equal -${ddisp}*elapsed
diff --git a/examples/granular/log.4Feb25.tableting.200.g++.1 b/examples/granular/log.4Feb25.tableting.200.g++.1
new file mode 100644
index 0000000000..c164f3dbee
--- /dev/null
+++ b/examples/granular/log.4Feb25.tableting.200.g++.1
@@ -0,0 +1,795 @@
+LAMMPS (4 Feb 2025 - Development - patch_5May2020-22356-g0c29a0a0c9-modified)
+##################################### SIMULATION SETTINGS ###################################################
+
+atom_style sphere 1
+atom_modify map array
+comm_modify vel yes
+units si
+newton off
+neighbor      1.0e-3 bin
+neigh_modify every 10 delay 60 check no
+timestep 4e-6
+#processors 2 2 1
+
+############################## SIMULATION BOUNDING BOX AND INSERT PARTICLES #################################
+
+boundary f f f
+read_data spheres200.data
+Reading data file ...
+  orthogonal box = (-0.005 -0.005 -0.001) to (0.005 0.005 0.02)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  200 atoms
+  read_data CPU = 0.024 seconds
+
+#################################### ADD DIE AND ATOM PARAMETERIZATION ######################################
+
+variable atomRadius    equal 0.44e-3*1.25
+variable atomDiameter  equal 2*${atomRadius}
+variable atomDiameter  equal 2*0.00055
+variable atomDensity   equal 1560
+variable atomMassAvg   equal ${atomDensity}*4.0/3.0*PI*${atomRadius}^3.0
+variable atomMassAvg   equal 1560*4.0/3.0*PI*${atomRadius}^3.0
+variable atomMassAvg   equal 1560*4.0/3.0*PI*0.00055^3.0
+variable dieRadius     equal 4e-3
+variable dieHeight     equal 1e-2
+
+############################## PARTICLE MATERIAL PROPERTIES AND FORCE MODEL ##################################
+
+pair_style granular
+
+# mdr = E, nu, Y, gamma, psi_b, damp
+variable YoungsModulus      equal 5e6
+variable YieldStress        equal 1.9e5
+variable PoissonsRatio      equal 0.4
+variable SurfaceEnergy      equal 2
+variable SurfaceEnergyWall  equal 0.0
+variable psi_b              equal 0.5
+variable damp               equal 0.2
+variable damp_type          equal 1
+
+# linear_history = k_t, x_gammat, mu_s
+variable kt         equal 2/7*${YoungsModulus}*${atomRadius}
+variable kt         equal 2/7*5000000*${atomRadius}
+variable kt         equal 2/7*5000000*0.00055
+variable kt_wall    equal 2/7*${YoungsModulus}*${atomRadius}
+variable kt_wall    equal 2/7*5000000*${atomRadius}
+variable kt_wall    equal 2/7*5000000*0.00055
+variable xgammat    equal 1.0
+variable mu_s       equal 0.7
+variable mu_s_wall  equal 0.1
+
+# sds = mu_roll, k_roll, gamma_roll
+variable mu_roll     equal 0.6
+variable k_roll      equal 2.25*${mu_roll}*${mu_roll}*${YoungsModulus}*${atomRadius}
+variable k_roll      equal 2.25*0.6*${mu_roll}*${YoungsModulus}*${atomRadius}
+variable k_roll      equal 2.25*0.6*0.6*${YoungsModulus}*${atomRadius}
+variable k_roll      equal 2.25*0.6*0.6*5000000*${atomRadius}
+variable k_roll      equal 2.25*0.6*0.6*5000000*0.00055
+variable gamma_roll  equal 0.0
+
+pair_coeff * * mdr ${YoungsModulus} ${PoissonsRatio} ${YieldStress} ${SurfaceEnergy} ${psi_b} ${damp}     damping mdr ${damp_type}     tangential linear_history ${kt} ${xgammat} ${mu_s}     rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+pair_coeff * * mdr 5000000 ${PoissonsRatio} ${YieldStress} ${SurfaceEnergy} ${psi_b} ${damp}     damping mdr ${damp_type}     tangential linear_history ${kt} ${xgammat} ${mu_s}     rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+pair_coeff * * mdr 5000000 0.4 ${YieldStress} ${SurfaceEnergy} ${psi_b} ${damp}     damping mdr ${damp_type}     tangential linear_history ${kt} ${xgammat} ${mu_s}     rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+pair_coeff * * mdr 5000000 0.4 190000 ${SurfaceEnergy} ${psi_b} ${damp}     damping mdr ${damp_type}     tangential linear_history ${kt} ${xgammat} ${mu_s}     rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+pair_coeff * * mdr 5000000 0.4 190000 2 ${psi_b} ${damp}     damping mdr ${damp_type}     tangential linear_history ${kt} ${xgammat} ${mu_s}     rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+pair_coeff * * mdr 5000000 0.4 190000 2 0.5 ${damp}     damping mdr ${damp_type}     tangential linear_history ${kt} ${xgammat} ${mu_s}     rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+pair_coeff * * mdr 5000000 0.4 190000 2 0.5 0.2     damping mdr ${damp_type}     tangential linear_history ${kt} ${xgammat} ${mu_s}     rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+pair_coeff * * mdr 5000000 0.4 190000 2 0.5 0.2     damping mdr 1     tangential linear_history ${kt} ${xgammat} ${mu_s}     rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+pair_coeff * * mdr 5000000 0.4 190000 2 0.5 0.2     damping mdr 1     tangential linear_history 785.714285714286 ${xgammat} ${mu_s}     rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+pair_coeff * * mdr 5000000 0.4 190000 2 0.5 0.2     damping mdr 1     tangential linear_history 785.714285714286 1 ${mu_s}     rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+pair_coeff * * mdr 5000000 0.4 190000 2 0.5 0.2     damping mdr 1     tangential linear_history 785.714285714286 1 0.7     rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+pair_coeff * * mdr 5000000 0.4 190000 2 0.5 0.2     damping mdr 1     tangential linear_history 785.714285714286 1 0.7     rolling sds 2227.5 ${gamma_roll} ${mu_roll}
+pair_coeff * * mdr 5000000 0.4 190000 2 0.5 0.2     damping mdr 1     tangential linear_history 785.714285714286 1 0.7     rolling sds 2227.5 0 ${mu_roll}
+pair_coeff * * mdr 5000000 0.4 190000 2 0.5 0.2     damping mdr 1     tangential linear_history 785.714285714286 1 0.7     rolling sds 2227.5 0 0.6
+
+######################################### ADD DIE AND PUNCH WALLS ############################################
+
+variable disp_upper      equal 0.0
+variable disp_lower      equal 0.0
+
+variable wall_contact_string string "granular mdr ${YoungsModulus} ${PoissonsRatio} ${YieldStress} ${SurfaceEnergyWall} ${psi_b} ${damp} damping mdr ${damp_type} tangential linear_history ${kt_wall} ${xgammat} ${mu_s_wall} rolling sds ${k_roll} ${gamma_roll} ${mu_roll}"
+granular mdr 5000000 ${PoissonsRatio} ${YieldStress} ${SurfaceEnergyWall} ${psi_b} ${damp} damping mdr ${damp_type} tangential linear_history ${kt_wall} ${xgammat} ${mu_s_wall} rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+granular mdr 5000000 0.4 ${YieldStress} ${SurfaceEnergyWall} ${psi_b} ${damp} damping mdr ${damp_type} tangential linear_history ${kt_wall} ${xgammat} ${mu_s_wall} rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+granular mdr 5000000 0.4 190000 ${SurfaceEnergyWall} ${psi_b} ${damp} damping mdr ${damp_type} tangential linear_history ${kt_wall} ${xgammat} ${mu_s_wall} rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+granular mdr 5000000 0.4 190000 0 ${psi_b} ${damp} damping mdr ${damp_type} tangential linear_history ${kt_wall} ${xgammat} ${mu_s_wall} rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+granular mdr 5000000 0.4 190000 0 0.5 ${damp} damping mdr ${damp_type} tangential linear_history ${kt_wall} ${xgammat} ${mu_s_wall} rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+granular mdr 5000000 0.4 190000 0 0.5 0.2 damping mdr ${damp_type} tangential linear_history ${kt_wall} ${xgammat} ${mu_s_wall} rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+granular mdr 5000000 0.4 190000 0 0.5 0.2 damping mdr 1 tangential linear_history ${kt_wall} ${xgammat} ${mu_s_wall} rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+granular mdr 5000000 0.4 190000 0 0.5 0.2 damping mdr 1 tangential linear_history 785.714285714286 ${xgammat} ${mu_s_wall} rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+granular mdr 5000000 0.4 190000 0 0.5 0.2 damping mdr 1 tangential linear_history 785.714285714286 1 ${mu_s_wall} rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+granular mdr 5000000 0.4 190000 0 0.5 0.2 damping mdr 1 tangential linear_history 785.714285714286 1 0.1 rolling sds ${k_roll} ${gamma_roll} ${mu_roll}
+granular mdr 5000000 0.4 190000 0 0.5 0.2 damping mdr 1 tangential linear_history 785.714285714286 1 0.1 rolling sds 2227.5 ${gamma_roll} ${mu_roll}
+granular mdr 5000000 0.4 190000 0 0.5 0.2 damping mdr 1 tangential linear_history 785.714285714286 1 0.1 rolling sds 2227.5 0 ${mu_roll}
+granular mdr 5000000 0.4 190000 0 0.5 0.2 damping mdr 1 tangential linear_history 785.714285714286 1 0.1 rolling sds 2227.5 0 0.6
+
+variable dieHeight2 equal 2*${dieHeight}
+variable dieHeight2 equal 2*0.01
+
+region lowerPunch plane 0 0 0 0 0 1 side in units box move NULL NULL v_disp_lower units box
+region upperPunch plane 0 0 ${dieHeight} 0 0 -1 side in move NULL NULL v_disp_upper units box
+region upperPunch plane 0 0 0.01 0 0 -1 side in move NULL NULL v_disp_upper units box
+region die cylinder z 0 0 ${dieRadius} 0 ${dieHeight2} side in units box
+region die cylinder z 0 0 0.004 0 ${dieHeight2} side in units box
+region die cylinder z 0 0 0.004 0 0.02 side in units box
+
+fix lowerPunch all wall/gran/region ${wall_contact_string} region lowerPunch contacts
+fix lowerPunch all wall/gran/region granular mdr 5000000 0.4 190000 0 0.5 0.2 damping mdr 1 tangential linear_history 785.714285714286 1 0.1 rolling sds 2227.5 0 0.6 region lowerPunch contacts
+fix upperPunch all wall/gran/region ${wall_contact_string} region upperPunch contacts
+fix upperPunch all wall/gran/region granular mdr 5000000 0.4 190000 0 0.5 0.2 damping mdr 1 tangential linear_history 785.714285714286 1 0.1 rolling sds 2227.5 0 0.6 region upperPunch contacts
+fix die all wall/gran/region ${wall_contact_string} region die contacts
+fix die all wall/gran/region granular mdr 5000000 0.4 190000 0 0.5 0.2 damping mdr 1 tangential linear_history 785.714285714286 1 0.1 rolling sds 2227.5 0 0.6 region die contacts
+
+compute avgUpperPunchForce all reduce sum f_upperPunch[4]
+variable avgUpperPunchForce equal c_avgUpperPunchForce
+compute avgLowerPunchForce all reduce sum f_lowerPunch[4]
+variable avgLowerPunchForce equal c_avgLowerPunchForce
+
+#fix printFD all print 1 "${disp_upper} ${avgUpperPunchForce} ${avgLowerPunchForce}" file punch_force_disp_tableting200.csv screen no
+
+##################################### INTEGRATION AND GRAVITY #################################################
+
+fix 1 all nve/sphere
+fix grav all gravity 9.81 vector 0 0 -1
+
+########################################### SCREEN OUTPUT  ####################################################
+
+compute       1 all erotate/sphere
+thermo_style  custom dt step atoms ke vol v_disp_upper
+thermo        100
+thermo_modify lost ignore norm no
+
+##################################### SET UP DUMP OUTPUTS  ####################################################
+
+compute ke all ke/atom
+variable output_rate equal round(1e-3/dt)
+
+run 0
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- MDR contact model command: (i) https://doi.org/10.1016/j.jmps.2023.105492 || (ii) https://doi.org/10.1016/j.jmps.2023.105493 || (iii) https://doi.org/10.31224/4289
+
+@Article{zunker2024mechanicallyI,
+ author =  {Zunker, William and Kamrin, Ken},
+ title =   {A mechanically-derived contact model for adhesive elastic-perfectly plastic particles,
+            Part I: Utilizing the method of dimensionality reduction},
+ journal = {Journal of the Mechanics and Physics of Solids},
+ year =    {2024},
+ volume =  {183},
+ pages =   {105492},
+}
+
+@Article{zunker2024mechanicallyII,
+ author =  {Zunker, William and Kamrin, Ken},
+ title =   {A mechanically-derived contact model for adhesive elastic-perfectly plastic particles,
+            Part II: Contact under high compaction—modeling a bulk elastic response},
+ journal = {Journal of the Mechanics and Physics of Solids},
+ year =    {2024},
+ volume =  {183},
+ pages =   {105493},
+}
+
+@Article{zunker2025experimentally,
+ author =  {Zunker, William and Dunatunga, Sachith and Thakur, Subhash and Tang, Pingjun and Kamrin, Ken},
+ title =   {Experimentally validated DEM for large deformation powder compaction:
+            mechanically-derived contact model and screening of non-physical contacts},
+ journal = {Powder Technology},
+ year =    {2025},
+ pages =   {120972},
+}
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 10 steps, delay = 60 steps, check = no
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 0.002318
+  ghost atom cutoff = 0.002318
+  binsize = 0.001159, bins = 9 9 19
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair granular, perpetual
+      attributes: half, newton off, size, history
+      pair build: half/size/bin/atomonly/newtoff
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 72.3 | 72.3 | 72.3 Mbytes
+      Dt          Step        Atoms        KinEng         Volume      v_disp_upper 
+ 4e-06                  0         200   0              2.1e-06        0            
+Loop time of 7.43e-07 on 1 procs for 0 steps with 200 atoms
+
+134.6% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0          | 0          | 0          |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0          | 0          | 0          |   0.0 |  0.00
+Output  | 0          | 0          | 0          |   0.0 |  0.00
+Modify  | 0          | 0          | 0          |   0.0 |  0.00
+Other   |            | 7.43e-07   |            |       |100.00
+
+Nlocal:            200 ave         200 max         200 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:              0 ave           0 max           0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:           1341 ave        1341 max        1341 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 1341
+Ave neighs/atom = 6.705
+Neighbor list builds = 0
+Dangerous builds not checked
+
+compute sigmaxx all property/atom d_sigmaxx
+compute sigmayy all property/atom d_sigmayy
+compute sigmazz all property/atom d_sigmazz
+compute Velas all property/atom d_Velas
+
+compute sigmaxx_ave all reduce ave c_sigmaxx
+compute sigmayy_ave all reduce ave c_sigmayy
+compute sigmazz_ave all reduce ave c_sigmazz
+compute Velas_sum all reduce sum c_Velas
+
+variable sxx_ave equal c_sigmaxx_ave
+variable syy_ave equal c_sigmayy_ave
+variable szz_ave equal c_sigmazz_ave
+variable Vparticles equal c_Velas_sum
+
+#fix log all print 1 "${sxx_ave} ${syy_ave} ${szz_ave} ${Vparticles}" file average_normal_stresses_tableting200.csv screen no
+#dump dumpParticles all custom ${output_rate} tableting200.dump id type mass diameter x y z vx vy vz fx fy fz c_ke c_sigmaxx c_sigmayy c_sigmazz
+#dump dumpParticlesVTK all vtk ${output_rate} post/particles_*.vtk id x y z fx fy fz vx vy vz c_ke radius c_sigmaxx c_sigmayy c_sigmazz
+
+############################################## RUN SIMULATION #################################################
+
+variable upper_punch_stroke  equal 0.7*${dieHeight}
+variable upper_punch_stroke  equal 0.7*0.01
+variable vel_upper           equal 0.25
+
+variable settling_steps      equal round(0.02/dt)
+variable compression_steps   equal 2*round(${upper_punch_stroke}/${vel_upper}/dt)
+variable compression_steps   equal 2*round(0.007/${vel_upper}/dt)
+variable compression_steps   equal 2*round(0.007/0.25/dt)
+variable ejection_steps      equal ${compression_steps}
+variable ejection_steps      equal 14000
+variable free_float_steps    equal round(0.02/dt)
+
+##### SETTLING #####
+
+run ${settling_steps}
+run 5000
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Per MPI rank memory allocation (min/avg/max) = 72.3 | 72.3 | 72.3 Mbytes
+      Dt          Step        Atoms        KinEng         Volume      v_disp_upper 
+ 4e-06                  0         200   0              2.1e-06        0            
+ 4e-06                100         200   1.5945503e-09  2.1e-06        0            
+ 4e-06                200         200   6.3750614e-09  2.1e-06        0            
+ 4e-06                300         200   1.4225143e-08  2.1e-06        0            
+ 4e-06                400         200   2.5255561e-08  2.1e-06        0            
+ 4e-06                500         200   3.947508e-08   2.1e-06        0            
+ 4e-06                600         200   5.6839079e-08  2.1e-06        0            
+ 4e-06                700         200   7.7346494e-08  2.1e-06        0            
+ 4e-06                800         200   1.0075645e-07  2.1e-06        0            
+ 4e-06                900         200   1.2660105e-07  2.1e-06        0            
+ 4e-06               1000         200   1.5571123e-07  2.1e-06        0            
+ 4e-06               1100         200   1.8785107e-07  2.1e-06        0            
+ 4e-06               1200         200   2.2200974e-07  2.1e-06        0            
+ 4e-06               1300         200   2.6009223e-07  2.1e-06        0            
+ 4e-06               1400         200   3.0148646e-07  2.1e-06        0            
+ 4e-06               1500         200   3.4269724e-07  2.1e-06        0            
+ 4e-06               1600         200   3.8502938e-07  2.1e-06        0            
+ 4e-06               1700         200   4.2763891e-07  2.1e-06        0            
+ 4e-06               1800         200   4.6779321e-07  2.1e-06        0            
+ 4e-06               1900         200   5.1285578e-07  2.1e-06        0            
+ 4e-06               2000         200   5.6630973e-07  2.1e-06        0            
+ 4e-06               2100         200   6.1904302e-07  2.1e-06        0            
+ 4e-06               2200         200   6.7462868e-07  2.1e-06        0            
+ 4e-06               2300         200   7.3066636e-07  2.1e-06        0            
+ 4e-06               2400         200   7.7407334e-07  2.1e-06        0            
+ 4e-06               2500         200   8.3353557e-07  2.1e-06        0            
+ 4e-06               2600         200   9.0017986e-07  2.1e-06        0            
+ 4e-06               2700         200   9.5154909e-07  2.1e-06        0            
+ 4e-06               2800         200   1.0110977e-06  2.1e-06        0            
+ 4e-06               2900         200   1.0661364e-06  2.1e-06        0            
+ 4e-06               3000         200   1.1226841e-06  2.1e-06        0            
+ 4e-06               3100         200   1.1703917e-06  2.1e-06        0            
+ 4e-06               3200         200   1.2254551e-06  2.1e-06        0            
+ 4e-06               3300         200   1.2239859e-06  2.1e-06        0            
+ 4e-06               3400         200   1.273437e-06   2.1e-06        0            
+ 4e-06               3500         200   1.3357598e-06  2.1e-06        0            
+ 4e-06               3600         200   1.3949477e-06  2.1e-06        0            
+ 4e-06               3700         200   1.459988e-06   2.1e-06        0            
+ 4e-06               3800         200   1.5053806e-06  2.1e-06        0            
+ 4e-06               3900         200   1.4952453e-06  2.1e-06        0            
+ 4e-06               4000         200   1.5037857e-06  2.1e-06        0            
+ 4e-06               4100         200   1.5225204e-06  2.1e-06        0            
+ 4e-06               4200         200   1.5375323e-06  2.1e-06        0            
+ 4e-06               4300         200   1.5552328e-06  2.1e-06        0            
+ 4e-06               4400         200   1.581097e-06   2.1e-06        0            
+ 4e-06               4500         200   1.6066427e-06  2.1e-06        0            
+ 4e-06               4600         200   1.6061944e-06  2.1e-06        0            
+ 4e-06               4700         200   1.6110891e-06  2.1e-06        0            
+ 4e-06               4800         200   1.6072997e-06  2.1e-06        0            
+ 4e-06               4900         200   1.5907992e-06  2.1e-06        0            
+ 4e-06               5000         200   1.5501104e-06  2.1e-06        0            
+Loop time of 0.592588 on 1 procs for 5000 steps with 200 atoms
+
+99.4% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.20465    | 0.20465    | 0.20465    |   0.0 | 34.53
+Neigh   | 0.013252   | 0.013252   | 0.013252   |   0.0 |  2.24
+Comm    | 0.00037594 | 0.00037594 | 0.00037594 |   0.0 |  0.06
+Output  | 0.00037454 | 0.00037454 | 0.00037454 |   0.0 |  0.06
+Modify  | 0.37295    | 0.37295    | 0.37295    |   0.0 | 62.93
+Other   |            | 0.0009914  |            |       |  0.17
+
+Nlocal:            200 ave         200 max         200 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:              0 ave           0 max           0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:           1632 ave        1632 max        1632 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 1632
+Ave neighs/atom = 8.16
+Neighbor list builds = 83
+Dangerous builds not checked
+
+##### Compression & Release #####
+
+variable punch_frequency  equal PI/2/(dt*${compression_steps}/2)
+variable punch_frequency  equal PI/2/(dt*14000/2)
+variable disp_upper       equal -${upper_punch_stroke}*sin(${punch_frequency}*elapsed*dt)
+variable disp_upper       equal -0.007*sin(${punch_frequency}*elapsed*dt)
+variable disp_upper       equal -0.007*sin(56.0998688141035*elapsed*dt)
+variable short_release    equal round(${compression_steps}*1.0)
+variable short_release    equal round(14000*1.0)
+run ${short_release}
+run 14000
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Per MPI rank memory allocation (min/avg/max) = 72.3 | 72.3 | 72.3 Mbytes
+      Dt          Step        Atoms        KinEng         Volume      v_disp_upper 
+ 4e-06               5000         200   1.5501104e-06  2.1e-06        0            
+ 4e-06               5100         200   1.5146077e-06  2.1e-06       -0.00015706645
+ 4e-06               5200         200   1.4902158e-06  2.1e-06       -0.00031405381
+ 4e-06               5300         200   1.3871134e-06  2.1e-06       -0.00047088304
+ 4e-06               5400         200   1.3531184e-06  2.1e-06       -0.00062747516
+ 4e-06               5500         200   1.3154278e-06  2.1e-06       -0.00078375133
+ 4e-06               5600         200   1.2461265e-06  2.1e-06       -0.00093963286
+ 4e-06               5700         200   1.1840322e-06  2.1e-06       -0.0010950413 
+ 4e-06               5800         200   1.083844e-06   2.1e-06       -0.0012498983 
+ 4e-06               5900         200   9.8572649e-07  2.1e-06       -0.0014041259 
+ 4e-06               6000         200   8.6605656e-07  2.1e-06       -0.0015576465 
+ 4e-06               6100         200   7.6694391e-07  2.1e-06       -0.0017103828 
+ 4e-06               6200         200   7.1292979e-07  2.1e-06       -0.0018622579 
+ 4e-06               6300         200   6.475067e-07   2.1e-06       -0.0020131953 
+ 4e-06               6400         200   5.378202e-07   2.1e-06       -0.002163119  
+ 4e-06               6500         200   4.5668598e-07  2.1e-06       -0.0023119534 
+ 4e-06               6600         200   3.1208987e-07  2.1e-06       -0.0024596238 
+ 4e-06               6700         200   2.2996407e-07  2.1e-06       -0.0026060556 
+ 4e-06               6800         200   1.658813e-07   2.1e-06       -0.0027511752 
+ 4e-06               6900         200   1.4495016e-07  2.1e-06       -0.0028949095 
+ 4e-06               7000         200   1.6172966e-07  2.1e-06       -0.0030371862 
+ 4e-06               7100         200   4.6620591e-07  2.1e-06       -0.0031779335 
+ 4e-06               7200         200   6.8121833e-07  2.1e-06       -0.0033170806 
+ 4e-06               7300         200   1.5506154e-06  2.1e-06       -0.0034545575 
+ 4e-06               7400         200   2.5919669e-06  2.1e-06       -0.0035902949 
+ 4e-06               7500         200   2.9403576e-06  2.1e-06       -0.0037242245 
+ 4e-06               7600         200   2.7726732e-06  2.1e-06       -0.0038562789 
+ 4e-06               7700         200   2.6586936e-06  2.1e-06       -0.0039863915 
+ 4e-06               7800         200   2.7059447e-06  2.1e-06       -0.0041144968 
+ 4e-06               7900         200   2.8454301e-06  2.1e-06       -0.0042405303 
+ 4e-06               8000         200   2.7747574e-06  2.1e-06       -0.0043644286 
+ 4e-06               8100         200   2.6329747e-06  2.1e-06       -0.0044861293 
+ 4e-06               8200         200   2.3654294e-06  2.1e-06       -0.0046055711 
+ 4e-06               8300         200   2.616465e-06   2.1e-06       -0.0047226938 
+ 4e-06               8400         200   2.6920973e-06  2.1e-06       -0.0048374385 
+ 4e-06               8500         200   2.6041158e-06  2.1e-06       -0.0049497475 
+ 4e-06               8600         200   1.8236056e-06  2.1e-06       -0.005059564  
+ 4e-06               8700         200   2.1290462e-06  2.1e-06       -0.005166833  
+ 4e-06               8800         200   1.8723934e-06  2.1e-06       -0.0052715003 
+ 4e-06               8900         200   1.7196474e-06  2.1e-06       -0.0053735132 
+ 4e-06               9000         200   1.558001e-06   2.1e-06       -0.0054728204 
+ 4e-06               9100         200   1.1714433e-06  2.1e-06       -0.0055693718 
+ 4e-06               9200         200   1.7973167e-06  2.1e-06       -0.005663119  
+ 4e-06               9300         200   1.4951874e-06  2.1e-06       -0.0057540145 
+ 4e-06               9400         200   1.197557e-06   2.1e-06       -0.0058420128 
+ 4e-06               9500         200   1.1638085e-06  2.1e-06       -0.0059270694 
+ 4e-06               9600         200   1.2591061e-06  2.1e-06       -0.0060091416 
+ 4e-06               9700         200   1.2747299e-06  2.1e-06       -0.0060881879 
+ 4e-06               9800         200   1.2424243e-06  2.1e-06       -0.0061641687 
+ 4e-06               9900         200   1.1624586e-06  2.1e-06       -0.0062370457 
+ 4e-06              10000         200   1.0724272e-06  2.1e-06       -0.0063067821 
+ 4e-06              10100         200   1.0806622e-06  2.1e-06       -0.0063733428 
+ 4e-06              10200         200   9.2046484e-07  2.1e-06       -0.0064366944 
+ 4e-06              10300         200   8.1801156e-07  2.1e-06       -0.0064968049 
+ 4e-06              10400         200   7.74927e-07    2.1e-06       -0.0065536441 
+ 4e-06              10500         200   6.85447e-07    2.1e-06       -0.0066071833 
+ 4e-06              10600         200   5.4693931e-07  2.1e-06       -0.0066573956 
+ 4e-06              10700         200   4.5275522e-07  2.1e-06       -0.0067042557 
+ 4e-06              10800         200   4.2807826e-07  2.1e-06       -0.00674774   
+ 4e-06              10900         200   3.5676739e-07  2.1e-06       -0.0067878266 
+ 4e-06              11000         200   2.9448839e-07  2.1e-06       -0.0068244954 
+ 4e-06              11100         200   2.7397196e-07  2.1e-06       -0.0068577278 
+ 4e-06              11200         200   1.8313029e-07  2.1e-06       -0.0068875071 
+ 4e-06              11300         200   1.4616679e-07  2.1e-06       -0.0069138184 
+ 4e-06              11400         200   1.0916404e-07  2.1e-06       -0.0069366483 
+ 4e-06              11500         200   7.4608897e-08  2.1e-06       -0.0069559855 
+ 4e-06              11600         200   4.9799693e-08  2.1e-06       -0.0069718201 
+ 4e-06              11700         200   2.996701e-08   2.1e-06       -0.0069841441 
+ 4e-06              11800         200   1.1810054e-08  2.1e-06       -0.0069929515 
+ 4e-06              11900         200   3.9455661e-09  2.1e-06       -0.0069982376 
+ 4e-06              12000         200   7.0818836e-11  2.1e-06       -0.007        
+ 4e-06              12100         200   3.989114e-09   2.1e-06       -0.0069982376 
+ 4e-06              12200         200   1.0481589e-08  2.1e-06       -0.0069929515 
+ 4e-06              12300         200   2.3561653e-08  2.1e-06       -0.0069841441 
+ 4e-06              12400         200   4.1819363e-08  2.1e-06       -0.0069718201 
+ 4e-06              12500         200   6.5328826e-08  2.1e-06       -0.0069559855 
+ 4e-06              12600         200   9.3738095e-08  2.1e-06       -0.0069366483 
+ 4e-06              12700         200   1.3058219e-07  2.1e-06       -0.0069138184 
+ 4e-06              12800         200   1.7668602e-07  2.1e-06       -0.0068875071 
+ 4e-06              12900         200   2.1482809e-07  2.1e-06       -0.0068577278 
+ 4e-06              13000         200   2.7185589e-07  2.1e-06       -0.0068244954 
+ 4e-06              13100         200   3.3577426e-07  2.1e-06       -0.0067878266 
+ 4e-06              13200         200   3.9749034e-07  2.1e-06       -0.00674774   
+ 4e-06              13300         200   5.0743398e-07  2.1e-06       -0.0067042557 
+ 4e-06              13400         200   5.6629069e-07  2.1e-06       -0.0066573956 
+ 4e-06              13500         200   5.9092105e-07  2.1e-06       -0.0066071833 
+ 4e-06              13600         200   7.313638e-07   2.1e-06       -0.0065536441 
+ 4e-06              13700         200   1.0954352e-06  2.1e-06       -0.0064968049 
+ 4e-06              13800         200   7.1637332e-07  2.1e-06       -0.0064366944 
+ 4e-06              13900         200   8.5398051e-07  2.1e-06       -0.0063733428 
+ 4e-06              14000         200   1.0429888e-06  2.1e-06       -0.0063067821 
+ 4e-06              14100         200   1.6673022e-07  2.1e-06       -0.0062370457 
+ 4e-06              14200         200   2.0206568e-08  2.1e-06       -0.0061641687 
+ 4e-06              14300         200   5.6062261e-09  2.1e-06       -0.0060881879 
+ 4e-06              14400         200   4.5198973e-09  2.1e-06       -0.0060091416 
+ 4e-06              14500         200   2.5522353e-09  2.1e-06       -0.0059270694 
+ 4e-06              14600         200   9.091094e-10   2.1e-06       -0.0058420128 
+ 4e-06              14700         200   1.3992806e-10  2.1e-06       -0.0057540145 
+ 4e-06              14800         200   1.0208666e-11  2.1e-06       -0.005663119  
+ 4e-06              14900         200   8.4078334e-11  2.1e-06       -0.0055693718 
+ 4e-06              15000         200   1.2567311e-10  2.1e-06       -0.0054728204 
+ 4e-06              15100         200   5.2285722e-10  2.1e-06       -0.0053735132 
+ 4e-06              15200         200   1.5839179e-10  2.1e-06       -0.0052715003 
+ 4e-06              15300         200   3.2283374e-11  2.1e-06       -0.005166833  
+ 4e-06              15400         200   2.9516435e-12  2.1e-06       -0.005059564  
+ 4e-06              15500         200   1.0302118e-11  2.1e-06       -0.0049497475 
+ 4e-06              15600         200   1.7289975e-11  2.1e-06       -0.0048374385 
+ 4e-06              15700         200   1.4850209e-11  2.1e-06       -0.0047226938 
+ 4e-06              15800         200   8.0260964e-12  2.1e-06       -0.0046055711 
+ 4e-06              15900         200   2.616591e-12   2.1e-06       -0.0044861293 
+ 4e-06              16000         200   3.0793261e-13  2.1e-06       -0.0043644286 
+ 4e-06              16100         200   4.9187696e-13  2.1e-06       -0.0042405303 
+ 4e-06              16200         200   3.9849142e-13  2.1e-06       -0.0041144968 
+ 4e-06              16300         200   5.2823345e-13  2.1e-06       -0.0039863915 
+ 4e-06              16400         200   3.9902725e-13  2.1e-06       -0.0038562789 
+ 4e-06              16500         200   1.9259043e-13  2.1e-06       -0.0037242245 
+ 4e-06              16600         200   5.3557316e-14  2.1e-06       -0.0035902949 
+ 4e-06              16700         200   3.7734621e-15  2.1e-06       -0.0034545575 
+ 4e-06              16800         200   3.0867115e-15  2.1e-06       -0.0033170806 
+ 4e-06              16900         200   1.1841579e-14  2.1e-06       -0.0031779335 
+ 4e-06              17000         200   1.3850503e-14  2.1e-06       -0.0030371862 
+ 4e-06              17100         200   9.8491914e-15  2.1e-06       -0.0028949095 
+ 4e-06              17200         200   4.7140149e-15  2.1e-06       -0.0027511752 
+ 4e-06              17300         200   1.3440466e-15  2.1e-06       -0.0026060556 
+ 4e-06              17400         200   1.0627828e-16  2.1e-06       -0.0024596238 
+ 4e-06              17500         200   6.2015781e-17  2.1e-06       -0.0023119534 
+ 4e-06              17600         200   2.8723007e-16  2.1e-06       -0.002163119  
+ 4e-06              17700         200   3.6601367e-16  2.1e-06       -0.0020131953 
+ 4e-06              17800         200   2.7862312e-16  2.1e-06       -0.0018622579 
+ 4e-06              17900         200   1.4268051e-16  2.1e-06       -0.0017103828 
+ 4e-06              18000         200   4.5443603e-17  2.1e-06       -0.0015576465 
+ 4e-06              18100         200   5.2330376e-18  2.1e-06       -0.0014041259 
+ 4e-06              18200         200   7.3566254e-19  2.1e-06       -0.0012498983 
+ 4e-06              18300         200   6.5880468e-18  2.1e-06       -0.0010950413 
+ 4e-06              18400         200   9.5744931e-18  2.1e-06       -0.00093963286
+ 4e-06              18500         200   7.8604487e-18  2.1e-06       -0.00078375133
+ 4e-06              18600         200   4.3166295e-18  2.1e-06       -0.00062747516
+ 4e-06              18700         200   1.5188792e-18  2.1e-06       -0.00047088304
+ 4e-06              18800         200   2.3221067e-19  2.1e-06       -0.00031405381
+ 4e-06              18900         200   4.7558964e-21  2.1e-06       -0.00015706645
+ 4e-06              19000         200   1.4567292e-19  2.1e-06        2.0903119e-17
+Loop time of 6.96616 on 1 procs for 14000 steps with 200 atoms
+
+99.5% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 3.8534     | 3.8534     | 3.8534     |   0.0 | 55.32
+Neigh   | 0.038778   | 0.038778   | 0.038778   |   0.0 |  0.56
+Comm    | 0.0015081  | 0.0015081  | 0.0015081  |   0.0 |  0.02
+Output  | 0.0018007  | 0.0018007  | 0.0018007  |   0.0 |  0.03
+Modify  | 3.0668     | 3.0668     | 3.0668     |   0.0 | 44.02
+Other   |            | 0.003851   |            |       |  0.06
+
+Nlocal:            200 ave         200 max         200 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:              0 ave           0 max           0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:           3031 ave        3031 max        3031 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 3031
+Ave neighs/atom = 15.155
+Neighbor list builds = 233
+Dangerous builds not checked
+
+##### EJECTION #####
+
+variable punch_frequency  equal PI/2/(dt*${ejection_steps})
+variable punch_frequency  equal PI/2/(dt*14000)
+variable disp_lower       equal ${dieHeight}*sin(${punch_frequency}*elapsed*dt)
+variable disp_lower       equal 0.01*sin(${punch_frequency}*elapsed*dt)
+variable disp_lower       equal 0.01*sin(28.0499344070517*elapsed*dt)
+variable disp_upper       equal 0.9*v_disp_lower
+run ${ejection_steps}
+run 14000
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Per MPI rank memory allocation (min/avg/max) = 72.31 | 72.31 | 72.31 Mbytes
+      Dt          Step        Atoms        KinEng         Volume      v_disp_upper 
+ 4e-06              19000         200   1.4567292e-19  2.1e-06        0            
+ 4e-06              19100         200   1.9132195e-05  2.1e-06        0.00010097765
+ 4e-06              19200         200   3.1561748e-06  2.1e-06        0.00020194258
+ 4e-06              19300         200   1.4462178e-05  2.1e-06        0.00030288209
+ 4e-06              19400         200   4.6622112e-06  2.1e-06        0.00040378347
+ 4e-06              19500         200   1.1929852e-05  2.1e-06        0.00050463403
+ 4e-06              19600         200   5.6933661e-06  2.1e-06        0.00060542105
+ 4e-06              19700         200   1.0429976e-05  2.1e-06        0.00070613186
+ 4e-06              19800         200   6.6580254e-06  2.1e-06        0.00080675378
+ 4e-06              19900         200   9.1721686e-06  2.1e-06        0.00090727414
+ 4e-06              20000         200   7.4553343e-06  2.1e-06        0.0010076803 
+ 4e-06              20100         200   8.3534813e-06  2.1e-06        0.0011079596 
+ 4e-06              20200         200   7.8671075e-06  2.1e-06        0.0012080994 
+ 4e-06              20300         200   7.9115898e-06  2.1e-06        0.0013080871 
+ 4e-06              20400         200   8.2222312e-06  2.1e-06        0.0014079102 
+ 4e-06              20500         200   7.5281175e-06  2.1e-06        0.001507556  
+ 4e-06              20600         200   8.4242027e-06  2.1e-06        0.0016070121 
+ 4e-06              20700         200   7.3282364e-06  2.1e-06        0.0017062658 
+ 4e-06              20800         200   8.3501222e-06  2.1e-06        0.0018053047 
+ 4e-06              20900         200   7.3417566e-06  2.1e-06        0.0019041164 
+ 4e-06              21000         200   8.0702927e-06  2.1e-06        0.0020026884 
+ 4e-06              21100         200   7.5896194e-06  2.1e-06        0.0021010083 
+ 4e-06              21200         200   7.6596342e-06  2.1e-06        0.0021990637 
+ 4e-06              21300         200   7.7009755e-06  2.1e-06        0.0022968422 
+ 4e-06              21400         200   7.4010568e-06  2.1e-06        0.0023943316 
+ 4e-06              21500         200   7.7254953e-06  2.1e-06        0.0024915196 
+ 4e-06              21600         200   7.2931076e-06  2.1e-06        0.0025883939 
+ 4e-06              21700         200   7.5667043e-06  2.1e-06        0.0026849424 
+ 4e-06              21800         200   7.2767179e-06  2.1e-06        0.0027811529 
+ 4e-06              21900         200   7.3632148e-06  2.1e-06        0.0028770133 
+ 4e-06              22000         200   7.2563523e-06  2.1e-06        0.0029725116 
+ 4e-06              22100         200   7.2003226e-06  2.1e-06        0.0030676356 
+ 4e-06              22200         200   7.1862422e-06  2.1e-06        0.0031623734 
+ 4e-06              22300         200   7.0035785e-06  2.1e-06        0.0032567132 
+ 4e-06              22400         200   7.1023437e-06  2.1e-06        0.0033506429 
+ 4e-06              22500         200   6.8767896e-06  2.1e-06        0.0034441509 
+ 4e-06              22600         200   6.9556381e-06  2.1e-06        0.0035372253 
+ 4e-06              22700         200   6.7754491e-06  2.1e-06        0.0036298544 
+ 4e-06              22800         200   6.7752923e-06  2.1e-06        0.0037220265 
+ 4e-06              22900         200   6.6947789e-06  2.1e-06        0.0038137301 
+ 4e-06              23000         200   6.5811876e-06  2.1e-06        0.0039049537 
+ 4e-06              23100         200   6.5600064e-06  2.1e-06        0.0039956856 
+ 4e-06              23200         200   6.407054e-06   2.1e-06        0.0040859145 
+ 4e-06              23300         200   6.4635326e-06  2.1e-06        0.0041756291 
+ 4e-06              23400         200   6.2604509e-06  2.1e-06        0.004264818  
+ 4e-06              23500         200   6.2914059e-06  2.1e-06        0.00435347   
+ 4e-06              23600         200   6.1416598e-06  2.1e-06        0.004441574  
+ 4e-06              23700         200   6.0839487e-06  2.1e-06        0.0045291188 
+ 4e-06              23800         200   6.0216029e-06  2.1e-06        0.0046160935 
+ 4e-06              23900         200   5.896464e-06   2.1e-06        0.0047024871 
+ 4e-06              24000         200   5.8682556e-06  2.1e-06        0.0047882887 
+ 4e-06              24100         200   5.8744357e-06  2.1e-06        0.0048734875 
+ 4e-06              24200         200   5.6172509e-06  2.1e-06        0.0049580728 
+ 4e-06              24300         200   5.6527872e-06  2.1e-06        0.005042034  
+ 4e-06              24400         200   5.4706998e-06  2.1e-06        0.0051253604 
+ 4e-06              24500         200   5.4368713e-06  2.1e-06        0.0052080417 
+ 4e-06              24600         200   5.3496195e-06  2.1e-06        0.0052900673 
+ 4e-06              24700         200   5.2020248e-06  2.1e-06        0.0053714269 
+ 4e-06              24800         200   5.2035809e-06  2.1e-06        0.0054521104 
+ 4e-06              24900         200   5.0302031e-06  2.1e-06        0.0055321075 
+ 4e-06              25000         200   5.0094633e-06  2.1e-06        0.0056114082 
+ 4e-06              25100         200   4.8588064e-06  2.1e-06        0.0056900025 
+ 4e-06              25200         200   4.8221437e-06  2.1e-06        0.0057678805 
+ 4e-06              25300         200   4.7117322e-06  2.1e-06        0.0058450324 
+ 4e-06              25400         200   4.6148719e-06  2.1e-06        0.0059214485 
+ 4e-06              25500         200   4.5348297e-06  2.1e-06        0.0059971192 
+ 4e-06              25600         200   4.4325937e-06  2.1e-06        0.0060720349 
+ 4e-06              25700         200   4.3587865e-06  2.1e-06        0.0061461862 
+ 4e-06              25800         200   4.2449842e-06  2.1e-06        0.0062195638 
+ 4e-06              25900         200   4.1730814e-06  2.1e-06        0.0062921585 
+ 4e-06              26000         200   4.0712085e-06  2.1e-06        0.006363961  
+ 4e-06              26100         200   3.9603603e-06  2.1e-06        0.0064349624 
+ 4e-06              26200         200   3.9152641e-06  2.1e-06        0.0065051538 
+ 4e-06              26300         200   3.7864366e-06  2.1e-06        0.0065745262 
+ 4e-06              26400         200   3.7211553e-06  2.1e-06        0.006643071  
+ 4e-06              26500         200   3.6038142e-06  2.1e-06        0.0067107795 
+ 4e-06              26600         200   3.5518456e-06  2.1e-06        0.0067776432 
+ 4e-06              26700         200   3.4213616e-06  2.1e-06        0.0068436537 
+ 4e-06              26800         200   3.348649e-06   2.1e-06        0.0069088027 
+ 4e-06              26900         200   3.2592054e-06  2.1e-06        0.0069730819 
+ 4e-06              27000         200   3.1640896e-06  2.1e-06        0.0070364833 
+ 4e-06              27100         200   3.1491467e-06  2.1e-06        0.007098999  
+ 4e-06              27200         200   2.9475347e-06  2.1e-06        0.0071606209 
+ 4e-06              27300         200   2.9234007e-06  2.1e-06        0.0072213415 
+ 4e-06              27400         200   2.8106832e-06  2.1e-06        0.0072811529 
+ 4e-06              27500         200   2.7190831e-06  2.1e-06        0.0073400478 
+ 4e-06              27600         200   2.6595021e-06  2.1e-06        0.0073980187 
+ 4e-06              27700         200   2.5384163e-06  2.1e-06        0.0074550582 
+ 4e-06              27800         200   2.4906759e-06  2.1e-06        0.0075111593 
+ 4e-06              27900         200   2.3760852e-06  2.1e-06        0.0075663148 
+ 4e-06              28000         200   2.3135864e-06  2.1e-06        0.0076205178 
+ 4e-06              28100         200   2.206388e-06   2.1e-06        0.0076737615 
+ 4e-06              28200         200   2.1580755e-06  2.1e-06        0.0077260391 
+ 4e-06              28300         200   2.0541807e-06  2.1e-06        0.0077773442 
+ 4e-06              28400         200   1.9879886e-06  2.1e-06        0.0078276702 
+ 4e-06              28500         200   1.9080731e-06  2.1e-06        0.0078770108 
+ 4e-06              28600         200   1.8244513e-06  2.1e-06        0.0079253598 
+ 4e-06              28700         200   1.7612085e-06  2.1e-06        0.0079727111 
+ 4e-06              28800         200   1.6725418e-06  2.1e-06        0.0080190587 
+ 4e-06              28900         200   1.6108221e-06  2.1e-06        0.0080643969 
+ 4e-06              29000         200   1.5315923e-06  2.1e-06        0.0081087198 
+ 4e-06              29100         200   1.4668177e-06  2.1e-06        0.008152022  
+ 4e-06              29200         200   1.389947e-06   2.1e-06        0.0081942979 
+ 4e-06              29300         200   1.3244327e-06  2.1e-06        0.0082355423 
+ 4e-06              29400         200   1.2613389e-06  2.1e-06        0.00827575   
+ 4e-06              29500         200   1.189317e-06   2.1e-06        0.0083149158 
+ 4e-06              29600         200   1.1328651e-06  2.1e-06        0.0083530349 
+ 4e-06              29700         200   1.0634003e-06  2.1e-06        0.0083901025 
+ 4e-06              29800         200   1.0089659e-06  2.1e-06        0.0084261138 
+ 4e-06              29900         200   9.452383e-07   2.1e-06        0.0084610645 
+ 4e-06              30000         200   8.8857387e-07  2.1e-06        0.00849495   
+ 4e-06              30100         200   8.3934751e-07  2.1e-06        0.0085277661 
+ 4e-06              30200         200   7.7404495e-07  2.1e-06        0.0085595086 
+ 4e-06              30300         200   7.2760888e-07  2.1e-06        0.0085901737 
+ 4e-06              30400         200   6.7239685e-07  2.1e-06        0.0086197574 
+ 4e-06              30500         200   6.245416e-07   2.1e-06        0.0086482559 
+ 4e-06              30600         200   5.7834155e-07  2.1e-06        0.0086756657 
+ 4e-06              30700         200   5.2814574e-07  2.1e-06        0.0087019834 
+ 4e-06              30800         200   4.8878902e-07  2.1e-06        0.0087272057 
+ 4e-06              30900         200   4.4241206e-07  2.1e-06        0.0087513293 
+ 4e-06              31000         200   4.0442514e-07  2.1e-06        0.0087743512 
+ 4e-06              31100         200   3.642199e-07   2.1e-06        0.0087962686 
+ 4e-06              31200         200   3.2738558e-07  2.1e-06        0.0088170786 
+ 4e-06              31300         200   2.9389397e-07  2.1e-06        0.0088367787 
+ 4e-06              31400         200   2.5861566e-07  2.1e-06        0.0088553663 
+ 4e-06              31500         200   2.2934636e-07  2.1e-06        0.0088728392 
+ 4e-06              31600         200   1.9888494e-07  2.1e-06        0.0088891951 
+ 4e-06              31700         200   1.7250531e-07  2.1e-06        0.0089044319 
+ 4e-06              31800         200   1.4678036e-07  2.1e-06        0.0089185479 
+ 4e-06              31900         200   1.2324632e-07  2.1e-06        0.0089315411 
+ 4e-06              32000         200   1.0248084e-07  2.1e-06        0.0089434099 
+ 4e-06              32100         200   8.2609273e-08  2.1e-06        0.0089541529 
+ 4e-06              32200         200   6.551679e-08   2.1e-06        0.0089637686 
+ 4e-06              32300         200   5.0080052e-08  2.1e-06        0.008972256  
+ 4e-06              32400         200   3.6856646e-08  2.1e-06        0.0089796139 
+ 4e-06              32500         200   2.5648284e-08  2.1e-06        0.0089858413 
+ 4e-06              32600         200   1.637837e-08   2.1e-06        0.0089909376 
+ 4e-06              32700         200   9.2578154e-09  2.1e-06        0.008994902  
+ 4e-06              32800         200   4.0824723e-09  2.1e-06        0.0089977341 
+ 4e-06              32900         200   1.0371165e-09  2.1e-06        0.0089994335 
+ 4e-06              33000         200   6.1012168e-14  2.1e-06        0.009        
+Loop time of 7.70808 on 1 procs for 14000 steps with 200 atoms
+
+99.6% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 4.4414     | 4.4414     | 4.4414     |   0.0 | 57.62
+Neigh   | 0.040941   | 0.040941   | 0.040941   |   0.0 |  0.53
+Comm    | 0.001526   | 0.001526   | 0.001526   |   0.0 |  0.02
+Output  | 0.0019617  | 0.0019617  | 0.0019617  |   0.0 |  0.03
+Modify  | 3.2183     | 3.2183     | 3.2183     |   0.0 | 41.75
+Other   |            | 0.003942   |            |       |  0.05
+
+Nlocal:            200 ave         200 max         200 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:              0 ave           0 max           0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:           3026 ave        3026 max        3026 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 3026
+Ave neighs/atom = 15.13
+Neighbor list builds = 233
+Dangerous builds not checked
+
+##### FREE FLOAT #####
+
+variable disp_lower  equal ${dieHeight}
+variable disp_lower  equal 0.01
+variable disp_upper  equal ${dieHeight}*0.9
+variable disp_upper  equal 0.01*0.9
+variable max_disp    equal ${dieRadius}*0.75
+variable max_disp    equal 0.004*0.75
+run ${free_float_steps}
+run 5000
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Per MPI rank memory allocation (min/avg/max) = 72.31 | 72.31 | 72.31 Mbytes
+      Dt          Step        Atoms        KinEng         Volume      v_disp_upper 
+ 4e-06              33000         200   6.1012168e-14  2.1e-06        0.009        
+ 4e-06              33100         200   6.7902539e-11  2.1e-06        0.009        
+ 4e-06              33200         200   2.0896758e-11  2.1e-06        0.009        
+ 4e-06              33300         200   2.5200405e-11  2.1e-06        0.009        
+ 4e-06              33400         200   2.1747895e-12  2.1e-06        0.009        
+ 4e-06              33500         200   1.1228817e-11  2.1e-06        0.009        
+ 4e-06              33600         200   3.3597579e-12  2.1e-06        0.009        
+ 4e-06              33700         200   1.4808583e-12  2.1e-06        0.009        
+ 4e-06              33800         200   3.5132295e-12  2.1e-06        0.009        
+ 4e-06              33900         200   8.2438639e-14  2.1e-06        0.009        
+ 4e-06              34000         200   1.3267378e-12  2.1e-06        0.009        
+ 4e-06              34100         200   6.2365031e-13  2.1e-06        0.009        
+ 4e-06              34200         200   1.1820072e-13  2.1e-06        0.009        
+ 4e-06              34300         200   5.2797742e-13  2.1e-06        0.009        
+ 4e-06              34400         200   3.2199555e-14  2.1e-06        0.009        
+ 4e-06              34500         200   1.553388e-13   2.1e-06        0.009        
+ 4e-06              34600         200   1.1458173e-13  2.1e-06        0.009        
+ 4e-06              34700         200   5.8686124e-15  2.1e-06        0.009        
+ 4e-06              34800         200   7.3486748e-14  2.1e-06        0.009        
+ 4e-06              34900         200   1.0877367e-14  2.1e-06        0.009        
+ 4e-06              35000         200   1.5284442e-14  2.1e-06        0.009        
+ 4e-06              35100         200   2.0294057e-14  2.1e-06        0.009        
+ 4e-06              35200         200   1.5385334e-17  2.1e-06        0.009        
+ 4e-06              35300         200   9.5858898e-15  2.1e-06        0.009        
+ 4e-06              35400         200   3.1985384e-15  2.1e-06        0.009        
+ 4e-06              35500         200   1.1327574e-15  2.1e-06        0.009        
+ 4e-06              35600         200   3.3810722e-15  2.1e-06        0.009        
+ 4e-06              35700         200   1.2867327e-16  2.1e-06        0.009        
+ 4e-06              35800         200   1.0755232e-15  2.1e-06        0.009        
+ 4e-06              35900         200   7.3381985e-16  2.1e-06        0.009        
+ 4e-06              36000         200   3.7750251e-17  2.1e-06        0.009        
+ 4e-06              36100         200   4.8518794e-16  2.1e-06        0.009        
+ 4e-06              36200         200   8.361623e-17   2.1e-06        0.009        
+ 4e-06              36300         200   8.9347649e-17  2.1e-06        0.009        
+ 4e-06              36400         200   1.4528409e-16  2.1e-06        0.009        
+ 4e-06              36500         200   8.2328133e-19  2.1e-06        0.009        
+ 4e-06              36600         200   5.9628413e-17  2.1e-06        0.009        
+ 4e-06              36700         200   2.857306e-17   2.1e-06        0.009        
+ 4e-06              36800         200   4.1078269e-18  2.1e-06        0.009        
+ 4e-06              36900         200   2.4094514e-17  2.1e-06        0.009        
+ 4e-06              37000         200   2.6153896e-18  2.1e-06        0.009        
+ 4e-06              37100         200   5.6577297e-18  2.1e-06        0.009        
+ 4e-06              37200         200   6.5849416e-18  2.1e-06        0.009        
+ 4e-06              37300         200   4.5596918e-21  2.1e-06        0.009        
+ 4e-06              37400         200   3.2329813e-18  2.1e-06        0.009        
+ 4e-06              37500         200   1.123288e-18   2.1e-06        0.009        
+ 4e-06              37600         200   3.4227094e-19  2.1e-06        0.009        
+ 4e-06              37700         200   1.1782135e-18  2.1e-06        0.009        
+ 4e-06              37800         200   6.9535961e-20  2.1e-06        0.009        
+ 4e-06              37900         200   3.4055174e-19  2.1e-06        0.009        
+ 4e-06              38000         200   2.8968649e-19  2.1e-06        0.009        
+Loop time of 2.65906 on 1 procs for 5000 steps with 200 atoms
+
+99.6% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.5595     | 1.5595     | 1.5595     |   0.0 | 58.65
+Neigh   | 0.012904   | 0.012904   | 0.012904   |   0.0 |  0.49
+Comm    | 0.00041333 | 0.00041333 | 0.00041333 |   0.0 |  0.02
+Output  | 0.00053486 | 0.00053486 | 0.00053486 |   0.0 |  0.02
+Modify  | 1.0844     | 1.0844     | 1.0844     |   0.0 | 40.78
+Other   |            | 0.001336   |            |       |  0.05
+
+Nlocal:            200 ave         200 max         200 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:              0 ave           0 max           0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:           3026 ave        3026 max        3026 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 3026
+Ave neighs/atom = 15.13
+Neighbor list builds = 83
+Dangerous builds not checked
+Total wall time: 0:00:18
diff --git a/examples/granular/log.4Feb25.triaxial.compaction.12.g++.1 b/examples/granular/log.4Feb25.triaxial.compaction.12.g++.1
new file mode 100644
index 0000000000..1992fb2555
--- /dev/null
+++ b/examples/granular/log.4Feb25.triaxial.compaction.12.g++.1
@@ -0,0 +1,784 @@
+LAMMPS (4 Feb 2025 - Development - patch_5May2020-22356-g0c29a0a0c9-modified)
+############################### SIMULATION SETTINGS ###################################################
+
+atom_style      sphere 1
+atom_modify     map array
+comm_modify vel yes
+units           si
+newton          off
+neighbor        2 bin
+neigh_modify    delay 0
+timestep        1e-6
+
+##################### SIMULATION BOUNDING BOX, INSERT PARTICLES, AND INTEGRATION #######################
+
+boundary f f f
+read_data spheres12.data
+Reading data file ...
+  orthogonal box = (-10 -10 -10) to (10 10 10)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  12 atoms
+  read_data CPU = 0.029 seconds
+fix integr all nve/sphere
+
+# create pair group for contact area outputs
+group particles_1_12 id 1 12
+2 atoms in group particles_1_12
+
+########################### PARTICLE MATERIAL PROPERTIES AND FORCE MODEL ###############################
+
+variable atomRadius equal 0.5
+
+pair_style granular
+
+# mdr = E, nu, Y, gamma, psi_b, damp
+variable YoungsModulus  equal 1e9
+variable PoissonsRatio  equal 0.3
+variable YieldStress    equal 50e6
+variable SurfaceEnergy  equal 0.0
+variable psi_b          equal 0.5
+variable damp           equal 0.2
+variable damp_type      equal 1
+
+# linear_history = k_t, x_gamma,t, mu_s
+variable kt       equal 2/7*${YoungsModulus}*${atomRadius}
+variable kt       equal 2/7*1000000000*${atomRadius}
+variable kt       equal 2/7*1000000000*0.5
+variable xgammat  equal 0.0
+variable mu_s     equal 0.5
+
+pair_coeff * * mdr ${YoungsModulus} ${PoissonsRatio} ${YieldStress} ${SurfaceEnergy} ${psi_b} ${damp}    damping mdr ${damp_type}    tangential linear_history ${kt} ${xgammat} ${mu_s}
+pair_coeff * * mdr 1000000000 ${PoissonsRatio} ${YieldStress} ${SurfaceEnergy} ${psi_b} ${damp}    damping mdr ${damp_type}    tangential linear_history ${kt} ${xgammat} ${mu_s}
+pair_coeff * * mdr 1000000000 0.3 ${YieldStress} ${SurfaceEnergy} ${psi_b} ${damp}    damping mdr ${damp_type}    tangential linear_history ${kt} ${xgammat} ${mu_s}
+pair_coeff * * mdr 1000000000 0.3 50000000 ${SurfaceEnergy} ${psi_b} ${damp}    damping mdr ${damp_type}    tangential linear_history ${kt} ${xgammat} ${mu_s}
+pair_coeff * * mdr 1000000000 0.3 50000000 0 ${psi_b} ${damp}    damping mdr ${damp_type}    tangential linear_history ${kt} ${xgammat} ${mu_s}
+pair_coeff * * mdr 1000000000 0.3 50000000 0 0.5 ${damp}    damping mdr ${damp_type}    tangential linear_history ${kt} ${xgammat} ${mu_s}
+pair_coeff * * mdr 1000000000 0.3 50000000 0 0.5 0.2    damping mdr ${damp_type}    tangential linear_history ${kt} ${xgammat} ${mu_s}
+pair_coeff * * mdr 1000000000 0.3 50000000 0 0.5 0.2    damping mdr 1    tangential linear_history ${kt} ${xgammat} ${mu_s}
+pair_coeff * * mdr 1000000000 0.3 50000000 0 0.5 0.2    damping mdr 1    tangential linear_history 142857142.857143 ${xgammat} ${mu_s}
+pair_coeff * * mdr 1000000000 0.3 50000000 0 0.5 0.2    damping mdr 1    tangential linear_history 142857142.857143 0 ${mu_s}
+pair_coeff * * mdr 1000000000 0.3 50000000 0 0.5 0.2    damping mdr 1    tangential linear_history 142857142.857143 0 0.5
+
+######################################### ADD IN PLANES ################################################
+
+variable boxWidth equal 3
+variable halfBoxWidth equal ${boxWidth}/2
+variable halfBoxWidth equal 3/2
+
+variable plane_disp equal 0.0
+variable plane_disp_neg equal 0.0
+
+region plane_yz_pos plane ${halfBoxWidth} 0 0 -1 0 0 side in move v_plane_disp_neg NULL NULL units box
+region plane_yz_pos plane 1.5 0 0 -1 0 0 side in move v_plane_disp_neg NULL NULL units box
+region plane_yz_neg plane -${halfBoxWidth} 0 0 1 0 0 side in move v_plane_disp NULL NULL units box
+region plane_yz_neg plane -1.5 0 0 1 0 0 side in move v_plane_disp NULL NULL units box
+region plane_xz_pos plane 0 ${halfBoxWidth} 0 0 -1 0 side in move NULL v_plane_disp_neg NULL units box
+region plane_xz_pos plane 0 1.5 0 0 -1 0 side in move NULL v_plane_disp_neg NULL units box
+region plane_xz_neg plane 0 -${halfBoxWidth} 0 0 1 0 side in move NULL v_plane_disp NULL units box
+region plane_xz_neg plane 0 -1.5 0 0 1 0 side in move NULL v_plane_disp NULL units box
+region plane_xy_pos plane 0 0 ${halfBoxWidth} 0 0 -1  side in move NULL NULL v_plane_disp_neg units box
+region plane_xy_pos plane 0 0 1.5 0 0 -1  side in move NULL NULL v_plane_disp_neg units box
+region plane_xy_neg plane 0 0 -${halfBoxWidth} 0 0 1 side in move NULL NULL v_plane_disp units box
+region plane_xy_neg plane 0 0 -1.5 0 0 1 side in move NULL NULL v_plane_disp units box
+
+variable wall_contact_string string "granular mdr ${YoungsModulus} ${PoissonsRatio} ${YieldStress} ${SurfaceEnergy} ${psi_b} ${damp} damping mdr ${damp_type} tangential linear_history ${kt} ${xgammat} ${mu_s} "
+granular mdr 1000000000 ${PoissonsRatio} ${YieldStress} ${SurfaceEnergy} ${psi_b} ${damp} damping mdr ${damp_type} tangential linear_history ${kt} ${xgammat} ${mu_s} 
+granular mdr 1000000000 0.3 ${YieldStress} ${SurfaceEnergy} ${psi_b} ${damp} damping mdr ${damp_type} tangential linear_history ${kt} ${xgammat} ${mu_s} 
+granular mdr 1000000000 0.3 50000000 ${SurfaceEnergy} ${psi_b} ${damp} damping mdr ${damp_type} tangential linear_history ${kt} ${xgammat} ${mu_s} 
+granular mdr 1000000000 0.3 50000000 0 ${psi_b} ${damp} damping mdr ${damp_type} tangential linear_history ${kt} ${xgammat} ${mu_s} 
+granular mdr 1000000000 0.3 50000000 0 0.5 ${damp} damping mdr ${damp_type} tangential linear_history ${kt} ${xgammat} ${mu_s} 
+granular mdr 1000000000 0.3 50000000 0 0.5 0.2 damping mdr ${damp_type} tangential linear_history ${kt} ${xgammat} ${mu_s} 
+granular mdr 1000000000 0.3 50000000 0 0.5 0.2 damping mdr 1 tangential linear_history ${kt} ${xgammat} ${mu_s} 
+granular mdr 1000000000 0.3 50000000 0 0.5 0.2 damping mdr 1 tangential linear_history 142857142.857143 ${xgammat} ${mu_s} 
+granular mdr 1000000000 0.3 50000000 0 0.5 0.2 damping mdr 1 tangential linear_history 142857142.857143 0 ${mu_s} 
+granular mdr 1000000000 0.3 50000000 0 0.5 0.2 damping mdr 1 tangential linear_history 142857142.857143 0 0.5 
+
+fix plane_yz_pos all wall/gran/region ${wall_contact_string} region plane_yz_pos contacts
+fix plane_yz_pos all wall/gran/region granular mdr 1000000000 0.3 50000000 0 0.5 0.2 damping mdr 1 tangential linear_history 142857142.857143 0 0.5  region plane_yz_pos contacts
+fix plane_yz_neg all wall/gran/region ${wall_contact_string} region plane_yz_neg contacts
+fix plane_yz_neg all wall/gran/region granular mdr 1000000000 0.3 50000000 0 0.5 0.2 damping mdr 1 tangential linear_history 142857142.857143 0 0.5  region plane_yz_neg contacts
+fix plane_xz_pos all wall/gran/region ${wall_contact_string} region plane_xz_pos contacts
+fix plane_xz_pos all wall/gran/region granular mdr 1000000000 0.3 50000000 0 0.5 0.2 damping mdr 1 tangential linear_history 142857142.857143 0 0.5  region plane_xz_pos contacts
+fix plane_xz_neg all wall/gran/region ${wall_contact_string} region plane_xz_neg contacts
+fix plane_xz_neg all wall/gran/region granular mdr 1000000000 0.3 50000000 0 0.5 0.2 damping mdr 1 tangential linear_history 142857142.857143 0 0.5  region plane_xz_neg contacts
+fix plane_xy_pos all wall/gran/region ${wall_contact_string} region plane_xy_pos contacts
+fix plane_xy_pos all wall/gran/region granular mdr 1000000000 0.3 50000000 0 0.5 0.2 damping mdr 1 tangential linear_history 142857142.857143 0 0.5  region plane_xy_pos contacts
+fix plane_xy_neg all wall/gran/region ${wall_contact_string} region plane_xy_neg contacts
+fix plane_xy_neg all wall/gran/region granular mdr 1000000000 0.3 50000000 0 0.5 0.2 damping mdr 1 tangential linear_history 142857142.857143 0 0.5  region plane_xy_neg contacts
+
+compute plane_xy_neg_force all reduce sum f_plane_xy_neg[4]
+variable plane_xy_neg_force equal c_plane_xy_neg_force
+
+compute plane_xz_neg_force all reduce sum f_plane_xz_neg[3]
+variable plane_xz_neg_force equal c_plane_xz_neg_force
+
+compute plane_yz_neg_force all reduce sum f_plane_yz_neg[2]
+variable plane_yz_neg_force equal c_plane_yz_neg_force
+
+#fix print1 all print 1 "${plane_disp} ${plane_xy_neg_force} ${plane_xz_neg_force} ${plane_yz_neg_force}" file force_disp_triaxial12.csv screen no
+
+######################################## SCREEN OUTPUT  ####################################################
+
+compute       1 all erotate/sphere
+thermo_style  custom dt step atoms ke c_1 vol
+thermo        100
+thermo_modify lost ignore norm no
+
+##################################### DEFINE WALL MOVEMENT  #################################################
+
+variable disp_max equal 0.499
+variable ddisp equal 0.00001
+variable compression_steps equal round(${disp_max}/${ddisp})
+variable compression_steps equal round(0.499/${ddisp})
+variable compression_steps equal round(0.499/1e-05)
+variable output_rate equal round(${compression_steps}/100)
+variable output_rate equal round(49900/100)
+
+##################################### SET UP DUMP OUTPUTS  ####################################################
+
+#dump dumpParticles all custom ${output_rate} triaxial_compaction_12.dump id type mass x y z vx vy vz fx fy fz radius
+#dump   dmp all vtk ${output_rate} post/triaxial12particles_*.vtk id type mass x y z vx vy vz fx fy fz radius
+
+#################################### COMPRESS THE PARTICLES  ##################################################
+
+run 0
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- MDR contact model command: (i) https://doi.org/10.1016/j.jmps.2023.105492 || (ii) https://doi.org/10.1016/j.jmps.2023.105493 || (iii) https://doi.org/10.31224/4289
+
+@Article{zunker2024mechanicallyI,
+ author =  {Zunker, William and Kamrin, Ken},
+ title =   {A mechanically-derived contact model for adhesive elastic-perfectly plastic particles,
+            Part I: Utilizing the method of dimensionality reduction},
+ journal = {Journal of the Mechanics and Physics of Solids},
+ year =    {2024},
+ volume =  {183},
+ pages =   {105492},
+}
+
+@Article{zunker2024mechanicallyII,
+ author =  {Zunker, William and Kamrin, Ken},
+ title =   {A mechanically-derived contact model for adhesive elastic-perfectly plastic particles,
+            Part II: Contact under high compaction—modeling a bulk elastic response},
+ journal = {Journal of the Mechanics and Physics of Solids},
+ year =    {2024},
+ volume =  {183},
+ pages =   {105493},
+}
+
+@Article{zunker2025experimentally,
+ author =  {Zunker, William and Dunatunga, Sachith and Thakur, Subhash and Tang, Pingjun and Kamrin, Ken},
+ title =   {Experimentally validated DEM for large deformation powder compaction:
+            mechanically-derived contact model and screening of non-physical contacts},
+ journal = {Powder Technology},
+ year =    {2025},
+ pages =   {120972},
+}
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 3.2
+  ghost atom cutoff = 3.2
+  binsize = 1.6, bins = 13 13 13
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair granular, perpetual
+      attributes: half, newton off, size, history
+      pair build: half/size/bin/atomonly/newtoff
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 74.54 | 74.54 | 74.54 Mbytes
+      Dt          Step        Atoms        KinEng          c_1           Volume    
+ 1e-06                  0          12   0              0              8000         
+Loop time of 8.28e-07 on 1 procs for 0 steps with 12 atoms
+
+0.0% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0          | 0          | 0          |   0.0 |  0.00
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0          | 0          | 0          |   0.0 |  0.00
+Output  | 0          | 0          | 0          |   0.0 |  0.00
+Modify  | 0          | 0          | 0          |   0.0 |  0.00
+Other   |            | 8.28e-07   |            |       |100.00
+
+Nlocal:             12 ave          12 max          12 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:              0 ave           0 max           0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:             66 ave          66 max          66 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 66
+Ave neighs/atom = 5.5
+Neighbor list builds = 0
+Dangerous builds = 0
+
+# print out contact area evolution for particles 1 and 12
+compute Ac_1_12 particles_1_12 pair/local p13 cutoff radius
+compute Ac_1_12_sum particles_1_12 reduce sum c_Ac_1_12 inputs local
+variable Ac_1_12 equal c_Ac_1_12_sum
+#fix logArea all print 100 "${plane_disp} ${Ac_1_12}" file pair_1_12_contact_area_triaxial12.csv screen no
+
+variable plane_disp equal ${ddisp}*elapsed
+variable plane_disp equal 1e-05*elapsed
+variable plane_disp_neg equal -${ddisp}*elapsed
+variable plane_disp_neg equal -1e-05*elapsed
+
+run ${compression_steps}
+run 49900
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 3.2
+  ghost atom cutoff = 3.2
+  binsize = 1.6, bins = 13 13 13
+  2 neighbor lists, perpetual/occasional/extra = 1 1 0
+  (1) pair granular, perpetual
+      attributes: half, newton off, size, history
+      pair build: half/size/bin/atomonly/newtoff
+      stencil: full/bin/3d
+      bin: standard
+  (2) compute pair/local, occasional
+      attributes: half, newton off, size
+      pair build: half/size/bin/atomonly/newtoff
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 75.17 | 75.17 | 75.17 Mbytes
+      Dt          Step        Atoms        KinEng          c_1           Volume    
+ 1e-06                  0          12   0              0              8000         
+ 1e-06                100          12   0.063728867    0              8000         
+ 1e-06                200          12   0.79328888     0              8000         
+ 1e-06                300          12   3.1671095      0              8000         
+ 1e-06                400          12   8.7248683      0              8000         
+ 1e-06                500          12   20.158012      0              8000         
+ 1e-06                600          12   41.157061      0              8000         
+ 1e-06                700          12   76.628872      0              8000         
+ 1e-06                800          12   132.88829      0              8000         
+ 1e-06                900          12   217.80403      0              8000         
+ 1e-06               1000          12   340.91325      0              8000         
+ 1e-06               1100          12   513.50254      0              8000         
+ 1e-06               1200          12   748.65202      0              8000         
+ 1e-06               1300          12   1061.2394      0              8000         
+ 1e-06               1400          12   1467.9017      0              8000         
+ 1e-06               1500          12   1986.9524      0              8000         
+ 1e-06               1600          12   2638.2542      0              8000         
+ 1e-06               1700          12   3443.0464      0              8000         
+ 1e-06               1800          12   4423.728       0              8000         
+ 1e-06               1900          12   5603.6004      0              8000         
+ 1e-06               2000          12   7006.5697      0              8000         
+ 1e-06               2100          12   8656.8153      0              8000         
+ 1e-06               2200          12   10575.061      0.00041930161  8000         
+ 1e-06               2300          12   12776.34       0.031472922    8000         
+ 1e-06               2400          12   15285.834      0.16547279     8000         
+ 1e-06               2500          12   18123.472      0.48824818     8000         
+ 1e-06               2600          12   21306.73       1.1172148      8000         
+ 1e-06               2700          12   24850.635      2.2101203      8000         
+ 1e-06               2800          12   28767.448      3.9742876      8000         
+ 1e-06               2900          12   33066.341      6.6765245      8000         
+ 1e-06               3000          12   37753.104      10.653677      8000         
+ 1e-06               3100          12   42829.905      16.323673      8000         
+ 1e-06               3200          12   48295.143      24.196804      8000         
+ 1e-06               3300          12   54143.784      34.886968      8000         
+ 1e-06               3400          12   60367.427      49.122473      8000         
+ 1e-06               3500          12   66954.459      67.756017      8000         
+ 1e-06               3600          12   73890.367      91.773371      8000         
+ 1e-06               3700          12   81158.051      122.30029      8000         
+ 1e-06               3800          12   88738.15       160.60714      8000         
+ 1e-06               3900          12   96609.374      208.11075      8000         
+ 1e-06               4000          12   104748.82      266.37305      8000         
+ 1e-06               4100          12   113132.27      337.09603      8000         
+ 1e-06               4200          12   121734.46      422.11267      8000         
+ 1e-06               4300          12   130529.26      523.37361      8000         
+ 1e-06               4400          12   139489.96      642.8869       8000         
+ 1e-06               4500          12   148590.48      781.76659      8000         
+ 1e-06               4600          12   157805.54      939.73777      8000         
+ 1e-06               4700          12   167102.78      1115.3765      8000         
+ 1e-06               4800          12   176408.3       1304.8823      8000         
+ 1e-06               4900          12   185727.22      1506.1791      8000         
+ 1e-06               5000          12   195037.08      1715.5141      8000         
+ 1e-06               5100          12   204311.37      1928.3496      8000         
+ 1e-06               5200          12   213522.05      2139.6607      8000         
+ 1e-06               5300          12   222640.01      2344.2078      8000         
+ 1e-06               5400          12   231635.3       2536.8271      8000         
+ 1e-06               5500          12   240477.26      2712.7272      8000         
+ 1e-06               5600          12   249036.18      2865.4067      8000         
+ 1e-06               5700          12   257225.21      2990.2307      8000         
+ 1e-06               5800          12   265107.22      3088.4777      8000         
+ 1e-06               5900          12   272662.78      3160.202       8000         
+ 1e-06               6000          12   279867.43      3206.6334      8000         
+ 1e-06               6100          12   286696.38      3230.2607      8000         
+ 1e-06               6200          12   293126.06      3234.7442      8000         
+ 1e-06               6300          12   299135.09      3224.7366      8000         
+ 1e-06               6400          12   304704.84      3205.6378      8000         
+ 1e-06               6500          12   309820.07      3183.3082      8000         
+ 1e-06               6600          12   314469.34      3163.76        8000         
+ 1e-06               6700          12   318645.43      3152.8517      8000         
+ 1e-06               6800          12   322345.74      3156.004       8000         
+ 1e-06               6900          12   325572.61      3177.9574      8000         
+ 1e-06               7000          12   328333.69      3222.5849      8000         
+ 1e-06               7100          12   330642.36      3292.7692      8000         
+ 1e-06               7200          12   332518.34      3390.3494      8000         
+ 1e-06               7300          12   333988.86      3516.133       8000         
+ 1e-06               7400          12   335091.17      3669.9688      8000         
+ 1e-06               7500          12   335881.45      3850.8676      8000         
+ 1e-06               7600          12   336541.99      4057.1561      8000         
+ 1e-06               7700          12   337100.25      4286.6487      8000         
+ 1e-06               7800          12   337436.07      4536.819       8000         
+ 1e-06               7900          12   337576.23      4804.9572      8000         
+ 1e-06               8000          12   337553.74      5088.3001      8000         
+ 1e-06               8100          12   337412.69      5384.125       8000         
+ 1e-06               8200          12   337266.12      5689.8034      8000         
+ 1e-06               8300          12   337141.16      6002.815       8000         
+ 1e-06               8400          12   336990.23      6320.7444      8000         
+ 1e-06               8500          12   336832.35      6645.3584      8000         
+ 1e-06               8600          12   336684.67      6980.6347      8000         
+ 1e-06               8700          12   336479.86      7317.4769      8000         
+ 1e-06               8800          12   336286.29      7653.1265      8000         
+ 1e-06               8900          12   336134.3       7982.859       8000         
+ 1e-06               9000          12   336043.31      8307.1796      8000         
+ 1e-06               9100          12   336035         8626.669       8000         
+ 1e-06               9200          12   336134.55      8942.2162      8000         
+ 1e-06               9300          12   336370.73      9254.9724      8000         
+ 1e-06               9400          12   336775.97      9566.1537      8000         
+ 1e-06               9500          12   337386.26      9876.5005      8000         
+ 1e-06               9600          12   338238.87      10185.038      8000         
+ 1e-06               9700          12   339373.93      10490.42       8000         
+ 1e-06               9800          12   340832.84      10792.816      8000         
+ 1e-06               9900          12   342649.99      11091.69       8000         
+ 1e-06              10000          12   344857.32      11386.232      8000         
+ 1e-06              10100          12   347498.52      11676.525      8000         
+ 1e-06              10200          12   350625.53      11966.038      8000         
+ 1e-06              10300          12   354228.27      12250.382      8000         
+ 1e-06              10400          12   358314.64      12529.915      8000         
+ 1e-06              10500          12   362887.71      12806.586      8000         
+ 1e-06              10600          12   367944.26      13083.935      8000         
+ 1e-06              10700          12   373508.47      13375.901      8000         
+ 1e-06              10800          12   379544.75      13682.748      8000         
+ 1e-06              10900          12   385955.58      13992.431      8000         
+ 1e-06              11000          12   392687         14307.793      8000         
+ 1e-06              11100          12   399676.01      14631.631      8000         
+ 1e-06              11200          12   406851.99      14966.316      8000         
+ 1e-06              11300          12   414139.37      15312.299      8000         
+ 1e-06              11400          12   421458.35      15668.76       8000         
+ 1e-06              11500          12   428725.85      16034.567      8000         
+ 1e-06              11600          12   435860.63      16408.01       8000         
+ 1e-06              11700          12   442914.03      16784.22       8000         
+ 1e-06              11800          12   449956.98      17165.7        8000         
+ 1e-06              11900          12   456943.52      17548.894      8000         
+ 1e-06              12000          12   463832.6       17927.761      8000         
+ 1e-06              12100          12   470563.99      18302.718      8000         
+ 1e-06              12200          12   477048.89      18678.218      8000         
+ 1e-06              12300          12   483212.97      19059.367      8000         
+ 1e-06              12400          12   489085.23      19458.596      8000         
+ 1e-06              12500          12   494661.19      19885.306      8000         
+ 1e-06              12600          12   499940         20344.66       8000         
+ 1e-06              12700          12   504927.83      20838.53       8000         
+ 1e-06              12800          12   509641.24      21366.554      8000         
+ 1e-06              12900          12   514117.28      21934.019      8000         
+ 1e-06              13000          12   518378.2       22538.416      8000         
+ 1e-06              13100          12   522458.77      23167.62       8000         
+ 1e-06              13200          12   525853.3       23780.032      8000         
+ 1e-06              13300          12   528815.22      24345.078      8000         
+ 1e-06              13400          12   531513.34      24883.702      8000         
+ 1e-06              13500          12   534014.75      25400.482      8000         
+ 1e-06              13600          12   536357.43      25891.923      8000         
+ 1e-06              13700          12   538572.95      26355.972      8000         
+ 1e-06              13800          12   540657.99      26795.836      8000         
+ 1e-06              13900          12   542612.1       27223.435      8000         
+ 1e-06              14000          12   544466.84      27647.413      8000         
+ 1e-06              14100          12   546241.59      28080.229      8000         
+ 1e-06              14200          12   547953.9       28538.741      8000         
+ 1e-06              14300          12   549620.46      29043.928      8000         
+ 1e-06              14400          12   551260.1       29617.65       8000         
+ 1e-06              14500          12   552886.91      30280.697      8000         
+ 1e-06              14600          12   554517.93      31050.292      8000         
+ 1e-06              14700          12   556175.17      31938.659      8000         
+ 1e-06              14800          12   557897.11      32951.814      8000         
+ 1e-06              14900          12   559696.65      34092.387      8000         
+ 1e-06              15000          12   561564.33      35360.561      8000         
+ 1e-06              15100          12   563512.54      36749.828      8000         
+ 1e-06              15200          12   565557.63      38246.838      8000         
+ 1e-06              15300          12   567718.92      39832.256      8000         
+ 1e-06              15400          12   569910.41      41477.599      8000         
+ 1e-06              15500          12   572038.38      43132.458      8000         
+ 1e-06              15600          12   573913.47      44763.446      8000         
+ 1e-06              15700          12   575651.62      46334.905      8000         
+ 1e-06              15800          12   577306.75      47812.828      8000         
+ 1e-06              15900          12   578897.86      49166.374      8000         
+ 1e-06              16000          12   580431.98      50370.863      8000         
+ 1e-06              16100          12   581917.67      51410.025      8000         
+ 1e-06              16200          12   583392.37      52290.363      8000         
+ 1e-06              16300          12   584896.54      53001.616      8000         
+ 1e-06              16400          12   586445.21      53535.481      8000         
+ 1e-06              16500          12   588052.51      53896.917      8000         
+ 1e-06              16600          12   589732.17      54102.867      8000         
+ 1e-06              16700          12   591496.83      54180.423      8000         
+ 1e-06              16800          12   593357.37      54163.728      8000         
+ 1e-06              16900          12   595249.77      54088.894      8000         
+ 1e-06              17000          12   597130.72      53987.792      8000         
+ 1e-06              17100          12   598744.15      53852.584      8000         
+ 1e-06              17200          12   600057.69      53662.064      8000         
+ 1e-06              17300          12   601263.08      53505.031      8000         
+ 1e-06              17400          12   602411.57      53452.104      8000         
+ 1e-06              17500          12   603541.22      53553.609      8000         
+ 1e-06              17600          12   604067.01      53875.668      8000         
+ 1e-06              17700          12   602571.09      54490.575      8000         
+ 1e-06              17800          12   600292.75      55434.526      8000         
+ 1e-06              17900          12   597386.86      56736.273      8000         
+ 1e-06              18000          12   593981.08      58424.745      8000         
+ 1e-06              18100          12   590178.8       60527.06       8000         
+ 1e-06              18200          12   586056.65      63073.313      8000         
+ 1e-06              18300          12   581695.18      66084.237      8000         
+ 1e-06              18400          12   577195.63      69560.379      8000         
+ 1e-06              18500          12   572657.32      73490.56       8000         
+ 1e-06              18600          12   568173.86      77853.568      8000         
+ 1e-06              18700          12   563830.46      82617.321      8000         
+ 1e-06              18800          12   559701.49      87738.316      8000         
+ 1e-06              18900          12   555848.76      93162.955      8000         
+ 1e-06              19000          12   552088.23      98803.436      8000         
+ 1e-06              19100          12   548185.32      104427.42      8000         
+ 1e-06              19200          12   544535.46      110064.18      8000         
+ 1e-06              19300          12   541228.34      115757.05      8000         
+ 1e-06              19400          12   538293.89      121467.69      8000         
+ 1e-06              19500          12   535108.7       127127.41      8000         
+ 1e-06              19600          12   531413.61      132495.98      8000         
+ 1e-06              19700          12   527729.51      137678.56      8000         
+ 1e-06              19800          12   524151.76      142754.02      8000         
+ 1e-06              19900          12   520715.23      147730.68      8000         
+ 1e-06              20000          12   517436.17      152621.73      8000         
+ 1e-06              20100          12   514316.17      157423.95      8000         
+ 1e-06              20200          12   511400.64      162074.69      8000         
+ 1e-06              20300          12   508692.38      166497.97      8000         
+ 1e-06              20400          12   506153.1       170617.84      8000         
+ 1e-06              20500          12   503717.78      174351.25      8000         
+ 1e-06              20600          12   501293.63      177615.72      8000         
+ 1e-06              20700          12   498966.36      180365.82      8000         
+ 1e-06              20800          12   496760.02      182555.39      8000         
+ 1e-06              20900          12   494705.8       184132.07      8000         
+ 1e-06              21000          12   492841.84      185052.2       8000         
+ 1e-06              21100          12   491215.97      185288.72      8000         
+ 1e-06              21200          12   489677.84      184786.05      8000         
+ 1e-06              21300          12   488104.26      183535.77      8000         
+ 1e-06              21400          12   486394.75      181560.76      8000         
+ 1e-06              21500          12   484603.38      178874.58      8000         
+ 1e-06              21600          12   482731.75      175610.89      8000         
+ 1e-06              21700          12   480707.38      172071.04      8000         
+ 1e-06              21800          12   478504.9       168244.44      8000         
+ 1e-06              21900          12   476099.5       164130.08      8000         
+ 1e-06              22000          12   473401.48      159927.13      8000         
+ 1e-06              22100          12   470370.26      155762.08      8000         
+ 1e-06              22200          12   467067.88      151634.35      8000         
+ 1e-06              22300          12   463485.76      147589.77      8000         
+ 1e-06              22400          12   459593.94      143696.49      8000         
+ 1e-06              22500          12   455376.67      140035.23      8000         
+ 1e-06              22600          12   450819.48      136692.31      8000         
+ 1e-06              22700          12   445945.91      133704.47      8000         
+ 1e-06              22800          12   440758.89      131179.35      8000         
+ 1e-06              22900          12   435302.31      129342.22      8000         
+ 1e-06              23000          12   429598.98      128282.87      8000         
+ 1e-06              23100          12   423606.11      127995.47      8000         
+ 1e-06              23200          12   417364.51      128494.32      8000         
+ 1e-06              23300          12   410931.48      129772.21      8000         
+ 1e-06              23400          12   404376.34      131791.95      8000         
+ 1e-06              23500          12   397794.05      134484.8       8000         
+ 1e-06              23600          12   390990.06      137778.07      8000         
+ 1e-06              23700          12   384162.26      141638.59      8000         
+ 1e-06              23800          12   377636.5       145932.03      8000         
+ 1e-06              23900          12   371606.04      150514.72      8000         
+ 1e-06              24000          12   366266.24      155257.47      8000         
+ 1e-06              24100          12   361786.35      160026.32      8000         
+ 1e-06              24200          12   358332.56      164701.26      8000         
+ 1e-06              24300          12   356060.82      169171.06      8000         
+ 1e-06              24400          12   355078.48      173360.16      8000         
+ 1e-06              24500          12   355450.93      177217.02      8000         
+ 1e-06              24600          12   357219.84      180708.91      8000         
+ 1e-06              24700          12   360415.01      183807.52      8000         
+ 1e-06              24800          12   365023.77      186489.43      8000         
+ 1e-06              24900          12   371003.31      188744.64      8000         
+ 1e-06              25000          12   377719.96      190508.83      8000         
+ 1e-06              25100          12   385044.02      191723.21      8000         
+ 1e-06              25200          12   392909.32      192416.99      8000         
+ 1e-06              25300          12   401135.52      192645.59      8000         
+ 1e-06              25400          12   409539.24      192477.65      8000         
+ 1e-06              25500          12   417291.42      191761.92      8000         
+ 1e-06              25600          12   424737.36      190597.14      8000         
+ 1e-06              25700          12   431913.53      189106.01      8000         
+ 1e-06              25800          12   438736.08      187391.23      8000         
+ 1e-06              25900          12   445137.75      185551.39      8000         
+ 1e-06              26000          12   451066.87      183681.98      8000         
+ 1e-06              26100          12   456506.74      181876.3       8000         
+ 1e-06              26200          12   461444.38      180210.36      8000         
+ 1e-06              26300          12   465927.86      178746.53      8000         
+ 1e-06              26400          12   470133.57      177568.71      8000         
+ 1e-06              26500          12   474134.12      176672.99      8000         
+ 1e-06              26600          12   477972.05      176033.42      8000         
+ 1e-06              26700          12   481533.44      175580.73      8000         
+ 1e-06              26800          12   484262.56      175093.24      8000         
+ 1e-06              26900          12   486070.74      174178.51      8000         
+ 1e-06              27000          12   487322.68      173184.75      8000         
+ 1e-06              27100          12   488010.07      172084.41      8000         
+ 1e-06              27200          12   488079.27      170808.69      8000         
+ 1e-06              27300          12   487463.01      169291.88      8000         
+ 1e-06              27400          12   486083.88      167484.29      8000         
+ 1e-06              27500          12   483861.52      165357.96      8000         
+ 1e-06              27600          12   480726.55      162915.35      8000         
+ 1e-06              27700          12   476628.16      160197.08      8000         
+ 1e-06              27800          12   471546.66      157277.97      8000         
+ 1e-06              27900          12   465514.41      154256.68      8000         
+ 1e-06              28000          12   458577.33      151265.67      8000         
+ 1e-06              28100          12   450810.58      148472.95      8000         
+ 1e-06              28200          12   442311.34      146058.64      8000         
+ 1e-06              28300          12   433200.29      144179.21      8000         
+ 1e-06              28400          12   423625.66      142953.04      8000         
+ 1e-06              28500          12   413765.24      142450.35      8000         
+ 1e-06              28600          12   403817.87      142689.67      8000         
+ 1e-06              28700          12   393989.03      143646.16      8000         
+ 1e-06              28800          12   384484.33      145259.76      8000         
+ 1e-06              28900          12   375502.5       147428.86      8000         
+ 1e-06              29000          12   367250.91      150041.97      8000         
+ 1e-06              29100          12   359903.16      153002.45      8000         
+ 1e-06              29200          12   353678.89      156213.09      8000         
+ 1e-06              29300          12   348513.14      159524.58      8000         
+ 1e-06              29400          12   344408.06      162793.23      8000         
+ 1e-06              29500          12   341354.25      165860.66      8000         
+ 1e-06              29600          12   339292.66      168560.48      8000         
+ 1e-06              29700          12   338131.77      170723.76      8000         
+ 1e-06              29800          12   337633.87      172131.92      8000         
+ 1e-06              29900          12   337374.11      172433.2       8000         
+ 1e-06              30000          12   337379.48      171680.35      8000         
+ 1e-06              30100          12   337530.24      169927.53      8000         
+ 1e-06              30200          12   337706.33      167168.98      8000         
+ 1e-06              30300          12   337790.98      163426.76      8000         
+ 1e-06              30400          12   337697.64      158781.65      8000         
+ 1e-06              30500          12   337366.76      153367.47      8000         
+ 1e-06              30600          12   336755.07      147361.71      8000         
+ 1e-06              30700          12   335854.47      140976.25      8000         
+ 1e-06              30800          12   334662.2       134438.92      8000         
+ 1e-06              30900          12   333206.26      127982.95      8000         
+ 1e-06              31000          12   331464.93      121911.39      8000         
+ 1e-06              31100          12   329488.62      116351.57      8000         
+ 1e-06              31200          12   327373.17      111369.57      8000         
+ 1e-06              31300          12   325228.82      107014.37      8000         
+ 1e-06              31400          12   323169.4       103330.69      8000         
+ 1e-06              31500          12   321071.03      100385.5       8000         
+ 1e-06              31600          12   319031.38      98193.648      8000         
+ 1e-06              31700          12   317126.27      96712.557      8000         
+ 1e-06              31800          12   315438.37      95876.196      8000         
+ 1e-06              31900          12   313815.51      95565.683      8000         
+ 1e-06              32000          12   312239.38      95585.612      8000         
+ 1e-06              32100          12   310926.28      95828.27       8000         
+ 1e-06              32200          12   309948.5       96173.311      8000         
+ 1e-06              32300          12   309370.35      96442.454      8000         
+ 1e-06              32400          12   309201.18      96472.304      8000         
+ 1e-06              32500          12   309383.35      96140.953      8000         
+ 1e-06              32600          12   309865.73      95349.932      8000         
+ 1e-06              32700          12   310615.88      94029.061      8000         
+ 1e-06              32800          12   311624.15      92145.895      8000         
+ 1e-06              32900          12   312897.69      89717.564      8000         
+ 1e-06              33000          12   314451.54      86812.433      8000         
+ 1e-06              33100          12   316227.53      83544.302      8000         
+ 1e-06              33200          12   318247.7       80051.39       8000         
+ 1e-06              33300          12   320540.3       76478.862      8000         
+ 1e-06              33400          12   323095.15      72955.915      8000         
+ 1e-06              33500          12   325858.6       69576.683      8000         
+ 1e-06              33600          12   328744.8       66391.967      8000         
+ 1e-06              33700          12   331653.91      63413.798      8000         
+ 1e-06              33800          12   334464.83      60630.152      8000         
+ 1e-06              33900          12   337044.79      58019.718      8000         
+ 1e-06              34000          12   339236.01      55565.231      8000         
+ 1e-06              34100          12   340903.65      53265.034      8000         
+ 1e-06              34200          12   341925.23      51135.512      8000         
+ 1e-06              34300          12   342206.93      49222.627      8000         
+ 1e-06              34400          12   341649.35      47586.207      8000         
+ 1e-06              34500          12   340176.56      46291.105      8000         
+ 1e-06              34600          12   337667.96      45393.826      8000         
+ 1e-06              34700          12   333648.9       44922.723      8000         
+ 1e-06              34800          12   328322.97      44898.764      8000         
+ 1e-06              34900          12   321776.97      45316.954      8000         
+ 1e-06              35000          12   314135.31      46151.059      8000         
+ 1e-06              35100          12   305598.53      47348.476      8000         
+ 1e-06              35200          12   296352.23      48877.862      8000         
+ 1e-06              35300          12   286690.37      50727.291      8000         
+ 1e-06              35400          12   276916.56      52890.03       8000         
+ 1e-06              35500          12   267324.34      55346.515      8000         
+ 1e-06              35600          12   258272.27      58117.818      8000         
+ 1e-06              35700          12   250043.55      61209.498      8000         
+ 1e-06              35800          12   242858.64      64607.799      8000         
+ 1e-06              35900          12   236841.89      68268.869      8000         
+ 1e-06              36000          12   232039.2       72109.967      8000         
+ 1e-06              36100          12   228474.09      76010.069      8000         
+ 1e-06              36200          12   226115.66      79808.944      8000         
+ 1e-06              36300          12   224858.91      83318.088      8000         
+ 1e-06              36400          12   224574.55      86340.123      8000         
+ 1e-06              36500          12   225062.39      88691.698      8000         
+ 1e-06              36600          12   226104.86      90211.829      8000         
+ 1e-06              36700          12   227493.83      90797.861      8000         
+ 1e-06              36800          12   229033.4       90446.128      8000         
+ 1e-06              36900          12   230503.16      89238.368      8000         
+ 1e-06              37000          12   231859.26      87294.45       8000         
+ 1e-06              37100          12   233083.51      84767.219      8000         
+ 1e-06              37200          12   234235.94      81827.155      8000         
+ 1e-06              37300          12   235402.6       78655.588      8000         
+ 1e-06              37400          12   236702.37      75432.209      8000         
+ 1e-06              37500          12   238271.62      72343.239      8000         
+ 1e-06              37600          12   240191.8       69493.919      8000         
+ 1e-06              37700          12   242533.8       66949.307      8000         
+ 1e-06              37800          12   245357.31      64751.161      8000         
+ 1e-06              37900          12   248707.62      62935.998      8000         
+ 1e-06              38000          12   252613.72      61551.838      8000         
+ 1e-06              38100          12   257052.41      60672.572      8000         
+ 1e-06              38200          12   261959.48      60392.157      8000         
+ 1e-06              38300          12   267252.31      60809.22       8000         
+ 1e-06              38400          12   272813.05      62005.004      8000         
+ 1e-06              38500          12   278498.77      64020.387      8000         
+ 1e-06              38600          12   284150.51      66835.207      8000         
+ 1e-06              38700          12   289604.09      70354.609      8000         
+ 1e-06              38800          12   294696.3       74392.688      8000         
+ 1e-06              38900          12   299233.72      78587.64       8000         
+ 1e-06              39000          12   303148.91      82674.175      8000         
+ 1e-06              39100          12   306708.36      86536.948      8000         
+ 1e-06              39200          12   309600.62      89862.067      8000         
+ 1e-06              39300          12   311700.43      92456.711      8000         
+ 1e-06              39400          12   312846.09      94218.029      8000         
+ 1e-06              39500          12   312996.55      95123.249      8000         
+ 1e-06              39600          12   311853.31      95204.167      8000         
+ 1e-06              39700          12   309573.91      94589.061      8000         
+ 1e-06              39800          12   306120.98      93445.743      8000         
+ 1e-06              39900          12   301697.58      91905.478      8000         
+ 1e-06              40000          12   296465.51      90172.857      8000         
+ 1e-06              40100          12   290548.45      88402.41       8000         
+ 1e-06              40200          12   284112.93      86721.642      8000         
+ 1e-06              40300          12   277419.21      85394.11       8000         
+ 1e-06              40400          12   270601.67      84327.103      8000         
+ 1e-06              40500          12   263782.18      83363.209      8000         
+ 1e-06              40600          12   257075.79      82350.637      8000         
+ 1e-06              40700          12   250587.81      81169.87       8000         
+ 1e-06              40800          12   244408.86      79765.108      8000         
+ 1e-06              40900          12   238613.12      78159.356      8000         
+ 1e-06              41000          12   233263.11      76444.561      8000         
+ 1e-06              41100          12   228410.45      74765.108      8000         
+ 1e-06              41200          12   224097.82      73288.059      8000         
+ 1e-06              41300          12   220360.61      72170.137      8000         
+ 1e-06              41400          12   217228.06      71527.798      8000         
+ 1e-06              41500          12   214724.04      71415.43       8000         
+ 1e-06              41600          12   212866.32      71815.969      8000         
+ 1e-06              41700          12   211661         72645.932      8000         
+ 1e-06              41800          12   211103.8       73762.785      8000         
+ 1e-06              41900          12   211177.1       74986.705      8000         
+ 1e-06              42000          12   211607.62      76028.195      8000         
+ 1e-06              42100          12   212363.62      76700.039      8000         
+ 1e-06              42200          12   213529.03      76938.547      8000         
+ 1e-06              42300          12   215008.35      76693.739      8000         
+ 1e-06              42400          12   216674.6       75984.606      8000         
+ 1e-06              42500          12   218453.89      74873.786      8000         
+ 1e-06              42600          12   220270.22      73459.542      8000         
+ 1e-06              42700          12   222044.1       71860.203      8000         
+ 1e-06              42800          12   223693.34      70197.611      8000         
+ 1e-06              42900          12   225131.14      68583.075      8000         
+ 1e-06              43000          12   226264.5       67108.382      8000         
+ 1e-06              43100          12   227001.96      65842.14       8000         
+ 1e-06              43200          12   227259.44      64830.806      8000         
+ 1e-06              43300          12   226966.5       64102.73       8000         
+ 1e-06              43400          12   226072.23      63673.107      8000         
+ 1e-06              43500          12   224550.01      63547.892      8000         
+ 1e-06              43600          12   222400.27      63725.546      8000         
+ 1e-06              43700          12   219649.08      64198.122      8000         
+ 1e-06              43800          12   216345.77      64940.248      8000         
+ 1e-06              43900          12   212561.67      65898.806      8000         
+ 1e-06              44000          12   208386.13      66994.934      8000         
+ 1e-06              44100          12   203921.52      68131.378      8000         
+ 1e-06              44200          12   199278.14      69204.386      8000         
+ 1e-06              44300          12   194569.44      70118.481      8000         
+ 1e-06              44400          12   189909.06      70810.594      8000         
+ 1e-06              44500          12   185407.95      71268.155      8000         
+ 1e-06              44600          12   181166.51      71493.826      8000         
+ 1e-06              44700          12   177276.76      71524.448      8000         
+ 1e-06              44800          12   173823.45      71423.226      8000         
+ 1e-06              44900          12   170886.41      71262.132      8000         
+ 1e-06              45000          12   168528.37      71107.616      8000         
+ 1e-06              45100          12   166817.96      70991.562      8000         
+ 1e-06              45200          12   165820.31      70920.666      8000         
+ 1e-06              45300          12   165574.99      70863.13       8000         
+ 1e-06              45400          12   166019.37      70710.029      8000         
+ 1e-06              45500          12   167147.87      70350.412      8000         
+ 1e-06              45600          12   169082.77      69657.821      8000         
+ 1e-06              45700          12   171945.43      68520.345      8000         
+ 1e-06              45800          12   175909.9       66883.643      8000         
+ 1e-06              45900          12   181195.22      64747.392      8000         
+ 1e-06              46000          12   188023.1       62178.63       8000         
+ 1e-06              46100          12   196442.27      59308.946      8000         
+ 1e-06              46200          12   206276.92      56311.64       8000         
+ 1e-06              46300          12   217187.2       53375.161      8000         
+ 1e-06              46400          12   228713.53      50677.575      8000         
+ 1e-06              46500          12   240278.4       48364.725      8000         
+ 1e-06              46600          12   251273.01      46535.827      8000         
+ 1e-06              46700          12   261407.3       45239.704      8000         
+ 1e-06              46800          12   270264.36      44479.416      8000         
+ 1e-06              46900          12   277578.87      44223.038      8000         
+ 1e-06              47000          12   283258         44417.235      8000         
+ 1e-06              47100          12   287156.44      45000.19       8000         
+ 1e-06              47200          12   289242.87      45911.176      8000         
+ 1e-06              47300          12   289597.36      47095.056      8000         
+ 1e-06              47400          12   288380.97      48501.691      8000         
+ 1e-06              47500          12   285805.13      50081.427      8000         
+ 1e-06              47600          12   282130.22      51780.572      8000         
+ 1e-06              47700          12   277128.82      53385.936      8000         
+ 1e-06              47800          12   271407.61      54873.773      8000         
+ 1e-06              47900          12   265556.17      56217.287      8000         
+ 1e-06              48000          12   260082.32      57338.782      8000         
+ 1e-06              48100          12   255399.15      58166.789      8000         
+ 1e-06              48200          12   251785.89      58653.934      8000         
+ 1e-06              48300          12   249277.86      58789.891      8000         
+ 1e-06              48400          12   247494.11      58609.44       8000         
+ 1e-06              48500          12   245660.12      58192.865      8000         
+ 1e-06              48600          12   242968.44      57656.856      8000         
+ 1e-06              48700          12   238968.69      57140.878      8000         
+ 1e-06              48800          12   233565.17      56788.953      8000         
+ 1e-06              48900          12   227411.99      56720.334      8000         
+ 1e-06              49000          12   221053.64      57003.014      8000         
+ 1e-06              49100          12   215153.96      57629.419      8000         
+ 1e-06              49200          12   210402.16      58574.859      8000         
+ 1e-06              49300          12   207216.41      59750.238      8000         
+ 1e-06              49400          12   205860.43      61020.818      8000         
+ 1e-06              49500          12   206382.26      62237.849      8000         
+ 1e-06              49600          12   208600.44      63264.958      8000         
+ 1e-06              49700          12   212280.2       63999.734      8000         
+ 1e-06              49800          12   217167.6       64388.533      8000         
+ 1e-06              49900          12   223002.61      64430.977      8000         
+Loop time of 0.997677 on 1 procs for 49900 steps with 12 atoms
+
+99.2% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 0.24529    | 0.24529    | 0.24529    |   0.0 | 24.59
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.0019929  | 0.0019929  | 0.0019929  |   0.0 |  0.20
+Output  | 0.0024977  | 0.0024977  | 0.0024977  |   0.0 |  0.25
+Modify  | 0.74056    | 0.74056    | 0.74056    |   0.0 | 74.23
+Other   |            | 0.007341   |            |       |  0.74
+
+Nlocal:             12 ave          12 max          12 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:              0 ave           0 max           0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:             66 ave          66 max          66 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 66
+Ave neighs/atom = 5.5
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:01
diff --git a/lib/colvars/Makefile.common b/lib/colvars/Makefile.common
index f0282b8caf..9203f9d158 100644
--- a/lib/colvars/Makefile.common
+++ b/lib/colvars/Makefile.common
@@ -32,6 +32,7 @@ COLVARS_SRCS = \
         colvarbias_histogram_reweight_amd.cpp \
         colvarbias_meta.cpp \
         colvarbias_restraint.cpp \
+        colvarbias_opes.cpp \
         colvarcomp_alchlambda.cpp \
         colvarcomp_angles.cpp \
         colvarcomp_apath.cpp \
@@ -40,6 +41,7 @@ COLVARS_SRCS = \
         colvarcomp_distances.cpp \
         colvarcomp_gpath.cpp \
         colvarcomp_neuralnetwork.cpp \
+        colvarcomp_torchann.cpp \
         colvarcomp_combination.cpp \
         colvarcomp_protein.cpp \
         colvarcomp_rotations.cpp \
diff --git a/lib/colvars/Makefile.deps b/lib/colvars/Makefile.deps
index 61f0b1a335..82d36e53c4 100644
--- a/lib/colvars/Makefile.deps
+++ b/lib/colvars/Makefile.deps
@@ -52,6 +52,12 @@ $(COLVARS_OBJ_DIR)colvarbias_restraint.o: colvarbias_restraint.cpp \
  colvarproxy_tcl.h colvarproxy_volmaps.h colvarvalue.h \
  colvarbias_restraint.h colvarbias.h colvar.h colvarparse.h \
  colvarparams.h colvardeps.h
+$(COLVARS_OBJ_DIR)colvarbias_opes.o: colvarbias_opes.cpp \
+ colvarmodule.h colvars_version.h colvarproxy.h colvartypes.h \
+ ../../src/math_eigen_impl.h colvarproxy_io.h colvarproxy_system.h \
+ colvarproxy_tcl.h colvarproxy_volmaps.h colvarvalue.h \
+ colvarbias_opes.h colvarbias.h colvar.h colvarparse.h \
+ colvarparams.h colvardeps.h
 $(COLVARS_OBJ_DIR)colvarcomp_alchlambda.o: colvarcomp_alchlambda.cpp \
  colvarmodule.h colvars_version.h colvarvalue.h colvartypes.h \
  ../../src/math_eigen_impl.h colvar.h colvarparse.h colvarparams.h \
@@ -101,6 +107,11 @@ $(COLVARS_OBJ_DIR)colvarcomp_neuralnetwork.o: \
  colvarproxy.h colvarproxy_io.h colvarproxy_system.h colvarproxy_tcl.h \
  colvarproxy_volmaps.h colvar_geometricpath.h \
  colvar_neuralnetworkcompute.h
+$(COLVARS_OBJ_DIR)colvarcomp_torchann.o: \
+ colvarcomp_torchann.cpp colvarmodule.h colvars_version.h \
+ colvarvalue.h colvartypes.h colvarparse.h colvarparams.h colvar.h \
+ colvardeps.h colvarcomp.h colvarcomp_torchann.h colvaratoms.h colvarproxy.h colvarproxy_io.h \
+ colvarproxy_system.h colvarproxy_tcl.h 
 $(COLVARS_OBJ_DIR)colvarcomp_combination.o: colvarcomp_combination.cpp \
  colvarcomp.h colvarmodule.h colvars_version.h colvaratoms.h \
  colvarproxy.h colvartypes.h ../../src/math_eigen_impl.h colvarproxy_io.h \
@@ -127,7 +138,7 @@ $(COLVARS_OBJ_DIR)colvarcomp_volmaps.o: colvarcomp_volmaps.cpp \
  colvar_geometricpath.h
 $(COLVARS_OBJ_DIR)colvar.o: colvar.cpp colvarmodule.h colvars_version.h \
  colvarvalue.h colvartypes.h ../../src/math_eigen_impl.h colvarparse.h \
- colvarparams.h colvarcomp.h colvaratoms.h colvarproxy.h colvarproxy_io.h \
+ colvarparams.h colvarcomp.h colvarcomp_torchann.h colvaratoms.h colvarproxy.h colvarproxy_io.h \
  colvarproxy_system.h colvarproxy_tcl.h colvarproxy_volmaps.h \
  colvardeps.h colvar.h colvar_geometricpath.h colvarbias.h \
  colvars_memstream.h
@@ -152,7 +163,8 @@ $(COLVARS_OBJ_DIR)colvarmodule.o: colvarmodule.cpp colvarmodule.h \
  colvarbias_histogram_reweight_amd.h colvarbias_meta.h colvarscript.h \
  colvarscript_commands.h colvarscript_commands_colvar.h \
  colvarscript_commands_bias.h colvaratoms.h colvarcomp.h \
- colvar_geometricpath.h colvars_memstream.h colvarmodule_refs.h
+ colvar_geometricpath.h colvars_memstream.h colvarmodule_refs.h \
+ colvarbias_opes.h
 $(COLVARS_OBJ_DIR)colvarparams.o: colvarparams.cpp colvarmodule.h \
  colvars_version.h colvarvalue.h colvartypes.h \
  ../../src/math_eigen_impl.h colvarparams.h
diff --git a/lib/colvars/colvar.cpp b/lib/colvars/colvar.cpp
index 58eb87fd0e..136aa2afd5 100644
--- a/lib/colvars/colvar.cpp
+++ b/lib/colvars/colvar.cpp
@@ -21,6 +21,7 @@
 #include "colvarbias.h"
 #include "colvars_memstream.h"
 
+#include "colvarcomp_torchann.h"
 
 std::map<std::string, std::function<colvar::cvc *()>> colvar::global_cvc_map =
     std::map<std::string, std::function<colvar::cvc *()>>();
@@ -95,6 +96,12 @@ int colvar::init(std::string const &conf)
   if (error_code != COLVARS_OK) {
     return cvm::get_error();
   }
+#else
+  if (key_lookup(conf, "customFunction")) {
+    return cvm::error(
+        "Error: customFunction keyword is used, but the Lepton library is not available.\n",
+        COLVARS_NOT_IMPLEMENTED);
+  }
 #endif
 
   // Setup colvar as scripted function of components
@@ -175,12 +182,6 @@ int colvar::init(std::string const &conf)
 
   set_enabled(f_cv_scalar, (value().type() == colvarvalue::type_scalar));
 
-  // If using scripted biases, any colvar may receive bias forces
-  // and will need its gradient
-  if (cvm::scripted_forces()) {
-    enable(f_cv_gradient);
-  }
-
   // check for linear combinations
   {
     bool lin = !(is_enabled(f_cv_scripted) || is_enabled(f_cv_custom_function));
@@ -311,9 +312,27 @@ int colvar::init(std::string const &conf)
   // Detect if we have a single component that is an alchemical lambda
   if (is_enabled(f_cv_single_cvc) && cvcs[0]->function_type() == "alchLambda") {
     enable(f_cv_external);
+
+    static_cast<colvar::alch_lambda *>(cvcs[0].get())->init_alchemy(time_step_factor);
+  }
+
+  // If using scripted biases, any colvar may receive bias forces
+  if (cvm::scripted_forces()) {
+    enable(f_cv_apply_force);
   }
 
   error_code |= init_extended_Lagrangian(conf);
+
+  // When total atomic forces are obtained from the previous time step,
+  // we cannot (currently) have colvar values and projected total forces for the same timestep
+  // (that would require anticipating the total force request by one timestep),
+  // i.e. the combination of f_cv_total_force_calc and f_cv_multiple_ts requires f_cv_total_force_current_step.
+  // Because f_cv_total_force_current_step is static, we can hard-code this once the other features
+  // are set, that is, f_cv_external and f_cv_extended_Lagrangian.
+  if (!is_enabled(f_cv_total_force_current_step)) {
+    exclude_feature_self(f_cv_multiple_ts, f_cv_total_force_calc);
+  }
+
   error_code |= init_output_flags(conf);
 
   // Now that the children are defined we can solve dependencies
@@ -495,8 +514,6 @@ int colvar::init_grid_parameters(std::string const &conf)
 {
   int error_code = COLVARS_OK;
 
-  colvarmodule *cv = cvm::main();
-
   cvm::real default_width = width;
 
   if (!key_already_set("width")) {
@@ -522,34 +539,68 @@ int colvar::init_grid_parameters(std::string const &conf)
 
   if (is_enabled(f_cv_scalar)) {
 
-    if (is_enabled(f_cv_single_cvc)) {
-      // Get the default boundaries from the component
+    // Record the CVC's intrinsic boundaries, and set them as default values for the user's choice
+    colvarvalue cvc_lower_boundary, cvc_upper_boundary;
+
+    if (is_enabled(f_cv_single_cvc)) { // Get the intrinsic boundaries of the CVC
+
       if (cvcs[0]->is_enabled(f_cvc_lower_boundary)) {
         enable(f_cv_lower_boundary);
         enable(f_cv_hard_lower_boundary);
-        lower_boundary =
+        lower_boundary = cvc_lower_boundary =
           *(reinterpret_cast<colvarvalue const *>(cvcs[0]->get_param_ptr("lowerBoundary")));
       }
+
       if (cvcs[0]->is_enabled(f_cvc_upper_boundary)) {
         enable(f_cv_upper_boundary);
         enable(f_cv_hard_upper_boundary);
-        upper_boundary =
-          *(reinterpret_cast<colvarvalue const *>(cvcs[0]->get_param_ptr("upperBoundary")));
+        upper_boundary = cvc_upper_boundary =
+            *(reinterpret_cast<colvarvalue const *>(cvcs[0]->get_param_ptr("upperBoundary")));
       }
     }
 
     if (get_keyval(conf, "lowerBoundary", lower_boundary, lower_boundary)) {
       enable(f_cv_lower_boundary);
-      // Because this is the user's choice, we cannot assume it is a true
-      // physical boundary
-      disable(f_cv_hard_lower_boundary);
+      if (is_enabled(f_cv_single_cvc) && is_enabled(f_cv_hard_lower_boundary)) {
+        if (cvm::sqrt(dist2(lower_boundary, cvc_lower_boundary))/width > colvar_boundaries_tol)  {
+          // The user choice is different from the CVC's default
+          disable(f_cv_hard_lower_boundary);
+        }
+      }
     }
 
     if (get_keyval(conf, "upperBoundary", upper_boundary, upper_boundary)) {
       enable(f_cv_upper_boundary);
-      disable(f_cv_hard_upper_boundary);
+      if (is_enabled(f_cv_single_cvc) && is_enabled(f_cv_hard_upper_boundary)) {
+        if (cvm::sqrt(dist2(upper_boundary, cvc_upper_boundary))/width > colvar_boundaries_tol)  {
+          disable(f_cv_hard_upper_boundary);
+        }
+      }
     }
 
+    get_keyval_feature(this, conf, "hardLowerBoundary", f_cv_hard_lower_boundary,
+                       is_enabled(f_cv_hard_lower_boundary));
+
+    get_keyval_feature(this, conf, "hardUpperBoundary", f_cv_hard_upper_boundary,
+                       is_enabled(f_cv_hard_upper_boundary));
+
+    get_keyval(conf, "expandBoundaries", expand_boundaries, expand_boundaries);
+
+    error_code |= parse_legacy_wall_params(conf);
+    error_code |= check_grid_parameters();
+  }
+
+  return error_code;
+}
+
+
+int colvar::parse_legacy_wall_params(std::string const &conf)
+{
+  int error_code = COLVARS_OK;
+  colvarmodule *cv = cvm::main();
+
+  if (is_enabled(f_cv_scalar)) {
+
     // Parse legacy wall options and set up a harmonicWalls bias if needed
     cvm::real lower_wall_k = 0.0, upper_wall_k = 0.0;
     cvm::real lower_wall = 0.0, upper_wall = 0.0;
@@ -603,13 +654,14 @@ harmonicWalls {\n\
     }
   }
 
-  get_keyval_feature(this, conf, "hardLowerBoundary", f_cv_hard_lower_boundary,
-                     is_enabled(f_cv_hard_lower_boundary));
+  return error_code;
+}
 
-  get_keyval_feature(this, conf, "hardUpperBoundary", f_cv_hard_upper_boundary,
-                     is_enabled(f_cv_hard_upper_boundary));
 
-  // consistency checks for boundaries and walls
+int colvar::check_grid_parameters()
+{
+  int error_code = COLVARS_OK;
+
   if (is_enabled(f_cv_lower_boundary) && is_enabled(f_cv_upper_boundary)) {
     if (lower_boundary >= upper_boundary) {
       error_code |= cvm::error("Error: the upper boundary, "+
@@ -620,7 +672,6 @@ harmonicWalls {\n\
     }
   }
 
-  get_keyval(conf, "expandBoundaries", expand_boundaries, expand_boundaries);
   if (expand_boundaries && periodic_boundaries()) {
     error_code |= cvm::error("Error: trying to expand boundaries that already "
                              "cover a whole period of a periodic colvar.\n",
@@ -654,14 +705,15 @@ int colvar::init_extended_Lagrangian(std::string const &conf)
     x_ext.type(colvarvalue::type_notset);
     v_ext.type(value());
     fr.type(value());
-    const bool temp_provided = get_keyval(conf, "extendedTemp", temp,
-                                          proxy->target_temperature());
+    const bool temp_provided = get_keyval(conf, "extendedTemp", temp, proxy->target_temperature());
     if (is_enabled(f_cv_external)) {
-      // In the case of an "external" coordinate, there is no coupling potential:
+      // In the case of a driven external parameter in the back-end, there is no coupling potential:
       // only the fictitious mass is meaningful
       get_keyval(conf, "extendedMass", ext_mass);
       // Ensure that the computed restraint energy term is zero
       ext_force_k = 0.0;
+      // Then we need forces from the back-end
+      enable(f_cv_total_force_calc);
     } else {
       // Standard case of coupling to a geometric colvar
       if (temp <= 0.0) { // Then a finite temperature is required
@@ -779,6 +831,7 @@ int colvar::init_components_type(const std::string& conf, const char* def_config
                            &def_conf,
                            &pos) ) {
 
+    cvm::increase_depth();
     cvm::log("Initializing "
              "a new \""+std::string(def_config_key)+"\" component"+
              (cvm::debug() ? ", with configuration:\n"+def_conf
@@ -791,7 +844,6 @@ int colvar::init_components_type(const std::string& conf, const char* def_config
     }
     cvcs.push_back(std::shared_ptr<colvar::cvc>(cvcp));
 
-    cvm::increase_depth();
     int error_code_this = cvcp->init(def_conf);
     if (error_code_this == COLVARS_OK) {
       // Checking for invalid keywords only if the parsing was successful, otherwise any
@@ -851,12 +903,8 @@ void colvar::define_component_types()
   add_component_type<dipole_angle>("dipole angle", "dipoleAngle");
   add_component_type<dihedral>("dihedral", "dihedral");
   add_component_type<h_bond>("hydrogen bond", "hBond");
-
-  if (proxy->check_atom_name_selections_available() == COLVARS_OK) {
-    add_component_type<alpha_angles>("alpha helix", "alpha");
-    add_component_type<dihedPC>("dihedral principal component", "dihedralPC");
-  }
-
+  add_component_type<alpha_angles>("alpha helix", "alpha");
+  add_component_type<dihedPC>("dihedral principal component", "dihedralPC");
   add_component_type<orientation>("orientation", "orientation");
   add_component_type<orientation_angle>("orientation angle", "orientationAngle");
   add_component_type<orientation_proj>("orientation projection", "orientationProj");
@@ -888,6 +936,8 @@ void colvar::define_component_types()
 
   add_component_type<neuralNetwork>("neural network CV for other CVs", "neuralNetwork");
 
+  add_component_type<torchANN>("CV defined by PyTorch artificial neural network models", "torchANN");
+
   if (proxy->check_volmaps_available() == COLVARS_OK) {
     add_component_type<map_total>("total value of atomic map", "mapTotal");
   }
@@ -1098,6 +1148,9 @@ int colvar::init_dependencies() {
     init_feature(f_cv_gradient, "gradient", f_type_dynamic);
     require_feature_children(f_cv_gradient, f_cvc_gradient);
 
+    init_feature(f_cv_apply_force, "apply_force", f_type_dynamic);
+    require_feature_alt(f_cv_apply_force, f_cv_gradient, f_cv_external);
+
     init_feature(f_cv_collect_gradient, "collect_gradient", f_type_dynamic);
     require_feature_self(f_cv_collect_gradient, f_cv_gradient);
     require_feature_self(f_cv_collect_gradient, f_cv_scalar);
@@ -1116,6 +1169,10 @@ int colvar::init_dependencies() {
     init_feature(f_cv_total_force, "total_force", f_type_dynamic);
     require_feature_alt(f_cv_total_force, f_cv_extended_Lagrangian, f_cv_total_force_calc);
 
+    // If this is active, the total force reported to biases (ABF / TI) is from the current step
+    // therefore it does not include Colvars biases -> it is a "system force"
+    init_feature(f_cv_total_force_current_step, "total_force_current_step", f_type_dynamic);
+
     // Deps for explicit total force calculation
     init_feature(f_cv_total_force_calc, "total_force_calculation", f_type_dynamic);
     require_feature_self(f_cv_total_force_calc, f_cv_scalar);
@@ -1134,13 +1191,15 @@ int colvar::init_dependencies() {
 
     init_feature(f_cv_extended_Lagrangian, "extended_Lagrangian", f_type_user);
     require_feature_self(f_cv_extended_Lagrangian, f_cv_scalar);
-    require_feature_self(f_cv_extended_Lagrangian, f_cv_gradient);
+    require_feature_self(f_cv_extended_Lagrangian, f_cv_apply_force);
 
     init_feature(f_cv_Langevin, "Langevin_dynamics", f_type_user);
     require_feature_self(f_cv_Langevin, f_cv_extended_Lagrangian);
 
-    init_feature(f_cv_external, "external", f_type_user);
+    init_feature(f_cv_external, "external_parameter", f_type_static);
     require_feature_self(f_cv_external, f_cv_single_cvc);
+    // External parameters always report the total force for current step
+    require_feature_self(f_cv_external, f_cv_total_force_current_step);
 
     init_feature(f_cv_single_cvc, "single_component", f_type_static);
 
@@ -1201,10 +1260,7 @@ int colvar::init_dependencies() {
     init_feature(f_cv_linear, "linear", f_type_static);
     init_feature(f_cv_homogeneous, "homogeneous", f_type_static);
 
-    // because total forces are obtained from the previous time step,
-    // we cannot (currently) have colvar values and total forces for the same timestep
     init_feature(f_cv_multiple_ts, "multiple_timestep", f_type_static);
-    exclude_feature_self(f_cv_multiple_ts, f_cv_total_force_calc);
 
     // check that everything is initialized
     for (i = 0; i < colvardeps::f_cv_ntot; i++) {
@@ -1225,6 +1281,10 @@ int colvar::init_dependencies() {
   feature_states[f_cv_fdiff_velocity].available =
     cvm::main()->proxy->simulation_running();
 
+  // Some back-ends report current total forces for all colvars
+  if (cvm::main()->proxy->total_forces_same_step())
+    enable(f_cv_total_force_current_step);
+
   return COLVARS_OK;
 }
 
@@ -1351,7 +1411,6 @@ int colvar::calc_cvcs(int first_cvc, size_t num_cvcs)
     cvm::log("Calculating colvar \""+this->name+"\", components "+
              cvm::to_str(first_cvc)+" through "+cvm::to_str(first_cvc+num_cvcs)+".\n");
 
-  colvarproxy *proxy = cvm::main()->proxy;
   int error_code = COLVARS_OK;
 
   error_code |= check_cvc_range(first_cvc, num_cvcs);
@@ -1359,7 +1418,7 @@ int colvar::calc_cvcs(int first_cvc, size_t num_cvcs)
     return error_code;
   }
 
-  if ((cvm::step_relative() > 0) && (!proxy->total_forces_same_step())){
+  if ((cvm::step_relative() > 0) && (!is_enabled(f_cv_total_force_current_step))){
     // Use Jacobian derivative from previous timestep
     error_code |= calc_cvc_total_force(first_cvc, num_cvcs);
   }
@@ -1367,7 +1426,7 @@ int colvar::calc_cvcs(int first_cvc, size_t num_cvcs)
   error_code |= calc_cvc_values(first_cvc, num_cvcs);
   error_code |= calc_cvc_gradients(first_cvc, num_cvcs);
   error_code |= calc_cvc_Jacobians(first_cvc, num_cvcs);
-  if (proxy->total_forces_same_step()){
+  if (is_enabled(f_cv_total_force_current_step)){
     // Use Jacobian derivative from this timestep
     error_code |= calc_cvc_total_force(first_cvc, num_cvcs);
   }
@@ -1384,10 +1443,9 @@ int colvar::collect_cvc_data()
   if (cvm::debug())
     cvm::log("Calculating colvar \""+this->name+"\"'s properties.\n");
 
-  colvarproxy *proxy = cvm::main()->proxy;
   int error_code = COLVARS_OK;
 
-  if ((cvm::step_relative() > 0) && (!proxy->total_forces_same_step())){
+  if ((cvm::step_relative() > 0) && (!is_enabled(f_cv_total_force_current_step))){
     // Total force depends on Jacobian derivative from previous timestep
     // collect_cvc_total_forces() uses the previous value of jd
     error_code |= collect_cvc_total_forces();
@@ -1395,7 +1453,7 @@ int colvar::collect_cvc_data()
   error_code |= collect_cvc_values();
   error_code |= collect_cvc_gradients();
   error_code |= collect_cvc_Jacobians();
-  if (proxy->total_forces_same_step()){
+  if (is_enabled(f_cv_total_force_current_step)){
     // Use Jacobian derivative from this timestep
     error_code |= collect_cvc_total_forces();
   }
@@ -1609,22 +1667,20 @@ int colvar::collect_cvc_total_forces()
   if (is_enabled(f_cv_total_force_calc)) {
     ft.reset();
 
-    if (cvm::step_relative() > 0) {
-      // get from the cvcs the total forces from the PREVIOUS step
-      for (size_t i = 0; i < cvcs.size();  i++) {
-        if (!cvcs[i]->is_enabled()) continue;
-            if (cvm::debug())
-            cvm::log("Colvar component no. "+cvm::to_str(i+1)+
-                " within colvar \""+this->name+"\" has total force "+
-                cvm::to_str((cvcs[i])->total_force(),
-                cvm::cv_width, cvm::cv_prec)+".\n");
-        // linear combination is assumed
-        ft += (cvcs[i])->total_force() * (cvcs[i])->sup_coeff / active_cvc_square_norm;
-      }
+    for (size_t i = 0; i < cvcs.size();  i++) {
+      if (!cvcs[i]->is_enabled()) continue;
+          if (cvm::debug())
+          cvm::log("Colvar component no. "+cvm::to_str(i+1)+
+              " within colvar \""+this->name+"\" has total force "+
+              cvm::to_str((cvcs[i])->total_force(),
+              cvm::cv_width, cvm::cv_prec)+".\n");
+      // linear combination is assumed
+      ft += (cvcs[i])->total_force() * (cvcs[i])->sup_coeff / active_cvc_square_norm;
     }
 
     if (!(is_enabled(f_cv_hide_Jacobian) && is_enabled(f_cv_subtract_applied_force))) {
-      // add the Jacobian force to the total force, and don't apply any silent
+      // This is by far the most common case
+      // Add the Jacobian force to the total force, and don't apply any silent
       // correction internally: biases such as colvarbias_abf will handle it
       // If f_cv_hide_Jacobian is enabled, a force of -fj is present in ft due to the
       // Jacobian-compensating force
@@ -1632,6 +1688,10 @@ int colvar::collect_cvc_total_forces()
     }
   }
 
+  if (is_enabled(f_cv_total_force_current_step)) {
+   // Report total force value without waiting for calc_colvar_properties()
+    ft_reported = ft;
+  }
   return COLVARS_OK;
 }
 
@@ -1733,12 +1793,15 @@ int colvar::calc_colvar_properties()
     // But we report values at the beginning of the timestep (value at t=0 on the first timestep)
     x_reported = x_ext;
     v_reported = v_ext;
-    // the "total force" with the extended Lagrangian is
-    // calculated in update_forces_energy() below
 
+    // the "total force" for the extended Lagrangian is calculated in update_forces_energy() below
+    // A future improvement could compute a "system force" here, borrowing a part of update_extended_Lagrangian()
+    // this would change the behavior of eABF with respect to other biases
+    // by enabling f_cv_total_force_current_step, and reducing the total force to a system force
+    // giving the behavior of f_cv_subtract_applied_force - this is correct for WTM-eABF etc.
   } else {
 
-    if (is_enabled(f_cv_subtract_applied_force)) {
+    if (is_enabled(f_cv_subtract_applied_force) && !cvm::proxy->total_forces_same_step()) {
       // correct the total force only if it has been measured
       // TODO add a specific test instead of relying on sq norm
       if (ft.norm2() > 0.0) {
@@ -1825,7 +1888,8 @@ void colvar::update_extended_Lagrangian()
   // Integrate with slow timestep (if time_step_factor != 1)
   cvm::real dt = cvm::dt() * cvm::real(time_step_factor);
 
-  colvarvalue f_ext(fr.type()); // force acting on the extended variable
+  // Force acting on the extended variable
+  colvarvalue f_ext(fr.type());
   f_ext.reset();
 
   if (is_enabled(f_cv_external)) {
@@ -1834,13 +1898,13 @@ void colvar::update_extended_Lagrangian()
     f += fb_actual;
   }
 
-  // fr: bias force on extended variable (without harmonic spring), for output in trajectory
-  fr = f;
-
   // External force has been scaled for an inner-timestep impulse (for the back-end integrator)
   // here we scale it back because this integrator uses only the outer (long) timestep
   f_ext = f / cvm::real(time_step_factor);
 
+  // fr: bias force on extended variable (without harmonic spring), for output in trajectory
+  fr = f_ext;
+
   colvarvalue f_system(fr.type()); // force exterted by the system on the extended DOF
 
   if (is_enabled(f_cv_external)) {
@@ -1863,14 +1927,18 @@ void colvar::update_extended_Lagrangian()
   }
   f_ext += f_system;
 
-  if (is_enabled(f_cv_subtract_applied_force)) {
-    // Report a "system" force without the biases on this colvar
-    // that is, just the spring force (or alchemical force)
-    ft_reported = f_system;
-  } else {
-    // The total force acting on the extended variable is f_ext
-    // This will be used in the next timestep
-    ft_reported = f_ext;
+  if ( ! is_enabled(f_cv_total_force_current_step)) {
+    if (is_enabled(f_cv_subtract_applied_force)) {
+      // Report a "system" force without the biases on this colvar
+      // that is, just the spring force (or alchemical force)
+      ft_reported = f_system;
+    } else {
+      // The total force acting on the extended variable is f_ext
+      // This will be used in the next timestep
+      ft_reported = f_ext;
+    }
+    // Since biases have already been updated, this ft_reported will only be
+    // communicated to biases at the next timestep
   }
 
   // backup in case we need to revert this integration timestep
@@ -2184,12 +2252,10 @@ int colvar::set_cvc_param(std::string const &param_name, void const *new_value)
 bool colvar::periodic_boundaries(colvarvalue const &lb, colvarvalue const &ub) const
 {
   if (period > 0.0) {
-    if ( ((cvm::sqrt(this->dist2(lb, ub))) / this->width)
-         < 1.0E-10 ) {
+    if (((cvm::sqrt(this->dist2(lb, ub))) / this->width) < colvar_boundaries_tol) {
       return true;
     }
   }
-
   return false;
 }
 
@@ -2347,6 +2413,11 @@ int colvar::set_state_params(std::string const &conf)
              cvm::to_str(x)+"\n");
     x_restart = x;
     after_restart = true;
+    // Externally driven cv (e.g. alchemical lambda) is imposed by restart value
+    if (is_enabled(f_cv_external) && is_enabled(f_cv_extended_Lagrangian)) {
+      // Request immediate sync of driven parameter to back-end code
+      cvcs[0]->set_value(x, true);
+    }
   }
 
   if (is_enabled(f_cv_extended_Lagrangian)) {
@@ -2489,8 +2560,14 @@ std::string const colvar::get_state_params() const
   os << "  name " << name << "\n"
      << "  x "
      << std::setprecision(cvm::cv_prec)
-     << std::setw(cvm::cv_width)
-     << x << "\n";
+     << std::setw(cvm::cv_width);
+  if (is_enabled(f_cv_external) && is_enabled(f_cv_extended_Lagrangian)) {
+    // For an external colvar, x is one timestep in the future after integration
+    // write x at beginning of timestep
+    os << x_reported << "\n";
+  } else {
+    os << x << "\n";
+  }
 
   if (is_enabled(f_cv_output_velocity)) {
     os << "  v "
diff --git a/lib/colvars/colvar.h b/lib/colvars/colvar.h
index 443e1e4bdd..1db313f416 100644
--- a/lib/colvars/colvar.h
+++ b/lib/colvars/colvar.h
@@ -263,6 +263,12 @@ public:
   /// Init defaults for grid options
   int init_grid_parameters(std::string const &conf);
 
+  /// Consistency check for the grid parameters
+  int check_grid_parameters();
+
+  /// Parse legacy wall keywords (walls are now implemented as harmonicWalls biases)
+  int parse_legacy_wall_params(std::string const &conf);
+
   /// Init extended Lagrangian parameters
   int init_extended_Lagrangian(std::string const &conf);
 
@@ -633,6 +639,7 @@ public:
   class euler_psi;
   class euler_theta;
   class neuralNetwork;
+  class torchANN;
   class customColvar;
 
   // non-scalar components
@@ -753,7 +760,7 @@ inline colvarvalue const & colvar::total_force() const
 
 inline void colvar::add_bias_force(colvarvalue const &force)
 {
-  check_enabled(f_cv_gradient,
+  check_enabled(f_cv_apply_force,
                 std::string("applying a force to the variable \""+name+"\""));
   if (cvm::debug()) {
     cvm::log("Adding biasing force "+cvm::to_str(force)+" to colvar \""+name+"\".\n");
@@ -778,4 +785,10 @@ inline void colvar::reset_bias_force() {
   fb_actual.reset();
 }
 
+
+namespace {
+  // Tolerance parameter to decide when two boundaries coincide
+  constexpr cvm::real colvar_boundaries_tol = 1.0e-10;
+}
+
 #endif
diff --git a/lib/colvars/colvar_rotation_derivative.h b/lib/colvars/colvar_rotation_derivative.h
index 50f4f1aa97..ceaf728c36 100644
--- a/lib/colvars/colvar_rotation_derivative.h
+++ b/lib/colvars/colvar_rotation_derivative.h
@@ -5,11 +5,21 @@
 #include <type_traits>
 #include <cstring>
 
+#ifndef _noalias
+#if defined(__INTEL_COMPILER) || (defined(__PGI) && !defined(__NVCOMPILER))
+#define _noalias restrict
+#elif defined(__GNUC__) || defined(__INTEL_LLVM_COMPILER) || defined(__NVCOMPILER)
+#define _noalias __restrict
+#else
+#define _noalias
+#endif
+#endif
+
 /// \brief Helper function for loading the ia-th atom in the vector pos to x, y and z (C++11 SFINAE is used)
 template <typename T, typename std::enable_if<std::is_same<T, cvm::atom_pos>::value, bool>::type = true>
 inline void read_atom_coord(
   size_t ia, const std::vector<T>& pos,
-  cvm::real* x, cvm::real* y, cvm::real* z) {
+  cvm::real* _noalias x, cvm::real* _noalias y, cvm::real* _noalias z) {
   *x = pos[ia].x;
   *y = pos[ia].y;
   *z = pos[ia].z;
@@ -18,7 +28,7 @@ inline void read_atom_coord(
 template <typename T, typename std::enable_if<std::is_same<T, cvm::atom>::value, bool>::type = true>
 inline void read_atom_coord(
   size_t ia, const std::vector<T>& pos,
-  cvm::real* x, cvm::real* y, cvm::real* z) {
+  cvm::real* _noalias x, cvm::real* _noalias y, cvm::real* _noalias z) {
   *x = pos[ia].pos.x;
   *y = pos[ia].pos.y;
   *z = pos[ia].pos.z;
@@ -26,9 +36,9 @@ inline void read_atom_coord(
 
 /// \brief Helper enum class for specifying options in rotation_derivative::prepare_derivative
 enum class rotation_derivative_dldq {
-  /// Require the derivative of the leading eigenvalue with respect to the atom coordinats
+  /// Require the derivative of the leading eigenvalue with respect to the atom coordinates
   use_dl = 1 << 0,
-  /// Require the derivative of the leading eigenvector with respect to the atom coordinats
+  /// Require the derivative of the leading eigenvector with respect to the atom coordinates
   use_dq = 1 << 1
 };
 
@@ -327,12 +337,13 @@ struct rotation_derivative {
     *  @param[out] dq0_out The output of derivative of Q
     *  @param[out] ds_out  The output of derivative of overlap matrix S
     */
+  template <bool use_dl, bool use_dq, bool use_ds>
   void calc_derivative_impl(
     const cvm::rvector (&ds)[4][4],
-    cvm::rvector* const dl0_out,
-    cvm::vector1d<cvm::rvector>* const dq0_out,
-    cvm::matrix2d<cvm::rvector>* const ds_out) const {
-    if (ds_out != nullptr) {
+    cvm::rvector* _noalias const dl0_out,
+    cvm::vector1d<cvm::rvector>* _noalias const dq0_out,
+    cvm::matrix2d<cvm::rvector>* _noalias const ds_out) const {
+    if (use_ds) {
       // this code path is for debug_gradients, so not necessary to unroll the loop
       *ds_out = cvm::matrix2d<cvm::rvector>(4, 4);
       for (int i = 0; i < 4; ++i) {
@@ -341,7 +352,7 @@ struct rotation_derivative {
         }
       }
     }
-    if (dl0_out != nullptr) {
+    if (use_dl) {
       /* manually loop unrolling of the following loop:
         dl0_1.reset();
         for (size_t i = 0; i < 4; i++) {
@@ -367,7 +378,7 @@ struct rotation_derivative {
                  tmp_Q0Q0[3][2] * ds[3][2] +
                  tmp_Q0Q0[3][3] * ds[3][3];
     }
-    if (dq0_out != nullptr) {
+    if (use_dq) {
       // we can skip this check if a fixed-size array is used
       if (dq0_out->size() != 4) dq0_out->resize(4);
       /* manually loop unrolling of the following loop:
@@ -462,32 +473,21 @@ struct rotation_derivative {
     *  @param[out] ds_1_out  The output of derivative of overlap matrix S with
     *                        respect to ia-th atom of group 1
     */
+  template <bool use_dl, bool use_dq, bool use_ds>
   void calc_derivative_wrt_group1(
-    size_t ia, cvm::rvector* const dl0_1_out = nullptr,
-    cvm::vector1d<cvm::rvector>* const dq0_1_out = nullptr,
-    cvm::matrix2d<cvm::rvector>* const ds_1_out = nullptr) const {
-      if (dl0_1_out == nullptr && dq0_1_out == nullptr) return;
+    size_t ia, cvm::rvector* _noalias const dl0_1_out = nullptr,
+    cvm::vector1d<cvm::rvector>* _noalias const dq0_1_out = nullptr,
+    cvm::matrix2d<cvm::rvector>* _noalias const ds_1_out = nullptr) const {
+      // Former early return on (dl0_1_out == nullptr && dq0_1_out == nullptr) is disabled: use_dl/use_dq/use_ds select the outputs
       cvm::real a2x, a2y, a2z;
       // we can get rid of the helper function read_atom_coord if C++17 (constexpr) is available
       read_atom_coord(ia, m_pos2, &a2x, &a2y, &a2z);
-      cvm::rvector ds_1[4][4];
-      ds_1[0][0].set( a2x,  a2y,  a2z);
-      ds_1[1][0].set( 0.0,  a2z, -a2y);
-      ds_1[0][1] = ds_1[1][0];
-      ds_1[2][0].set(-a2z,  0.0,  a2x);
-      ds_1[0][2] = ds_1[2][0];
-      ds_1[3][0].set( a2y, -a2x,  0.0);
-      ds_1[0][3] = ds_1[3][0];
-      ds_1[1][1].set( a2x, -a2y, -a2z);
-      ds_1[2][1].set( a2y,  a2x,  0.0);
-      ds_1[1][2] = ds_1[2][1];
-      ds_1[3][1].set( a2z,  0.0,  a2x);
-      ds_1[1][3] = ds_1[3][1];
-      ds_1[2][2].set(-a2x,  a2y, -a2z);
-      ds_1[3][2].set( 0.0,  a2z,  a2y);
-      ds_1[2][3] = ds_1[3][2];
-      ds_1[3][3].set(-a2x, -a2y,  a2z);
-      calc_derivative_impl(ds_1, dl0_1_out, dq0_1_out, ds_1_out);
+      const cvm::rvector ds_1[4][4] = {
+        {{ a2x,  a2y,  a2z}, { 0.0, a2z,  -a2y}, {-a2z,  0.0,  a2x}, { a2y, -a2x,  0.0}},
+        {{ 0.0,  a2z, -a2y}, { a2x, -a2y, -a2z}, { a2y,  a2x,  0.0}, { a2z,  0.0,  a2x}},
+        {{-a2z,  0.0,  a2x}, { a2y,  a2x,  0.0}, {-a2x,  a2y, -a2z}, { 0.0,  a2z,  a2y}},
+        {{ a2y, -a2x,  0.0}, { a2z,  0.0,  a2x}, { 0.0,  a2z,  a2y}, {-a2x, -a2y,  a2z}}};
+      calc_derivative_impl<use_dl, use_dq, use_ds>(ds_1, dl0_1_out, dq0_1_out, ds_1_out);
     }
   /*! @brief Calculate the derivatives of S, the leading eigenvalue L and
     *         the leading eigenvector Q with respect to `m_pos2`
@@ -499,32 +499,21 @@ struct rotation_derivative {
     *  @param[out] ds_2_out  The output of derivative of overlap matrix S with
     *                        respect to ia-th atom of group 2
     */
+  template <bool use_dl, bool use_dq, bool use_ds>
   void calc_derivative_wrt_group2(
-    size_t ia, cvm::rvector* const dl0_2_out = nullptr,
-    cvm::vector1d<cvm::rvector>* const dq0_2_out = nullptr,
-    cvm::matrix2d<cvm::rvector>* const ds_2_out = nullptr) const {
-    if (dl0_2_out == nullptr && dq0_2_out == nullptr) return;
+    size_t ia, cvm::rvector* _noalias const dl0_2_out = nullptr,
+    cvm::vector1d<cvm::rvector>* _noalias const dq0_2_out = nullptr,
+    cvm::matrix2d<cvm::rvector>* _noalias const ds_2_out = nullptr) const {
+    // Former early return on (dl0_2_out == nullptr && dq0_2_out == nullptr) is disabled: use_dl/use_dq/use_ds select the outputs
     cvm::real a1x, a1y, a1z;
     // we can get rid of the helper function read_atom_coord if C++17 (constexpr) is available
     read_atom_coord(ia, m_pos1, &a1x, &a1y, &a1z);
-    cvm::rvector ds_2[4][4];
-    ds_2[0][0].set( a1x,  a1y,  a1z);
-    ds_2[1][0].set( 0.0, -a1z,  a1y);
-    ds_2[0][1] = ds_2[1][0];
-    ds_2[2][0].set( a1z,  0.0, -a1x);
-    ds_2[0][2] = ds_2[2][0];
-    ds_2[3][0].set(-a1y,  a1x,  0.0);
-    ds_2[0][3] = ds_2[3][0];
-    ds_2[1][1].set( a1x, -a1y, -a1z);
-    ds_2[2][1].set( a1y,  a1x,  0.0);
-    ds_2[1][2] = ds_2[2][1];
-    ds_2[3][1].set( a1z,  0.0,  a1x);
-    ds_2[1][3] = ds_2[3][1];
-    ds_2[2][2].set(-a1x,  a1y, -a1z);
-    ds_2[3][2].set( 0.0,  a1z,  a1y);
-    ds_2[2][3] = ds_2[3][2];
-    ds_2[3][3].set(-a1x, -a1y,  a1z);
-    calc_derivative_impl(ds_2, dl0_2_out, dq0_2_out, ds_2_out);
+    const cvm::rvector ds_2[4][4] = {
+      {{ a1x,  a1y,  a1z}, { 0.0, -a1z,  a1y}, { a1z,  0.0, -a1x}, {-a1y,  a1x,  0.0}},
+      {{ 0.0, -a1z,  a1y}, { a1x, -a1y, -a1z}, { a1y,  a1x,  0.0}, { a1z,  0.0,  a1x}},
+      {{ a1z,  0.0, -a1x}, { a1y,  a1x,  0.0}, {-a1x,  a1y, -a1z}, { 0.0,  a1z,  a1y}},
+      {{-a1y,  a1x,  0.0}, { a1z,  0.0,  a1x}, { 0.0,  a1z,  a1y}, {-a1x, -a1y,  a1z}}};
+    calc_derivative_impl<use_dl, use_dq, use_ds>(ds_2, dl0_2_out, dq0_2_out, ds_2_out);
   }
 };
 
@@ -585,10 +574,7 @@ void debug_gradients(
   cvm::real S_new_eigval[4];
   cvm::real S_new_eigvec[4][4];
   for (size_t ia = 0; ia < pos2.size(); ++ia) {
-    // cvm::real const &a1x = pos1[ia].x;
-    // cvm::real const &a1y = pos1[ia].y;
-    // cvm::real const &a1z = pos1[ia].z;
-    deriv.calc_derivative_wrt_group2(ia, &dl0_2, &dq0_2, &ds_2);
+    deriv.template calc_derivative_wrt_group2<true, true, true>(ia, &dl0_2, &dq0_2, &ds_2);
     // make an infitesimal move along each cartesian coordinate of
     // this atom, and solve again the eigenvector problem
     for (size_t comp = 0; comp < 3; comp++) {
diff --git a/lib/colvars/colvaratoms.cpp b/lib/colvars/colvaratoms.cpp
index e15b9301a1..054db505f3 100644
--- a/lib/colvars/colvaratoms.cpp
+++ b/lib/colvars/colvaratoms.cpp
@@ -673,7 +673,7 @@ int cvm::atom_group::add_atom_numbers(std::string const &numbers_conf)
 }
 
 
-int cvm::atom_group::add_index_group(std::string const &index_group_name)
+int cvm::atom_group::add_index_group(std::string const &index_group_name, bool silent)
 {
   std::vector<std::string> const &index_group_names =
     cvm::main()->index_group_names;
@@ -687,7 +687,10 @@ int cvm::atom_group::add_index_group(std::string const &index_group_name)
   }
 
   if (i_group >= index_group_names.size()) {
-    return cvm::error("Error: could not find index group "+
+    if (silent)
+      return COLVARS_INPUT_ERROR;
+    else
+      return cvm::error("Error: could not find index group "+
                       index_group_name+" among those already provided.\n",
                       COLVARS_INPUT_ERROR);
   }
@@ -1055,6 +1058,14 @@ void cvm::atom_group::calc_apply_roto_translation()
     }
   }
 
+  if (is_enabled(f_ag_fit_gradients) && !b_dummy) {
+    // Save the unrotated frame for fit gradients
+    pos_unrotated.resize(size());
+    for (size_t i = 0; i < size(); ++i) {
+      pos_unrotated[i] = atoms[i].pos;
+    }
+  }
+
   if (is_enabled(f_ag_rotate)) {
     // rotate the group (around the center of geometry if f_ag_center is
     // enabled, around the origin otherwise)
@@ -1217,23 +1228,30 @@ void cvm::atom_group::calc_fit_gradients()
   if (cvm::debug())
     cvm::log("Calculating fit gradients.\n");
 
+  cvm::atom_group *group_for_fit = fitting_group ? fitting_group : this;
+
+  auto accessor_main = [this](size_t i){return atoms[i].grad;};
+  auto accessor_fitting = [&group_for_fit](size_t j, const cvm::rvector& grad){group_for_fit->fit_gradients[j] = grad;};
   if (is_enabled(f_ag_center) && is_enabled(f_ag_rotate))
-    calc_fit_gradients_impl<true, true>();
+    calc_fit_forces_impl<true, true>(accessor_main, accessor_fitting);
   if (is_enabled(f_ag_center) && !is_enabled(f_ag_rotate))
-    calc_fit_gradients_impl<true, false>();
+    calc_fit_forces_impl<true, false>(accessor_main, accessor_fitting);
   if (!is_enabled(f_ag_center) && is_enabled(f_ag_rotate))
-    calc_fit_gradients_impl<false, true>();
+    calc_fit_forces_impl<false, true>(accessor_main, accessor_fitting);
   if (!is_enabled(f_ag_center) && !is_enabled(f_ag_rotate))
-    calc_fit_gradients_impl<false, false>();
+    calc_fit_forces_impl<false, false>(accessor_main, accessor_fitting);
 
   if (cvm::debug())
     cvm::log("Done calculating fit gradients.\n");
 }
 
 
-template <bool B_ag_center, bool B_ag_rotate>
-void cvm::atom_group::calc_fit_gradients_impl() {
-  cvm::atom_group *group_for_fit = fitting_group ? fitting_group : this;
+template <bool B_ag_center, bool B_ag_rotate,
+          typename main_force_accessor_T, typename fitting_force_accessor_T>
+void cvm::atom_group::calc_fit_forces_impl(
+  main_force_accessor_T accessor_main,
+  fitting_force_accessor_T accessor_fitting) const {
+  const cvm::atom_group *group_for_fit = fitting_group ? fitting_group : this;
   // the center of geometry contribution to the gradients
   cvm::rvector atom_grad;
   // the rotation matrix contribution to the gradients
@@ -1243,17 +1261,13 @@ void cvm::atom_group::calc_fit_gradients_impl() {
   cvm::vector1d<cvm::rvector> dq0_1(4);
   // loop 1: iterate over the current atom group
   for (size_t i = 0; i < size(); i++) {
-    cvm::atom_pos pos_orig;
     if (B_ag_center) {
-      atom_grad += atoms[i].grad;
-      if (B_ag_rotate) pos_orig = rot_inv * (atoms[i].pos - ref_pos_cog);
-    } else {
-      if (B_ag_rotate) pos_orig = atoms[i].pos;
+      atom_grad += accessor_main(i);
     }
     if (B_ag_rotate) {
       // calculate \partial(R(q) \vec{x}_i)/\partial q) \cdot \partial\xi/\partial\vec{x}_i
       cvm::quaternion const dxdq =
-        rot.q.position_derivative_inner(pos_orig, atoms[i].grad);
+        rot.q.position_derivative_inner(pos_unrotated[i], accessor_main(i));
       sum_dxdq[0] += dxdq[0];
       sum_dxdq[1] += dxdq[1];
       sum_dxdq[2] += dxdq[2];
@@ -1261,26 +1275,45 @@ void cvm::atom_group::calc_fit_gradients_impl() {
     }
   }
   if (B_ag_center) {
-    if (B_ag_rotate) atom_grad = rot.inverse().matrix() * atom_grad;
+    if (B_ag_rotate) atom_grad = rot_inv * atom_grad;
     atom_grad *= (-1.0)/(cvm::real(group_for_fit->size()));
   }
   // loop 2: iterate over the fitting group
   if (B_ag_rotate) rot_deriv->prepare_derivative(rotation_derivative_dldq::use_dq);
   for (size_t j = 0; j < group_for_fit->size(); j++) {
+    cvm::rvector fitting_force_grad{0, 0, 0};
     if (B_ag_center) {
-      group_for_fit->fit_gradients[j] = atom_grad;
+      fitting_force_grad += atom_grad;
     }
     if (B_ag_rotate) {
-      rot_deriv->calc_derivative_wrt_group1(j, nullptr, &dq0_1);
+      rot_deriv->calc_derivative_wrt_group1<false, true, false>(j, nullptr, &dq0_1);
       // multiply by {\partial q}/\partial\vec{x}_j and add it to the fit gradients
-      group_for_fit->fit_gradients[j] += sum_dxdq[0] * dq0_1[0] +
-                                          sum_dxdq[1] * dq0_1[1] +
-                                          sum_dxdq[2] * dq0_1[2] +
-                                          sum_dxdq[3] * dq0_1[3];
+      fitting_force_grad += sum_dxdq[0] * dq0_1[0] +
+                            sum_dxdq[1] * dq0_1[1] +
+                            sum_dxdq[2] * dq0_1[2] +
+                            sum_dxdq[3] * dq0_1[3];
     }
+    if (cvm::debug()) {
+      cvm::log(cvm::to_str(fitting_force_grad));
+    }
+    accessor_fitting(j, fitting_force_grad);
   }
 }
 
+template <typename main_force_accessor_T, typename fitting_force_accessor_T>
+void cvm::atom_group::calc_fit_forces(
+  main_force_accessor_T accessor_main,
+  fitting_force_accessor_T accessor_fitting) const { // dispatch run-time flags to a compile-time specialization
+  if (is_enabled(f_ag_center) && is_enabled(f_ag_rotate)) // centering and rotation
+    calc_fit_forces_impl<true, true, main_force_accessor_T, fitting_force_accessor_T>(accessor_main, accessor_fitting);
+  if (is_enabled(f_ag_center) && !is_enabled(f_ag_rotate)) // centering only
+    calc_fit_forces_impl<true, false, main_force_accessor_T, fitting_force_accessor_T>(accessor_main, accessor_fitting);
+  if (!is_enabled(f_ag_center) && is_enabled(f_ag_rotate)) // rotation only
+    calc_fit_forces_impl<false, true, main_force_accessor_T, fitting_force_accessor_T>(accessor_main, accessor_fitting);
+  if (!is_enabled(f_ag_center) && !is_enabled(f_ag_rotate)) // neither
+    calc_fit_forces_impl<false, false, main_force_accessor_T, fitting_force_accessor_T>(accessor_main, accessor_fitting);
+}
+
 
 std::vector<cvm::atom_pos> cvm::atom_group::positions() const
 {
@@ -1452,17 +1485,72 @@ void cvm::atom_group::apply_force(cvm::rvector const &force)
     return;
   }
 
-  if (is_enabled(f_ag_rotate)) {
+  auto ag_force = get_group_force_object();
+  for (size_t i = 0; i < size(); ++i) {
+    ag_force.add_atom_force(i, atoms[i].mass / total_mass * force);
+  }
+}
 
-    const auto rot_inv = rot.inverse().matrix();
-    for (cvm::atom_iter ai = this->begin(); ai != this->end(); ai++) {
-      ai->apply_force(rot_inv * ((ai->mass/total_mass) * force));
+cvm::atom_group::group_force_object cvm::atom_group::get_group_force_object() {
+  return cvm::atom_group::group_force_object(this); // bound to this group; its destructor flushes any stashed forces
+}
+
+cvm::atom_group::group_force_object::group_force_object(cvm::atom_group* ag):
+m_ag(ag), m_group_for_fit(m_ag->fitting_group ? m_ag->fitting_group : m_ag), // the group itself if no fitting_group is set
+m_has_fitting_force(m_ag->is_enabled(f_ag_center) || m_ag->is_enabled(f_ag_rotate)) {
+  if (m_has_fitting_force) { // the stash buffer is only used when centering or rotation is enabled
+    if (m_ag->group_forces.size() != m_ag->size()) {
+      m_ag->group_forces.assign(m_ag->size(), 0); // resize to the group size and zero
+    } else {
+      std::fill(m_ag->group_forces.begin(),
+                m_ag->group_forces.end(), 0); // size already matches: zero in place
     }
+  }
+}
 
+cvm::atom_group::group_force_object::~group_force_object() {
+  if (m_has_fitting_force) { // otherwise add_atom_force() already applied each force directly
+    apply_force_with_fitting_group();
+  }
+}
+
+void cvm::atom_group::group_force_object::add_atom_force(size_t i, const cvm::rvector& force) {
+  if (m_has_fitting_force) {
+    m_ag->group_forces[i] += force; // stash only; applied by apply_force_with_fitting_group() from the destructor
   } else {
+    // Apply the force directly if we don't use fitting
+    (*m_ag)[i].apply_force(force);
+  }
+}
 
-    for (cvm::atom_iter ai = this->begin(); ai != this->end(); ai++) {
-      ai->apply_force((ai->mass/total_mass) * force);
+// Flush the stashed forces: rotate each back by the inverse fit rotation, apply
+// it to the main group, then project the stash onto the fitting group atoms.
+void cvm::atom_group::group_force_object::apply_force_with_fitting_group() {
+  // NOTE(review): also reached when only centering is enabled (no rotation);
+  // presumably m_ag->rot is the identity in that case - confirm.
+  const cvm::rmatrix rot_inv = m_ag->rot.inverse().matrix();
+  if (cvm::debug()) cvm::log("Applying force on main group " + m_ag->name + ":\n");
+  for (size_t ia = 0; ia < m_ag->size(); ++ia) {
+    const cvm::rvector f_ia = rot_inv * m_ag->group_forces[ia];
+    (*m_ag)[ia].apply_force(f_ia);
+    if (cvm::debug()) cvm::log(cvm::to_str(f_ia));
+  }
+  // Gradients are only available with scalar components, so for a scalar component,
+  // if f_ag_fit_gradients is disabled, then the forces on the fitting group are not
+  // computed. For a vector component, we can only know the forces on the fitting
+  // group, but checking this flag can mimic results that the users expect (if
+  // "enableFitGradients no" then there is no force on the fitting group).
+  if (!m_ag->b_dummy && m_ag->is_enabled(f_ag_fit_gradients)) {
+    auto accessor_main = [this](size_t i){return m_ag->group_forces[i];};
+    auto accessor_fitting = [this](size_t j, const cvm::rvector& fitting_force){
+      (*(m_group_for_fit))[j].apply_force(fitting_force);
+    };
+    if (cvm::debug()) {
+      cvm::log("Applying force on the fitting group of main group " + m_ag->name + ":\n");
+    }
+    m_ag->calc_fit_forces(accessor_main, accessor_fitting);
+    if (cvm::debug()) {
+      cvm::log("Done applying force on the fitting group of main group " + m_ag->name + ":\n");
     }
   }
 }
diff --git a/lib/colvars/colvaratoms.h b/lib/colvars/colvaratoms.h
index d16ca7bd56..528e849df0 100644
--- a/lib/colvars/colvaratoms.h
+++ b/lib/colvars/colvaratoms.h
@@ -194,7 +194,7 @@ public:
 
   int add_atom_numbers(std::string const &numbers_conf);
   int add_atoms_of_group(atom_group const * ag);
-  int add_index_group(std::string const &index_group_name);
+  int add_index_group(std::string const &index_group_name, bool silent = false);
   int add_atom_numbers_range(std::string const &range_conf);
   int add_atom_name_residue_range(std::string const &psf_segid,
                                   std::string const &range_conf);
@@ -257,8 +257,63 @@ protected:
   /// \brief Index in the colvarproxy arrays (if the group is scalable)
   int index;
 
+  /// \brief The temporary forces acting on the main group atoms.
+  ///        Currently this is only used for calculating the fitting group forces for
+  ///        non-scalar components.
+  std::vector<cvm::rvector> group_forces;
+
 public:
 
+  /*! @class group_force_object
+   *  @brief Helper for applying forces to an atom group in a way that accounts
+   *         for the fitting transformation (centering and/or rotation). Prefer
+   *         get_group_force_object() over constructing an instance directly;
+   *         any stashed forces are applied when the object is destroyed.
+   */
+  class group_force_object {
+  public:
+    /*! @brief Constructor; zeroes ag->group_forces when fitting is enabled
+     *  @param ag The pointer to the atom group that forces will be applied on.
+     */
+    group_force_object(cvm::atom_group* ag);
+    /*! @brief Destructor; applies the stashed forces when fitting is enabled
+     */
+    ~group_force_object();
+    /*! @brief Add a force to atom i
+     *  @param i The index of the atom within the atom group.
+     *  @param force The force being added to atom i.
+     *
+     * The function can be used as follows:
+     * @code
+     *       // In your colvar::cvc::apply_force() loop of a component:
+     *       auto ag_force = atoms->get_group_force_object();
+     *       for (size_t ia = 0; ia < atoms->size(); ia++) {
+     *         const cvm::rvector f = compute_force_on_atom_ia();
+     *         ag_force.add_atom_force(ia, f);
+     *       }
+     * @endcode
+     * There are two scenarios under the hood:
+     * (i) If neither centering nor rotation is enabled for the atom group,
+     *     the force is applied to atom i directly;
+     * (ii) Otherwise, the force on atom i is only stashed temporarily in
+     *      ag->group_forces. When the group_force_object is destroyed (e.g. at
+     *      the end of apply_force()), the destructor ~group_force_object()
+     *      calls apply_force_with_fitting_group(). The stashed forces are
+     *      multiplied by the inverse rotation and applied to the main group.
+     *      The forces on the fitting group (if enableFitGradients is on and
+     *      the group is not a dummy group) are then computed and applied by
+     *      calling calc_fit_forces().
+     */
+    void add_atom_force(size_t i, const cvm::rvector& force);
+  private:
+    cvm::atom_group* m_ag; ///< Group the forces are applied to
+    cvm::atom_group* m_group_for_fit; ///< m_ag->fitting_group if set, otherwise m_ag itself
+    bool m_has_fitting_force; ///< True if centering or rotation is enabled for m_ag
+    void apply_force_with_fitting_group(); ///< Called by the destructor to apply the stashed forces
+  };
+
+  group_force_object get_group_force_object();
+
   inline cvm::atom & operator [] (size_t const i)
   {
     return atoms[i];
@@ -423,6 +478,9 @@ private:
   /// \brief Center of geometry before any fitting
   cvm::atom_pos cog_orig;
 
+  /// \brief Unrotated atom positions for fit gradients
+  std::vector<cvm::atom_pos> pos_unrotated;
+
 public:
 
   /// \brief Return the center of geometry of the atomic positions
@@ -497,15 +555,60 @@ public:
   /// \brief Calculate the derivatives of the fitting transformation
   void calc_fit_gradients();
 
-/*! @brief  Actual implementation of `calc_fit_gradients`. The template is
+/*! @brief  Actual implementation of `calc_fit_gradients` and
+ *          `calc_fit_forces`. The template is
  *          used to avoid branching inside the loops in case that the CPU
  *          branch prediction is broken (or further migration to GPU code).
  *  @tparam B_ag_center Centered the reference to origin? This should follow
  *          the value of `is_enabled(f_ag_center)`.
  *  @tparam B_ag_rotate Calculate the optimal rotation? This should follow
  *          the value of `is_enabled(f_ag_rotate)`.
+ *  @tparam main_force_accessor_T The type of accessor of the main
+ *          group forces or gradients acting on the rotated frame.
+ *  @tparam fitting_force_accessor_T The type of accessor of the fitting group
+ *          forces or gradients.
+ *  @param accessor_main The accessor of the main group forces or gradients.
+ *         accessor_main(i) should return the i-th force or gradient of the
+ *         rotated main group.
+ *  @param accessor_fitting The accessor of the fitting group forces or gradients.
+ *         accessor_fitting(j, v) should store/apply the j-th atom gradient or
+ *         force in the fitting group.
+ *
+ *  This function is used to (i) project the gradients of CV with respect to
+ *  rotated main group atoms to fitting group atoms, or (ii) project the forces
+ *  on rotated main group atoms to fitting group atoms, by the following two steps
+ *  (using the goal (ii) for example):
+ *  (1) Loop over the positions of main group atoms and call cvm::quaternion::position_derivative_inner
+ *      to project the forces on rotated main group atoms to the forces on quaternion.
+ *  (2) Loop over the positions of fitting group atoms, compute the gradients of
+ *      \f$\mathbf{q}\f$ with respect to the position of each atom, and then multiply
+ *      that with the force on \f$\mathbf{q}\f$ (chain rule).
  */
-  template <bool B_ag_center, bool B_ag_rotate> void calc_fit_gradients_impl();
+  template <bool B_ag_center, bool B_ag_rotate,
+            typename main_force_accessor_T, typename fitting_force_accessor_T>
+  void calc_fit_forces_impl(
+    main_force_accessor_T accessor_main,
+    fitting_force_accessor_T accessor_fitting) const;
+
+/*! @brief  Calculate or apply the fitting group forces from the main group forces.
+ *  @tparam main_force_accessor_T The type of accessor of the main
+ *          group forces or gradients.
+ *  @tparam fitting_force_accessor_T The type of accessor of the fitting group
+ *          forces or gradients.
+ *  @param accessor_main The accessor of the main group forces or gradients.
+ *         accessor_main(i) should return the i-th force or gradient of the
+ *         main group.
+ *  @param accessor_fitting The accessor of the fitting group forces or gradients.
+ *         accessor_fitting(j, v) should store/apply the j-th atom gradient or
+ *         force in the fitting group.
+ *
+ *  This function just dispatches the parameters to calc_fit_forces_impl that really
+ *  performs the calculations.
+ */
+  template <typename main_force_accessor_T, typename fitting_force_accessor_T>
+  void calc_fit_forces(
+    main_force_accessor_T accessor_main,
+    fitting_force_accessor_T accessor_fitting) const;
 
   /// \brief Derivatives of the fitting transformation
   std::vector<cvm::atom_pos> fit_gradients;
diff --git a/lib/colvars/colvarbias.cpp b/lib/colvars/colvarbias.cpp
index fdffdc1794..43b54d672f 100644
--- a/lib/colvars/colvarbias.cpp
+++ b/lib/colvars/colvarbias.cpp
@@ -93,6 +93,8 @@ int colvarbias::init(std::string const &conf)
     cvm::log("Reinitializing bias \""+name+"\".\n");
   }
 
+  feature_states[f_cvb_step_zero_data].available = true;
+
   colvar_values.resize(num_variables());
   for (i = 0; i < num_variables(); i++) {
     colvar_values[i].type(colvars[i]->value().type());
@@ -157,7 +159,7 @@ int colvarbias::init_dependencies() {
     init_feature(f_cvb_step_zero_data, "step_zero_data", f_type_user);
 
     init_feature(f_cvb_apply_force, "apply_force", f_type_user);
-    require_feature_children(f_cvb_apply_force, f_cv_gradient);
+    require_feature_children(f_cvb_apply_force, f_cv_apply_force);
 
     init_feature(f_cvb_bypass_ext_lagrangian, "bypass_extended_Lagrangian_coordinates", f_type_user);
 
@@ -199,6 +201,8 @@ int colvarbias::init_dependencies() {
 
     init_feature(f_cvb_extended, "Bias on extended-Lagrangian variables", f_type_static);
 
+    init_feature(f_cvb_smp, "smp_computation", f_type_user);
+
     // check that everything is initialized
     for (i = 0; i < colvardeps::f_cvb_ntot; i++) {
       if (is_not_set(i)) {
@@ -221,8 +225,9 @@ int colvarbias::init_dependencies() {
   // The feature f_cvb_bypass_ext_lagrangian is only implemented by some derived classes
   // (initially, harmonicWalls)
   feature_states[f_cvb_bypass_ext_lagrangian].available = false;
-  // disabled by default; can be changed by derived classes that implement it
-  feature_states[f_cvb_bypass_ext_lagrangian].enabled = false;
+
+  // Most biases cannot currently be processed in parallel over threads
+  feature_states[f_cvb_smp].available = false;
 
   return COLVARS_OK;
 }
@@ -704,7 +709,7 @@ int colvarbias::read_state_string(char const *buffer)
 
 
 std::ostream &colvarbias::write_state_data_key(std::ostream &os, std::string const &key,
-                                               bool header)
+                                               bool header) const
 {
   os << (header ? "\n" : "") << key << (header ? "\n" : " ");
   return os;
@@ -712,7 +717,7 @@ std::ostream &colvarbias::write_state_data_key(std::ostream &os, std::string con
 
 
 cvm::memory_stream &colvarbias::write_state_data_key(cvm::memory_stream &os, std::string const &key,
-                                                     bool /* header */)
+                                                     bool /* header */) const
 {
   os << std::string(key);
   return os;
@@ -792,6 +797,8 @@ int colvarbias_ti::init(std::string const &conf)
 {
   int error_code = COLVARS_OK;
 
+  key_lookup(conf, "grid", &grid_conf);
+
   get_keyval_feature(this, conf, "writeTISamples",
                      f_cvb_write_ti_samples,
                      is_enabled(f_cvb_write_ti_samples));
@@ -800,18 +807,16 @@ int colvarbias_ti::init(std::string const &conf)
                      f_cvb_write_ti_pmf,
                      is_enabled(f_cvb_write_ti_pmf));
 
+  if (is_enabled(f_cvb_write_ti_pmf)) {
+    enable(f_cvb_write_ti_samples);
+  }
+
   if ((num_variables() > 1) && is_enabled(f_cvb_write_ti_pmf)) {
     return cvm::error("Error: only 1-dimensional PMFs can be written "
                       "on the fly.\n"
                       "Consider using writeTISamples instead and "
                       "post-processing the sampled free-energy gradients.\n",
                       COLVARS_NOT_IMPLEMENTED);
-  } else {
-    error_code |= init_grids();
-  }
-
-  if (is_enabled(f_cvb_write_ti_pmf)) {
-    enable(f_cvb_write_ti_samples);
   }
 
   if (is_enabled(f_cvb_calc_ti_samples)) {
@@ -831,6 +836,8 @@ int colvarbias_ti::init(std::string const &conf)
     }
   }
 
+  error_code |= colvarbias_ti::init_grids();
+
   if (is_enabled(f_cvb_write_ti_pmf) || is_enabled(f_cvb_write_ti_samples)) {
     cvm::main()->cite_feature("Internal-forces free energy estimator");
   }
@@ -844,16 +851,15 @@ int colvarbias_ti::init_grids()
   if (is_enabled(f_cvb_calc_ti_samples)) {
     if (!ti_avg_forces) {
       ti_bin.resize(num_variables());
+      ti_bin.assign(ti_bin.size(), -1);
       ti_system_forces.resize(num_variables());
       for (size_t icv = 0; icv < num_variables(); icv++) {
         ti_system_forces[icv].type(variables(icv)->value());
         ti_system_forces[icv].is_derivative();
         ti_system_forces[icv].reset();
       }
-      ti_avg_forces.reset(new colvar_grid_gradient(colvars));
-      ti_count.reset(new colvar_grid_count(colvars));
-      ti_avg_forces->samples = ti_count;
-      ti_count->has_parent_data = true;
+      ti_count.reset(new colvar_grid_count(colvars, grid_conf));
+      ti_avg_forces.reset(new colvar_grid_gradient(colvars, ti_count));
     }
   }
 
@@ -884,8 +890,12 @@ int colvarbias_ti::update_system_forces(std::vector<colvarvalue> const
 
   size_t i;
 
-  if (proxy->total_forces_same_step()) {
-    for (i = 0; i < num_variables(); i++) {
+  if (cvm::debug()) {
+    cvm::log("TI bin for bias \"" + name + "\" = " + cvm::to_str(ti_bin) + ".\n");
+  }
+
+  for (i = 0; i < num_variables(); i++) {
+    if (variables(i)->is_enabled(f_cv_total_force_current_step)) {
       ti_bin[i] = ti_avg_forces->current_bin_scalar(i);
     }
   }
@@ -894,8 +904,10 @@ int colvarbias_ti::update_system_forces(std::vector<colvarvalue> const
   if ((cvm::step_relative() > 0) || proxy->total_forces_same_step()) {
     if (ti_avg_forces->index_ok(ti_bin)) {
       for (i = 0; i < num_variables(); i++) {
-        if (variables(i)->is_enabled(f_cv_subtract_applied_force)) {
+        if (variables(i)->is_enabled(f_cv_subtract_applied_force) ||
+          (cvm::proxy->total_forces_same_step() && !variables(i)->is_enabled(f_cv_external))) {
           // this colvar is already subtracting all applied forces
+          // or the "total force" is really a system force at current step
           ti_system_forces[i] = variables(i)->total_force();
         } else {
           ti_system_forces[i] = variables(i)->total_force() -
@@ -904,14 +916,17 @@ int colvarbias_ti::update_system_forces(std::vector<colvarvalue> const
         }
       }
       if (cvm::step_relative() > 0 || is_enabled(f_cvb_step_zero_data)) {
+        if (cvm::debug()) {
+          cvm::log("Accumulating TI forces for bias \"" + name + "\".\n");
+        }
         ti_avg_forces->acc_value(ti_bin, ti_system_forces);
       }
     }
   }
 
-  if (!proxy->total_forces_same_step()) {
-    // Set the index for use in the next iteration, when total forces come in
-    for (i = 0; i < num_variables(); i++) {
+  for (i = 0; i < num_variables(); i++) {
+    if (!variables(i)->is_enabled(f_cv_total_force_current_step)) {
+      // Set the index for use in the next iteration, when total forces come in
       ti_bin[i] = ti_avg_forces->current_bin_scalar(i);
     }
   }
diff --git a/lib/colvars/colvarbias.h b/lib/colvars/colvarbias.h
index 03f93f4315..35438752a1 100644
--- a/lib/colvars/colvarbias.h
+++ b/lib/colvars/colvarbias.h
@@ -174,14 +174,14 @@ public:
   /// \param[in,out] os Output stream
   /// \param[in] key  Keyword labeling the header block
   /// \param[in] header  Whether this is the header of a multi-line segment vs a single line
-  std::ostream &write_state_data_key(std::ostream &os, std::string const &key, bool header = true);
+  std::ostream &write_state_data_key(std::ostream &os, std::string const &key, bool header = true) const;
 
   /// Write a keyword header for a data sequence to an unformatted stream
   /// \param[in,out] os Output stream
   /// \param[in] key  Keyword labeling the header block
   /// \param[in] header  Ignored
   cvm::memory_stream &write_state_data_key(cvm::memory_stream &os, std::string const &key,
-                                           bool header = true);
+                                           bool header = true) const;
 
 private:
 
@@ -358,6 +358,9 @@ protected:
   /// \brief Forces exerted from the system to the associated variables
   std::vector<colvarvalue> ti_system_forces;
 
+  /// Grid configuration parameters (also used by grids in derived classes)
+  std::string grid_conf;
+
   /// Averaged system forces
   std::shared_ptr<colvar_grid_gradient> ti_avg_forces;
 
diff --git a/lib/colvars/colvarbias_abf.cpp b/lib/colvars/colvarbias_abf.cpp
index 6327650863..b01e9de853 100644
--- a/lib/colvars/colvarbias_abf.cpp
+++ b/lib/colvars/colvarbias_abf.cpp
@@ -87,24 +87,25 @@ int colvarbias_abf::init(std::string const &conf)
   get_keyval(conf, "shared", shared_on, false);
   if (shared_on) {
     cvm::main()->cite_feature("Multiple-walker ABF implementation");
-    if ((proxy->replica_enabled() != COLVARS_OK) ||
-        (proxy->num_replicas() <= 1)) {
-      return cvm::error("Error: shared ABF requires more than one replica.",
-                        COLVARS_INPUT_ERROR);
-    }
-    cvm::log("shared ABF will be applied among "+
-             cvm::to_str(proxy->num_replicas()) + " replicas.\n");
+    cvm::main()->cite_feature("Updated multiple-walker ABF implementation");
+
+
+    // Cannot check this here because the replica communicator is obtained later
+    // in Gromacs
+
+    // if ((proxy->check_replicas_enabled() != COLVARS_OK) ||
+    //     (proxy->num_replicas() <= 1)) {
+    //   return cvm::error("Error: shared ABF requires more than one replica.",
+    //                     COLVARS_INPUT_ERROR);
+    // }
+    // cvm::log("shared ABF will be applied among "+
+    //          cvm::to_str(proxy->num_replicas()) + " replicas.\n");
 
     // If shared_freq is not set, we default to output_freq
     get_keyval(conf, "sharedFreq", shared_freq, output_freq);
     if ( shared_freq && output_freq % shared_freq ) {
       return cvm::error("Error: outputFreq must be a multiple of sharedFreq.\n");
     }
-
-    // Allocate these at init time if possible
-    local_samples.reset(new colvar_grid_count(colvars));
-    local_gradients.reset(new colvar_grid_gradient(colvars, local_samples));
-    local_pmf.reset(new integrate_potential(colvars, local_gradients));
   }
 
   // ************* checking the associated colvars *******************
@@ -124,10 +125,17 @@ int colvarbias_abf::init(std::string const &conf)
       colvars[i]->enable(f_cv_hide_Jacobian);
     }
 
-    // If any colvar is extended-system, we need to collect the extended
-    // system gradient
-    if (colvars[i]->is_enabled(f_cv_extended_Lagrangian))
+    // If any colvar is extended-system (restrained, not driven external param), we are running eABF
+    if (colvars[i]->is_enabled(f_cv_extended_Lagrangian)
+        && !colvars[i]->is_enabled(f_cv_external)) {
       enable(f_cvb_extended);
+    }
+
+    if (!colvars[i]->is_enabled(f_cv_total_force_current_step)) {
+      // If any colvar does not have current-step total force, then
+      // we can't do step 0 data
+      provide(f_cvb_step_zero_data, false);
+    }
 
     // Cannot mix and match coarse time steps with ABF because it gives
     // wrong total force averages - total force needs to be averaged over
@@ -181,12 +189,23 @@ int colvarbias_abf::init(std::string const &conf)
     cvm::log("Allocating count and free energy gradient grids.\n");
   }
 
-  samples.reset(new colvar_grid_count(colvars));
-  gradients.reset(new colvar_grid_gradient(colvars, samples));
+  {
+    /// Optional custom configuration string for grid parameters
+    std::string grid_conf;
+    key_lookup(conf, "grid", &grid_conf);
+
+    samples.reset(new colvar_grid_count(colvars, grid_conf));
+  }
+  gradients.reset(new colvar_grid_gradient(colvars, samples)); // Also use samples as template for sizes
 
   gradients->full_samples = full_samples;
   gradients->min_samples = min_samples;
 
+  if (shared_on) {
+    local_samples.reset(new colvar_grid_count(colvars, samples));
+    local_gradients.reset(new colvar_grid_gradient(colvars, local_samples));
+  }
+
   // Data for eABF z-based estimator
   if (is_enabled(f_cvb_extended)) {
     get_keyval(conf, "CZARestimator", b_CZAR_estimator, true);
@@ -198,11 +217,11 @@ int colvarbias_abf::init(std::string const &conf)
                colvarparse::parse_silent);
 
     z_bin.assign(num_variables(), 0);
-    z_samples.reset(new colvar_grid_count(colvars));
+    z_samples.reset(new colvar_grid_count(colvars, samples));
     z_samples->request_actual_value();
     z_gradients.reset(new colvar_grid_gradient(colvars, z_samples));
     z_gradients->request_actual_value();
-    czar_gradients.reset(new colvar_grid_gradient(colvars));
+    czar_gradients.reset(new colvar_grid_gradient(colvars, nullptr, samples));
   }
 
   get_keyval(conf, "integrate", b_integrate, num_variables() <= 3); // Integrate for output if d<=3
@@ -216,6 +235,9 @@ int colvarbias_abf::init(std::string const &conf)
     if (b_CZAR_estimator) {
       czar_pmf.reset(new integrate_potential(colvars, czar_gradients));
     }
+    if (shared_on) {
+      local_pmf.reset(new integrate_potential(colvars, local_gradients));
+    }
     // Parameters for integrating initial (and final) gradient data
     get_keyval(conf, "integrateMaxIterations", integrate_iterations, 10000, colvarparse::parse_silent);
     get_keyval(conf, "integrateTol", integrate_tol, 1e-6, colvarparse::parse_silent);
@@ -228,9 +250,9 @@ int colvarbias_abf::init(std::string const &conf)
   if (b_CZAR_estimator && shared_on && cvm::main()->proxy->replica_index() == 0) {
     // The pointers below are used for outputting CZAR data
     // Allocate grids for collected global data, on replica 0 only
-    global_z_samples.reset(new colvar_grid_count(colvars));
+    global_z_samples.reset(new colvar_grid_count(colvars, samples));
     global_z_gradients.reset(new colvar_grid_gradient(colvars, global_z_samples));
-    global_czar_gradients.reset(new colvar_grid_gradient(colvars));
+    global_czar_gradients.reset(new colvar_grid_gradient(colvars, nullptr, samples));
     global_czar_pmf.reset(new integrate_potential(colvars, global_czar_gradients));
   } else {
     // otherwise they are just aliases for the local CZAR grids
@@ -244,10 +266,10 @@ int colvarbias_abf::init(std::string const &conf)
   // This used to be only if "shared" was defined,
   // but now we allow calling share externally (e.g. from Tcl).
   if (b_CZAR_estimator) {
-    z_samples_in.reset(new colvar_grid_count(colvars));
+    z_samples_in.reset(new colvar_grid_count(colvars, samples));
     z_gradients_in.reset(new colvar_grid_gradient(colvars, z_samples_in));
   }
-  last_samples.reset(new colvar_grid_count(colvars));
+  last_samples.reset(new colvar_grid_count(colvars, samples));
   last_gradients.reset(new colvar_grid_gradient(colvars, last_samples));
   // Any data collected after now is new for shared ABF purposes
   shared_last_step = cvm::step_absolute();
@@ -315,27 +337,36 @@ int colvarbias_abf::update()
   size_t i;
   for (i = 0; i < num_variables(); i++) {
     bin[i] = samples->current_bin_scalar(i);
+    if (colvars[i]->is_enabled(f_cv_total_force_current_step)) {
+      force_bin[i] = bin[i];
+    }
   }
 
-
   // ***********************************************************
   // ******  ABF Part I: update the FE gradient estimate  ******
   // ***********************************************************
 
 
-  if (cvm::proxy->total_forces_same_step()) {
-    // e.g. in LAMMPS, total forces are current
-    force_bin = bin;
+  // Share data first, so that 2d/3d PMF is refreshed using new data for mw-pABF.
+  // shared_on can be true with shared_freq 0 if we are sharing via script
+  if (shared_on && shared_freq &&
+      shared_last_step >= 0 &&                    // we have already collected some data
+      cvm::step_absolute() > shared_last_step &&  // time has passed since the last sharing timestep
+                                                  // (avoid re-sharing at last and first ts of successive run statements)
+      cvm::step_absolute() % shared_freq == 0) {
+    // Share gradients and samples for shared ABF.
+    replica_share();
   }
 
   if (can_accumulate_data() && is_enabled(f_cvb_history_dependent)) {
 
     if (cvm::step_relative() > 0 || cvm::proxy->total_forces_same_step()) {
+      // Note: this will skip step 0 data when available in some cases (extended system),
+      // but not doing so would make the code more complex
       if (samples->index_ok(force_bin)) {
         // Only if requested and within bounds of the grid...
 
-        // get total forces (lagging by 1 timestep) from colvars
-        // and subtract previous ABF force if necessary
+        // get total force and subtract previous ABF force if necessary
         update_system_force();
 
         gradients->acc_force(force_bin, system_force);
@@ -368,21 +399,11 @@ int colvarbias_abf::update()
     }
   }
 
-  if (!(cvm::proxy->total_forces_same_step())) {
-    // e.g. in NAMD, total forces will be available for next timestep
-    // hence we store the current colvar bin
-    force_bin = bin;
-  }
+  // In some cases, total forces are stored for next timestep
+  // hence we store the current colvar bin - this is overwritten on a per-colvar basis
+  // at the top of update()
+  force_bin = bin;
 
-  // Share data after force sample is collected for this time step
-  // shared_on can be true with shared_freq 0 if we are sharing via script
-  if (shared_on && shared_freq &&
-      cvm::step_absolute() > shared_last_step &&  // time has passed since the last sharing timestep
-                                                  // (avoid re-sharing at last and first ts of successive run statements)
-      cvm::step_absolute() % shared_freq == 0) {
-    // Share gradients and samples for shared ABF.
-    replica_share();
-  }
 
   // ******************************************************************
   // ******  ABF Part II: calculate and apply the biasing force  ******
@@ -452,10 +473,13 @@ int colvarbias_abf::update_system_force()
   // System force from atomic forces (or extended Lagrangian if applicable)
 
   for (i = 0; i < num_variables(); i++) {
-    if (colvars[i]->is_enabled(f_cv_subtract_applied_force)) {
+    if (colvars[i]->is_enabled(f_cv_subtract_applied_force)
+      || colvars[i]->is_enabled(f_cv_total_force_current_step)) {
       // this colvar is already subtracting the ABF force
+      // or the "total force" is from current step and cannot possibly contain Colvars biases
       system_force[i] = colvars[i]->total_force().real_value;
     } else {
+      // Subtract previous step's bias force from previous step's total force
       system_force[i] = colvars[i]->total_force().real_value
         - colvar_forces[i].real_value;
     }
@@ -525,7 +549,7 @@ int colvarbias_abf::replica_share() {
 
   colvarproxy *proxy = cvm::main()->proxy;
 
-  if (proxy->replica_enabled() != COLVARS_OK) {
+  if (proxy->check_replicas_enabled() != COLVARS_OK) {
     cvm::error("Error: shared ABF: No replicas.\n");
     return COLVARS_ERROR;
   }
@@ -542,7 +566,7 @@ int colvarbias_abf::replica_share() {
   if (!local_samples) {
     // We arrive here if sharing has just been enabled by a script
     // in which case local arrays have not been initialized yet
-    local_samples.reset(new colvar_grid_count(colvars));
+    local_samples.reset(new colvar_grid_count(colvars, samples));
     local_gradients.reset(new colvar_grid_gradient(colvars, local_samples));
     local_pmf.reset(new integrate_potential(colvars, local_gradients));
   }
@@ -662,9 +686,9 @@ int colvarbias_abf::replica_share_CZAR() {
       // We arrive here if sharing has just been enabled by a script
       // Allocate grids for collective data, on replica 0 only
       // overriding CZAR grids that are equal to local ones by default
-      global_z_samples.reset(new colvar_grid_count(colvars));
+      global_z_samples.reset(new colvar_grid_count(colvars, samples));
       global_z_gradients.reset(new colvar_grid_gradient(colvars, global_z_samples));
-      global_czar_gradients.reset(new colvar_grid_gradient(colvars));
+      global_czar_gradients.reset(new colvar_grid_gradient(colvars, nullptr, samples));
       global_czar_pmf.reset(new integrate_potential(colvars, global_czar_gradients));
     }
 
diff --git a/lib/colvars/colvarbias_histogram.cpp b/lib/colvars/colvarbias_histogram.cpp
index 98de275304..aab2c8f593 100644
--- a/lib/colvars/colvarbias_histogram.cpp
+++ b/lib/colvars/colvarbias_histogram.cpp
@@ -98,10 +98,10 @@ int colvarbias_histogram::init(std::string const &conf)
   }
 
   {
-    std::string grid_conf;
-    if (key_lookup(conf, "histogramGrid", &grid_conf)) {
+    if (key_lookup(conf, "histogramGrid", &grid_conf) ||
+        key_lookup(conf, "grid", &grid_conf)) {
       grid->parse_params(grid_conf);
-      grid->check_keywords(grid_conf, "histogramGrid");
+      grid->check_keywords(grid_conf, "grid");
     }
   }
 
diff --git a/lib/colvars/colvarbias_histogram.h b/lib/colvars/colvarbias_histogram.h
index 2c6ee84d1f..ed3ee346a6 100644
--- a/lib/colvars/colvarbias_histogram.h
+++ b/lib/colvars/colvarbias_histogram.h
@@ -38,6 +38,7 @@ protected:
 
   /// n-dim histogram
   colvar_grid_scalar *grid;
+  std::string grid_conf;
   std::vector<int> bin;
   std::string out_name, out_name_dx;
 
diff --git a/lib/colvars/colvarbias_histogram_reweight_amd.cpp b/lib/colvars/colvarbias_histogram_reweight_amd.cpp
index de2f6d9b8a..3c31e175f2 100644
--- a/lib/colvars/colvarbias_histogram_reweight_amd.cpp
+++ b/lib/colvars/colvarbias_histogram_reweight_amd.cpp
@@ -11,43 +11,9 @@
 #include "colvarproxy.h"
 #include "colvars_memstream.h"
 
-colvarbias_reweightaMD::colvarbias_reweightaMD(char const *key)
-  : colvarbias_histogram(key), grid_count(NULL), grid_dV(NULL),
-    grid_dV_square(NULL), pmf_grid_exp_avg(NULL), pmf_grid_cumulant(NULL),
-    grad_grid_exp_avg(NULL), grad_grid_cumulant(NULL)
-{
-}
+colvarbias_reweightaMD::colvarbias_reweightaMD(char const *key) : colvarbias_histogram(key) {}
 
-colvarbias_reweightaMD::~colvarbias_reweightaMD() {
-  if (grid_dV) {
-    delete grid_dV;
-    grid_dV = NULL;
-  }
-  if (grid_dV_square) {
-    delete grid_dV_square;
-    grid_dV_square = NULL;
-  }
-  if (grid_count) {
-    delete grid_count;
-    grid_count = NULL;
-  }
-  if (pmf_grid_exp_avg) {
-    delete pmf_grid_exp_avg;
-    pmf_grid_exp_avg = NULL;
-  }
-  if (pmf_grid_cumulant) {
-    delete pmf_grid_cumulant;
-    pmf_grid_cumulant = NULL;
-  }
-  if (grad_grid_exp_avg) {
-    delete grad_grid_exp_avg;
-    grad_grid_exp_avg = NULL;
-  }
-  if (grad_grid_cumulant) {
-    delete grad_grid_cumulant;
-    grad_grid_cumulant = NULL;
-  }
-}
+colvarbias_reweightaMD::~colvarbias_reweightaMD() {}
 
 int colvarbias_reweightaMD::init(std::string const &conf) {
   if (cvm::proxy->accelMD_enabled() == false) {
@@ -60,21 +26,21 @@ int colvarbias_reweightaMD::init(std::string const &conf) {
   get_keyval(conf, "WritePMFGradients", b_write_gradients, true);
   get_keyval(conf, "historyFreq", history_freq, 0);
   b_history_files = (history_freq > 0);
-  grid_count = new colvar_grid_scalar(colvars);
+  grid_count.reset(new colvar_grid_scalar(colvars, nullptr, false, grid_conf));
   grid_count->request_actual_value();
   grid->request_actual_value();
-  pmf_grid_exp_avg = new colvar_grid_scalar(colvars);
+  pmf_grid_exp_avg.reset(new colvar_grid_scalar(colvars, grid_count));
   if (b_write_gradients) {
-    grad_grid_exp_avg = new colvar_grid_gradient(colvars);
+    grad_grid_exp_avg.reset(new colvar_grid_gradient(colvars, nullptr, grid_count));
   }
   if (b_use_cumulant_expansion) {
-    grid_dV = new colvar_grid_scalar(colvars);
-    grid_dV_square = new colvar_grid_scalar(colvars);
-    pmf_grid_cumulant = new colvar_grid_scalar(colvars);
+    grid_dV.reset(new colvar_grid_scalar(colvars, grid_count));
+    grid_dV_square.reset(new colvar_grid_scalar(colvars, grid_count));
+    pmf_grid_cumulant.reset(new colvar_grid_scalar(colvars, grid_count));
     grid_dV->request_actual_value();
     grid_dV_square->request_actual_value();
     if (b_write_gradients) {
-      grad_grid_cumulant = new colvar_grid_gradient(colvars);
+      grad_grid_cumulant.reset(new colvar_grid_gradient(colvars, nullptr, grid_count));
     }
   }
   previous_bin.assign(num_variables(), -1);
@@ -193,7 +159,7 @@ int colvarbias_reweightaMD::write_exponential_reweighted_pmf(
       pmf_grid_exp_avg->set_value(i, tmp / count);
     }
   }
-  hist_to_pmf(pmf_grid_exp_avg, grid_count);
+  hist_to_pmf(pmf_grid_exp_avg.get(), grid_count.get());
   pmf_grid_exp_avg->write_multicol(pmf_grid_os);
   if (!keep_open) {
     cvm::proxy->close_output_stream(output_pmf);
@@ -231,9 +197,9 @@ int colvarbias_reweightaMD::write_cumulant_expansion_pmf(
   if (!pmf_grid_cumulant_os) {
     return COLVARS_FILE_ERROR;
   }
-  compute_cumulant_expansion_factor(grid_dV, grid_dV_square,
-                                    grid_count, pmf_grid_cumulant);
-  hist_to_pmf(pmf_grid_cumulant, grid_count);
+  compute_cumulant_expansion_factor(grid_dV.get(), grid_dV_square.get(),
+                                    grid_count.get(), pmf_grid_cumulant.get());
+  hist_to_pmf(pmf_grid_cumulant.get(), grid_count.get());
   pmf_grid_cumulant->write_multicol(pmf_grid_cumulant_os);
   if (!keep_open) {
     cvm::proxy->close_output_stream(output_pmf);
diff --git a/lib/colvars/colvarbias_histogram_reweight_amd.h b/lib/colvars/colvarbias_histogram_reweight_amd.h
index 43759b3bde..3e3bdede26 100644
--- a/lib/colvars/colvarbias_histogram_reweight_amd.h
+++ b/lib/colvars/colvarbias_histogram_reweight_amd.h
@@ -68,9 +68,9 @@ protected:
 
   /// Use cumulant expansion to second order?
   bool b_use_cumulant_expansion;
-  colvar_grid_scalar* grid_count;
-  colvar_grid_scalar* grid_dV;
-  colvar_grid_scalar* grid_dV_square;
+  std::shared_ptr<colvar_grid_scalar> grid_count;
+  std::unique_ptr<colvar_grid_scalar> grid_dV;
+  std::unique_ptr<colvar_grid_scalar> grid_dV_square;
 
   /// Number of timesteps between recording data in history files (if non-zero)
   size_t history_freq;
@@ -90,10 +90,10 @@ protected:
 
 private:
   /// temporary grids for evaluating PMFs
-  colvar_grid_scalar  *pmf_grid_exp_avg;
-  colvar_grid_scalar  *pmf_grid_cumulant;
-  colvar_grid_gradient *grad_grid_exp_avg;
-  colvar_grid_gradient *grad_grid_cumulant;
+  std::unique_ptr<colvar_grid_scalar> pmf_grid_exp_avg;
+  std::unique_ptr<colvar_grid_scalar> pmf_grid_cumulant;
+  std::unique_ptr<colvar_grid_gradient> grad_grid_exp_avg;
+  std::unique_ptr<colvar_grid_gradient> grad_grid_cumulant;
 };
 
 #endif // COLVARBIAS_HISTOGRAM_REWEIGHT_AMD
diff --git a/lib/colvars/colvarbias_meta.cpp b/lib/colvars/colvarbias_meta.cpp
index 905cd17883..1131c88ec5 100644
--- a/lib/colvars/colvarbias_meta.cpp
+++ b/lib/colvars/colvarbias_meta.cpp
@@ -11,27 +11,10 @@
 #include <iomanip>
 #include <algorithm>
 
-// Define function to get the absolute path of a replica file
-#if defined(_WIN32) && !defined(__CYGWIN__)
-#include <direct.h>
-#define GETCWD(BUF, SIZE) ::_getcwd(BUF, SIZE)
-#define PATHSEP "\\"
-#else
-#include <unistd.h>
-#define GETCWD(BUF, SIZE) ::getcwd(BUF, SIZE)
-#define PATHSEP "/"
-#endif
-
-#ifdef __cpp_lib_filesystem
-// When std::filesystem is available, use it
-#include <filesystem>
-#undef GETCWD
-#define GETCWD(BUF, SIZE) (std::filesystem::current_path().string().c_str())
-#endif
-
 #include "colvarmodule.h"
 #include "colvarproxy.h"
 #include "colvar.h"
+#include "colvargrid.h"
 #include "colvarbias_meta.h"
 #include "colvars_memstream.h"
 
@@ -49,8 +32,6 @@ colvarbias_meta::colvarbias_meta(char const *key)
   use_grids = true;
   grids_freq = 0;
   rebin_grids = false;
-  hills_energy = NULL;
-  hills_energy_gradients = NULL;
 
   dump_fes = true;
   keep_hills = false;
@@ -161,9 +142,9 @@ int colvarbias_meta::init(std::string const &conf)
     get_keyval(conf, "keepHills", keep_hills, keep_hills);
     get_keyval(conf, "keepFreeEnergyFiles", dump_fes_save, dump_fes_save);
 
-    if (hills_energy == NULL) {
-      hills_energy           = new colvar_grid_scalar(colvars);
-      hills_energy_gradients = new colvar_grid_gradient(colvars);
+    if (!hills_energy) {
+      hills_energy.reset(new colvar_grid_scalar(colvars, nullptr, false, grid_conf));
+      hills_energy_gradients.reset(new colvar_grid_gradient(colvars, nullptr, hills_energy));
     }
 
   } else {
@@ -209,7 +190,7 @@ int colvarbias_meta::init_replicas_params(std::string const &conf)
 
     get_keyval(conf, "replicaID", replica_id, replica_id);
     if (!replica_id.size()) {
-      if (proxy->replica_enabled() == COLVARS_OK) {
+      if (proxy->check_replicas_enabled() == COLVARS_OK) {
         // Obtain replicaID from the communicator
         replica_id = cvm::to_str(proxy->replica_index());
         cvm::log("Setting replicaID from communication layer: replicaID = "+
@@ -272,7 +253,6 @@ int colvarbias_meta::init_ebmeta_params(std::string const &conf)
 {
   int error_code = COLVARS_OK;
   // for ebmeta
-  target_dist = NULL;
   get_keyval(conf, "ebMeta", ebmeta, false);
   if(ebmeta){
     cvm::main()->cite_feature("Ensemble-biased metadynamics (ebMetaD)");
@@ -283,7 +263,7 @@ int colvarbias_meta::init_ebmeta_params(std::string const &conf)
                                "targetDistFile accordingly.\n",
                                COLVARS_INPUT_ERROR);
     }
-    target_dist = new colvar_grid_scalar();
+    target_dist.reset(new colvar_grid_scalar());
     error_code |= target_dist->init_from_colvars(colvars);
     std::string target_dist_file;
     get_keyval(conf, "targetDistFile", target_dist_file);
@@ -336,33 +316,15 @@ colvarbias_meta::~colvarbias_meta()
 {
   colvarbias_meta::clear_state_data();
   colvarproxy *proxy = cvm::main()->proxy;
-
   proxy->close_output_stream(replica_hills_file);
-
   proxy->close_output_stream(hills_traj_file_name());
-
-  if (target_dist) {
-    delete target_dist;
-    target_dist = NULL;
-  }
 }
 
 
 int colvarbias_meta::clear_state_data()
 {
-  if (hills_energy) {
-    delete hills_energy;
-    hills_energy = NULL;
-  }
-
-  if (hills_energy_gradients) {
-    delete hills_energy_gradients;
-    hills_energy_gradients = NULL;
-  }
-
   hills.clear();
   hills_off_grid.clear();
-
   return COLVARS_OK;
 }
 
@@ -451,8 +413,11 @@ int colvarbias_meta::update()
   error_code |= update_grid_params();
   // add new biasing energy/forces
   error_code |= update_bias();
-  // update grid content to reflect new bias
-  error_code |= update_grid_data();
+
+  if (use_grids) {
+    // update grid content to reflect new bias
+    error_code |= update_grid_data();
+  }
 
   if (comm != single_replica &&
       (cvm::step_absolute() % replica_update_freq) == 0) {
@@ -539,9 +504,9 @@ int colvarbias_meta::update_grid_params()
         // map everything into new grids
 
         colvar_grid_scalar *new_hills_energy =
-          new colvar_grid_scalar(*hills_energy);
+            new colvar_grid_scalar(*hills_energy);
         colvar_grid_gradient *new_hills_energy_gradients =
-          new colvar_grid_gradient(*hills_energy_gradients);
+            new colvar_grid_gradient(*hills_energy_gradients);
 
         // supply new boundaries to the new grids
 
@@ -556,10 +521,8 @@ int colvarbias_meta::update_grid_params()
         new_hills_energy->map_grid(*hills_energy);
         new_hills_energy_gradients->map_grid(*hills_energy_gradients);
 
-        delete hills_energy;
-        delete hills_energy_gradients;
-        hills_energy = new_hills_energy;
-        hills_energy_gradients = new_hills_energy_gradients;
+        hills_energy.reset(new_hills_energy);
+        hills_energy_gradients.reset(new_hills_energy_gradients);
 
         curr_bin = hills_energy->get_colvars_index();
         if (cvm::debug())
@@ -641,8 +604,7 @@ int colvarbias_meta::update_grid_data()
 {
   if ((cvm::step_absolute() % grids_freq) == 0) {
     // map the most recent gaussians to the grids
-    project_hills(new_hills_begin, hills.end(),
-                  hills_energy,    hills_energy_gradients);
+    project_hills(new_hills_begin, hills.end(), hills_energy.get(), hills_energy_gradients.get());
     new_hills_begin = hills.end();
 
     // TODO: we may want to condense all into one replicas array,
@@ -651,8 +613,8 @@ int colvarbias_meta::update_grid_data()
       for (size_t ir = 0; ir < replicas.size(); ir++) {
         replicas[ir]->project_hills(replicas[ir]->new_hills_begin,
                                     replicas[ir]->hills.end(),
-                                    replicas[ir]->hills_energy,
-                                    replicas[ir]->hills_energy_gradients);
+                                    replicas[ir]->hills_energy.get(),
+                                    replicas[ir]->hills_energy_gradients.get());
         replicas[ir]->new_hills_begin = replicas[ir]->hills.end();
       }
     }
@@ -670,11 +632,20 @@ int colvarbias_meta::calc_energy(std::vector<colvarvalue> const *values)
     replicas[ir]->bias_energy = 0.0;
   }
 
-  std::vector<int> const curr_bin = values ?
-    hills_energy->get_colvars_index(*values) :
-    hills_energy->get_colvars_index();
+  bool index_ok = false;
+  std::vector<int> curr_bin;
 
-  if (hills_energy->index_ok(curr_bin)) {
+  if (use_grids) {
+
+    curr_bin = values ?
+      hills_energy->get_colvars_index(*values) :
+      hills_energy->get_colvars_index();
+
+    index_ok = hills_energy->index_ok(curr_bin);
+
+  }
+
+  if ( index_ok ) {
     // index is within the grid: get the energy from there
     for (ir = 0; ir < replicas.size(); ir++) {
 
@@ -723,11 +694,20 @@ int colvarbias_meta::calc_forces(std::vector<colvarvalue> const *values)
     }
   }
 
-  std::vector<int> const curr_bin = values ?
-    hills_energy->get_colvars_index(*values) :
-    hills_energy->get_colvars_index();
+  bool index_ok = false;
+  std::vector<int> curr_bin;
 
-  if (hills_energy->index_ok(curr_bin)) {
+  if (use_grids) {
+
+    curr_bin = values ?
+      hills_energy->get_colvars_index(*values) :
+      hills_energy->get_colvars_index();
+
+    index_ok = hills_energy->index_ok(curr_bin);
+
+  }
+
+  if ( index_ok ) {
     for (ir = 0; ir < replicas.size(); ir++) {
       cvm::real const *f = &(replicas[ir]->hills_energy_gradients->value(curr_bin));
       for (ic = 0; ic < num_variables(); ic++) {
@@ -959,8 +939,7 @@ void colvarbias_meta::project_hills(colvarbias_meta::hill_iter  h_first,
 
 
 void colvarbias_meta::recount_hills_off_grid(colvarbias_meta::hill_iter  h_first,
-                                             colvarbias_meta::hill_iter  h_last,
-                                             colvar_grid_scalar         * /* he */)
+                                             colvarbias_meta::hill_iter  h_last)
 {
   hills_off_grid.clear();
 
@@ -1078,9 +1057,13 @@ int colvarbias_meta::update_replicas_registry()
         (replicas.back())->comm = multiple_replicas;
 
         if (use_grids) {
-          (replicas.back())->hills_energy           = new colvar_grid_scalar(colvars);
-          (replicas.back())->hills_energy_gradients = new colvar_grid_gradient(colvars);
+          (replicas.back())
+              ->hills_energy.reset(new colvar_grid_scalar(colvars, hills_energy));
+          (replicas.back())
+              ->hills_energy_gradients.reset(
+                  new colvar_grid_gradient(colvars, nullptr, hills_energy));
         }
+
         if (is_enabled(f_cvb_calc_ti_samples)) {
           (replicas.back())->enable(f_cvb_calc_ti_samples);
           (replicas.back())->colvarbias_ti::init_grids();
@@ -1336,34 +1319,40 @@ template <typename IST> IST &colvarbias_meta::read_state_data_template_(IST &is)
 {
   if (use_grids) {
 
-    colvar_grid_scalar   *hills_energy_backup = NULL;
-    colvar_grid_gradient *hills_energy_gradients_backup = NULL;
+    std::shared_ptr<colvar_grid_scalar> hills_energy_backup;
+    std::shared_ptr<colvar_grid_gradient> hills_energy_gradients_backup;
 
-    if (has_data) {
+    bool const need_backup = has_data;
+
+    if (need_backup) {
       if (cvm::debug())
-        cvm::log("Backupping grids for metadynamics bias \""+
-                 this->name+"\""+
-                 ((comm != single_replica) ? ", replica \""+replica_id+"\"" : "")+".\n");
-      hills_energy_backup           = hills_energy;
-      hills_energy_gradients_backup = hills_energy_gradients;
-      hills_energy                  = new colvar_grid_scalar(colvars);
-      hills_energy_gradients        = new colvar_grid_gradient(colvars);
+        cvm::log("Backing up grids for metadynamics bias \"" + this->name + "\"" +
+                 ((comm != single_replica) ? ", replica \"" + replica_id + "\"" : "") + ".\n");
+      // The members are empty once moved from: take the grid parameters from the backup
+      hills_energy_backup = std::move(hills_energy);
+      hills_energy_gradients_backup = std::move(hills_energy_gradients);
+      hills_energy.reset(new colvar_grid_scalar(colvars, hills_energy_backup));
+      hills_energy_gradients.reset(new colvar_grid_gradient(colvars, nullptr, hills_energy));
     }
 
-    read_grid_data_template_<IST, colvar_grid_scalar>(is, "hills_energy", hills_energy,
-                                                      hills_energy_backup);
+    read_grid_data_template_<IST, colvar_grid_scalar>(is, "hills_energy", hills_energy.get(),
+                                                      hills_energy_backup.get());
 
-    read_grid_data_template_<IST, colvar_grid_gradient>(
-        is, "hills_energy_gradients", hills_energy_gradients, hills_energy_gradients_backup);
+    read_grid_data_template_<IST, colvar_grid_gradient>(is, "hills_energy_gradients",
+                                                        hills_energy_gradients.get(),
+                                                        hills_energy_gradients_backup.get());
 
     if (is) {
       cvm::log("  successfully read the biasing potential and its gradients from grids.\n");
-      if (hills_energy_backup != nullptr) {
-        // Now that we have successfully updated the grids, delete the backup copies
-        delete hills_energy_backup;
-        delete hills_energy_gradients_backup;
-      }
     } else {
+      if (need_backup) {
+        if (cvm::debug())
+          cvm::log("Restoring grids from backup for metadynamics bias \"" + this->name + "\"" +
+                   ((comm != single_replica) ? ", replica \"" + replica_id + "\"" : "") + ".\n");
+        // Restoring content from original grid
+        hills_energy->copy_grid(*hills_energy_backup);
+        hills_energy_gradients->copy_grid(*hills_energy_gradients_backup);
+      }
       return is;
     }
   }
@@ -1451,10 +1440,12 @@ void colvarbias_meta::rebin_grids_after_restart()
     // read from the configuration file), and project onto them the
     // grids just read from the restart file
 
-    colvar_grid_scalar   *new_hills_energy =
-      new colvar_grid_scalar(colvars);
-    colvar_grid_gradient *new_hills_energy_gradients =
-      new colvar_grid_gradient(colvars);
+    // Create new grids based on the configuration parameters, because reading from the state
+    // file automatically sets the old parameters
+    std::shared_ptr<colvar_grid_scalar> new_hills_energy(
+        new colvar_grid_scalar(colvars, nullptr, false, grid_conf));
+    std::shared_ptr<colvar_grid_gradient> new_hills_energy_gradients(
+        new colvar_grid_gradient(colvars, nullptr, new_hills_energy));
 
     if (cvm::debug()) {
       std::ostringstream tmp_os;
@@ -1468,9 +1459,9 @@ void colvarbias_meta::rebin_grids_after_restart()
     if (restart_keep_hills && !hills.empty()) {
       // if there are hills, recompute the new grids from them
       cvm::log("Rebinning the energy and forces grids from "+
-               cvm::to_str(hills.size())+" hills (this may take a while)...\n");
-      project_hills(hills.begin(), hills.end(),
-                    new_hills_energy, new_hills_energy_gradients, true);
+               cvm::to_str(hills.size())+" hills (this may take a bit)...\n");
+      project_hills(hills.begin(), hills.end(), new_hills_energy.get(),
+                    new_hills_energy_gradients.get(), true);
       cvm::log("rebinning done.\n");
 
     } else {
@@ -1481,15 +1472,13 @@ void colvarbias_meta::rebin_grids_after_restart()
       new_hills_energy_gradients->map_grid(*hills_energy_gradients);
     }
 
-    delete hills_energy;
-    delete hills_energy_gradients;
-    hills_energy = new_hills_energy;
-    hills_energy_gradients = new_hills_energy_gradients;
+    hills_energy = std::move(new_hills_energy);
+    hills_energy_gradients = std::move(new_hills_energy_gradients);
 
     // assuming that some boundaries have expanded, eliminate those
     // off-grid hills that aren't necessary any more
     if (!hills.empty())
-      recount_hills_off_grid(hills.begin(), hills.end(), hills_energy);
+      recount_hills_off_grid(hills.begin(), hills.end());
   }
 }
 
@@ -1718,29 +1707,17 @@ int colvarbias_meta::setup_output()
 
   if (comm == multiple_replicas) {
 
-    // TODO: one may want to specify the path manually for intricated filesystems?
-    char *pwd = new char[3001];
-    if (GETCWD(pwd, 3000) == nullptr) {
-      if (pwd != nullptr) { //
-        delete[] pwd;
-      }
-      return cvm::error("Error: cannot get the path of the current working directory.\n",
-                        COLVARS_BUG_ERROR);
-    }
-
+    auto const pwd = cvm::main()->proxy->get_current_work_dir();
     replica_list_file =
-      (std::string(pwd)+std::string(PATHSEP)+
-       this->name+"."+replica_id+".files.txt");
+        cvm::main()->proxy->join_paths(pwd, this->name + "." + replica_id + ".files.txt");
     // replica_hills_file and replica_state_file are those written
     // by the current replica; within the mirror biases, they are
     // those by another replica
-    replica_hills_file =
-      (std::string(pwd)+std::string(PATHSEP)+
-       cvm::output_prefix()+".colvars."+this->name+"."+replica_id+".hills");
-    replica_state_file =
-      (std::string(pwd)+std::string(PATHSEP)+
-       cvm::output_prefix()+".colvars."+this->name+"."+replica_id+".state");
-    delete[] pwd;
+    replica_hills_file = cvm::main()->proxy->join_paths(
+        pwd, cvm::output_prefix() + ".colvars." + this->name + "." + replica_id + ".hills");
+
+    replica_state_file = cvm::main()->proxy->join_paths(
+        pwd, cvm::output_prefix() + ".colvars." + this->name + "." + replica_id + ".state");
 
     // now register this replica
 
@@ -1842,7 +1819,7 @@ template <typename OST> OST &colvarbias_meta::write_state_data_template_(OST &os
 
     // this is a very good time to project hills, if you haven't done
     // it already!
-    project_hills(new_hills_begin, hills.end(), hills_energy, hills_energy_gradients);
+    project_hills(new_hills_begin, hills.end(), hills_energy.get(), hills_energy_gradients.get());
     new_hills_begin = hills.end();
 
     // write down the grids to the restart file
diff --git a/lib/colvars/colvarbias_meta.h b/lib/colvars/colvarbias_meta.h
index a765a60c71..57aa21ed6b 100644
--- a/lib/colvars/colvarbias_meta.h
+++ b/lib/colvars/colvarbias_meta.h
@@ -10,12 +10,16 @@
 #ifndef COLVARBIAS_META_H
 #define COLVARBIAS_META_H
 
-#include <vector>
-#include <list>
 #include <iosfwd>
+#include <list>
+#include <memory>
+#include <vector>
 
 #include "colvarbias.h"
-#include "colvargrid.h"
+
+class colvar_grid_scalar;
+class colvar_grid_gradient;
+
 
 
 /// Metadynamics bias (implementation of \link colvarbias \endlink)
@@ -123,8 +127,7 @@ protected:
   hill_iter new_hills_off_grid_begin;
 
   /// Regenerate the hills_off_grid list
-  void recount_hills_off_grid(hill_iter h_first, hill_iter h_last,
-                              colvar_grid_scalar *ge);
+  void recount_hills_off_grid(hill_iter h_first, hill_iter h_last);
 
   template <typename OST> OST &write_hill_template_(OST &os, colvarbias_meta::hill const &h);
 
@@ -211,7 +214,7 @@ protected:
   bool       ebmeta;
 
   /// Target distribution for EBmeta
-  colvar_grid_scalar* target_dist;
+  std::unique_ptr<colvar_grid_scalar> target_dist;
 
   /// Number of equilibration steps for EBmeta
   cvm::step_number ebmeta_equil_steps;
@@ -223,15 +226,14 @@ protected:
   bool       safely_read_restart;
 
   /// Hill energy, cached on a grid
-  colvar_grid_scalar    *hills_energy;
+  std::shared_ptr<colvar_grid_scalar> hills_energy;
 
   /// Hill forces, cached on a grid
-  colvar_grid_gradient  *hills_energy_gradients;
+  std::shared_ptr<colvar_grid_gradient> hills_energy_gradients;
 
-  /// \brief Project the selected hills onto grids
-  void project_hills(hill_iter h_first, hill_iter h_last,
-                      colvar_grid_scalar *ge, colvar_grid_gradient *gf,
-                      bool print_progress = false);
+  /// Project the selected hills onto grids
+  void project_hills(hill_iter h_first, hill_iter h_last, colvar_grid_scalar *ge,
+                     colvar_grid_gradient *gf, bool print_progress = false);
 
 
   // Multiple Replicas variables and functions
diff --git a/lib/colvars/colvarbias_opes.cpp b/lib/colvars/colvarbias_opes.cpp
new file mode 100644
index 0000000000..9caeb967e0
--- /dev/null
+++ b/lib/colvars/colvarbias_opes.cpp
@@ -0,0 +1,1996 @@
+// This code is mainly adapted from the PLUMED opes module, which uses the
+// LGPLv3 license as shown below:
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+   Copyright (c) 2020-2021 of Michele Invernizzi.
+
+   This file is part of the OPES plumed module.
+
+   The OPES plumed module is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   The OPES plumed module is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with plumed.  If not, see <http://www.gnu.org/licenses/>.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+#include "colvarbias_opes.h"
+#include "colvarbias.h"
+#include "colvardeps.h"
+#include "colvarproxy.h"
+#include "colvars_memstream.h"
+#include "colvargrid.h"
+
+#include <exception>
+#include <iomanip>
+#include <ios>
+#include <stdexcept>
+#include <algorithm>
+#include <numeric>
+#include <unordered_set>
+#include <limits>
+#include <sstream>
+
+/// Construct an OPES bias with neutral default values for all members; the
+/// configuration-dependent values are assigned afterwards by init().
+colvarbias_opes::colvarbias_opes(char const *key):
+  colvarbias(key), m_kbt(0), m_barrier(0), m_biasfactor(0),
+  m_bias_prefactor(0), m_temperature(0), m_pace(0), m_adaptive_sigma_stride(0),
+  m_adaptive_counter(0), m_counter(1), m_compression_threshold(0), m_compression_threshold2(0),
+  m_adaptive_sigma(false), m_fixed_sigma(false),
+  m_no_zed(false), m_nlist(false), m_recursive_merge(true),
+  m_nlist_param(2, 0), m_epsilon(0), m_sum_weights(0),
+  m_sum_weights2(0), m_cutoff(0), m_cutoff2(0),
+  m_zed(1), m_old_kdenorm(0), m_kdenorm(0),
+  m_val_at_cutoff(0), m_nlist_center(0), m_nlist_index(0),
+  m_nlist_steps(0), m_nlist_update(false),
+  m_nlist_pace_reset(false), m_nker(0), m_calc_work(false),
+  m_work(0), comm(single_replica), m_num_walkers(1),
+  m_num_threads(1), m_nlker(0), m_traj_output_frequency(0),
+  m_traj_line(traj_line{0}), m_is_first_step(true),
+  m_pmf_grid_on(false), m_reweight_grid(nullptr),
+  m_pmf_grid(nullptr), m_pmf_hist_freq(0), m_pmf_shared(true),
+  m_explore(false), m_inf_biasfactor(false)
+{
+#ifdef OPES_THREADING
+  provide(f_cvb_smp, cvm::proxy->get_smp_mode() == colvarproxy_smp::smp_mode_t::inner_loop);
+  if (is_available(f_cvb_smp)) { // test the bias feature provided above, not the colvar feature id
+    enable(f_cvb_smp); // Enabled by default
+  }
+#endif
+}
+
+/// Parse the OPES configuration string, validate it and set up the initial state of the bias.
+/// Returns the code from colvarbias::init(), or the result of cvm::error() on invalid input.
+int colvarbias_opes::init(const std::string& conf) {
+  int error_code = colvarbias::init(conf);
+  enable(f_cvb_scalar_variables);
+  get_keyval_feature(this, conf, "applyBias", f_cvb_apply_force, true);
+  m_temperature = cvm::proxy->target_temperature();
+  m_kbt = m_temperature * cvm::proxy->boltzmann();
+  get_keyval(conf, "newHillFrequency", m_pace);
+  if (m_pace < 1) { // later used as a divisor/modulus (showInfo, update_opes)
+    return cvm::error("newHillFrequency must be a positive integer", COLVARS_INPUT_ERROR);
+  }
+  get_keyval(conf, "barrier", m_barrier);
+  get_keyval(conf, "explore", m_explore, false);
+  if (m_barrier < 0) {
+    return cvm::error("the barrier should be greater than zero", COLVARS_INPUT_ERROR);
+  }
+  std::string biasfactor_str;
+  get_keyval(conf, "biasfactor", biasfactor_str);
+  if ((cvm::proxy->target_temperature() == 0.0) && cvm::proxy->simulation_running()) {
+    cvm::log("WARNING: OPES should not be run without a thermostat or at 0 Kelvin!\n");
+  }
+  m_biasfactor = m_barrier / m_kbt; // default, replaced below when "biasfactor" is given
+  m_inf_biasfactor = biasfactor_str == "inf" || biasfactor_str == "INF";
+  if (m_inf_biasfactor) {
+    m_biasfactor = std::numeric_limits<cvm::real>::infinity();
+    m_bias_prefactor = 1;
+    if (m_explore) {
+      return cvm::error("biasfactor cannot be infinity in the explore mode.");
+    }
+  } else {
+    if (biasfactor_str.size() > 0) {
+      try {
+        m_biasfactor = std::stod(biasfactor_str);
+      } catch (const std::exception& e) {
+        return cvm::error(e.what(), COLVARS_INPUT_ERROR);
+      }
+    }
+    if (m_biasfactor <= 1.0) {
+      return cvm::error("biasfactor must be greater than one (use \"inf\" for uniform target)");
+    }
+    m_bias_prefactor = 1 - 1.0 / m_biasfactor;
+  }
+  if (m_explore) {
+    m_bias_prefactor = m_biasfactor - 1;
+  }
+  get_keyval(conf, "adaptiveSigma", m_adaptive_sigma, false);
+  m_sigma0.resize(num_variables());
+  get_keyval(conf, "gaussianSigma", m_sigma0, std::vector<cvm::real>(num_variables()));
+  m_av_cv.assign(num_variables(), 0);
+  m_av_M2.assign(num_variables(), 0);
+  if (m_adaptive_sigma) {
+    get_keyval(conf, "adaptiveSigmaStride", m_adaptive_sigma_stride, 0);
+    if (m_inf_biasfactor) {
+      return cvm::error("cannot use infinite biasfactor with adaptive sigma", COLVARS_INPUT_ERROR);
+    }
+    if (m_adaptive_sigma_stride == 0) {
+      m_adaptive_sigma_stride = m_pace * 10;
+    }
+    if (m_adaptive_sigma_stride < m_pace) {
+      return cvm::error("It is better to choose an adaptiveSigmaStride >= newHillFrequency.\n", COLVARS_INPUT_ERROR);
+    }
+  } else {
+    if (m_sigma0.size() != num_variables()) {
+      return cvm::error("number of sigma parameters does not match the number of variables", COLVARS_INPUT_ERROR);
+    }
+    if (m_explore) {
+      for (size_t i = 0; i < num_variables(); ++i) {
+        m_sigma0[i] *= std::sqrt(m_biasfactor);
+      }
+    }
+  }
+  get_keyval(conf, "gaussianSigmaMin", m_sigma_min);
+  if ((m_sigma_min.size() != 0) && (m_sigma_min.size() != num_variables())) {
+    return cvm::error("incorrect number of parameters of gaussianSigmaMin");
+  }
+  if (m_sigma_min.size() > 0 && !m_adaptive_sigma) {
+    for (size_t i = 0; i < num_variables(); ++i) {
+      if (m_sigma_min[i] > m_sigma0[i]) {
+        return cvm::error("gaussianSigmaMin of variable " + cvm::to_str(i) + " should be smaller than sigma");
+      }
+    }
+  }
+  get_keyval(conf, "epsilon", m_epsilon, std::exp(-m_barrier/m_bias_prefactor/m_kbt));
+  if (m_epsilon <= 0) {
+    return cvm::error("you must choose a value of epsilon greater than zero");
+  }
+  m_sum_weights = std::pow(m_epsilon, m_bias_prefactor);
+  m_sum_weights2 = m_sum_weights * m_sum_weights;
+  if (m_explore) {
+    get_keyval(conf, "kernelCutoff", m_cutoff, std::sqrt(2.0*m_barrier/m_kbt));
+  } else {
+    get_keyval(conf, "kernelCutoff", m_cutoff, std::sqrt(2.0*m_barrier/m_bias_prefactor/m_kbt));
+  }
+  if (m_cutoff <= 0) {
+    return cvm::error("you must choose a value of kernelCutoff greater than zero");
+  }
+  m_cutoff2 = m_cutoff * m_cutoff;
+  m_val_at_cutoff = std::exp(-0.5 * m_cutoff2);
+  get_keyval(conf, "compressionThreshold", m_compression_threshold, 1);
+  if (m_compression_threshold != 0) {
+    if (m_compression_threshold < 0 || m_compression_threshold > m_cutoff) {
+      return cvm::error("compressionThreshold cannot be smaller than 0 or larger than kernelCutoff", COLVARS_INPUT_ERROR);
+    }
+  }
+  m_compression_threshold2 = m_compression_threshold * m_compression_threshold;
+  get_keyval(conf, "neighborList", m_nlist, false);
+  if (m_nlist) {
+    get_keyval(conf, "neighborListNewHillReset", m_nlist_pace_reset, false);
+    std::vector<cvm::real> nlist_param;
+    get_keyval(conf, "neighborListParameters", nlist_param, std::vector<cvm::real>());
+    if (nlist_param.empty()) {
+      m_nlist_param[0] = 3.0; //*cutoff2_ -> max distance of neighbors
+      m_nlist_param[1] = 0.5; //*nlist_dev2_[i] -> condition for rebuilding
+    } else {
+      if (nlist_param.size() != 2) {
+        return cvm::error("two cutoff parameters are needed for the neighbor list", COLVARS_INPUT_ERROR);
+      }
+      if (nlist_param[0] <= 1.0) {
+        return cvm::error("the first of neighborListParam must be greater than 1.0. The smaller the first, the smaller should be the second as well", COLVARS_INPUT_ERROR);
+      }
+      const cvm::real min_PARAM_1 = (1.-1./std::sqrt(nlist_param[0]))+0.16;
+      if (nlist_param[1] <= 0) {
+        return cvm::error("the second of neighborListParam must be greater than 0", COLVARS_INPUT_ERROR);
+      }
+      if (nlist_param[1] > min_PARAM_1) {
+        return cvm::error("the second of neighborListParam must be smaller to avoid systematic errors. Largest suggested value is: 1.16-1/sqrt(param_0) = " + cvm::to_str(min_PARAM_1), COLVARS_INPUT_ERROR);
+      }
+      m_nlist_param = nlist_param;
+    }
+    m_nlist_center.resize(num_variables());
+    m_nlist_dev2.resize(num_variables(), 0);
+    m_nlist_steps = 0;
+    m_nlist_update = true;
+  }
+  get_keyval(conf, "noZed", m_no_zed, false);
+  if (m_no_zed) {
+    m_sum_weights = 1;
+    m_sum_weights2 = 1;
+  }
+  get_keyval(conf, "fixedGaussianSigma", m_fixed_sigma, false);
+  get_keyval(conf, "recursiveMerge", m_recursive_merge, true);
+  get_keyval(conf, "calcWork", m_calc_work, false);
+  bool b_replicas = false;
+  get_keyval(conf, "multipleReplicas", b_replicas, false);
+
+#ifdef OPES_THREADING
+  get_keyval_feature(this, conf, "smp", f_cvb_smp, is_enabled(f_cvb_smp));
+  if (is_enabled(f_cvb_smp)) { // the bias feature parsed just above, not the colvar feature id
+    m_num_threads = cvm::proxy->smp_num_threads();
+  } else {
+    m_num_threads = 1;
+  }
+#else
+  m_num_threads = 1; // threading support is not compiled in
+#endif
+  bool serial = false;
+  get_keyval(conf, "serial", serial, false);
+  if (serial) m_num_threads = 1;
+  comm = b_replicas ? multiple_replicas : single_replica;
+  if (comm == multiple_replicas) {
+    colvarproxy *proxy = cvm::main()->proxy;
+    get_keyval(conf, "replicaID", replica_id, replica_id);
+    get_keyval(conf, "sharedFreq", shared_freq, output_freq);
+    if (!replica_id.size()) {
+      if (proxy->check_replicas_enabled() == COLVARS_OK) {
+        // Obtain replicaID from the communicator
+        replica_id = cvm::to_str(proxy->replica_index());
+        cvm::log("Setting replicaID from communication layer: replicaID = "+
+                 replica_id+".\n");
+      } else {
+        return cvm::error("Error: using more than one replica, but replicaID "
+                          "could not be obtained.\n", COLVARS_INPUT_ERROR);
+      }
+    }
+    m_num_walkers = proxy->num_replicas();
+  }
+  get_keyval(conf, "pmf", m_pmf_grid_on, false);
+  if (m_pmf_grid_on) {
+    std::vector<std::string> pmf_cv_name;
+    get_keyval(conf, "pmfColvars", pmf_cv_name);
+    for (auto it = pmf_cv_name.begin(); it != pmf_cv_name.end(); ++it) {
+      bool found = false;
+      for (size_t i = 0; i < num_variables(); ++i) {
+        if (variables(i)->name == (*it)) {
+          if (variables(i)->enable(f_cv_grid) != COLVARS_OK) {
+            return cvm::error("CV " + (*it) + " does not support grid\n");
+          }
+          m_pmf_cvs.push_back(variables(i));
+          found = true;
+          break;
+        }
+      }
+      if (!found) {
+        return cvm::error("CV " + (*it) + " not found\n");
+      }
+    }
+    key_lookup(conf, "grid", &grid_conf);
+    m_reweight_grid.reset(new colvar_grid_scalar(m_pmf_cvs, nullptr, false, grid_conf));
+    m_pmf_grid.reset(new colvar_grid_scalar(m_pmf_cvs, m_reweight_grid));
+    get_keyval(conf, "pmfHistoryFrequency", m_pmf_hist_freq, 0);
+    if (comm == multiple_replicas) {
+      get_keyval(conf, "pmfShared", m_pmf_shared, true);
+      if (m_pmf_shared) {
+        m_global_reweight_grid.reset(new colvar_grid_scalar(m_pmf_cvs, m_reweight_grid));
+        m_global_pmf_grid.reset(new colvar_grid_scalar(m_pmf_cvs, m_reweight_grid));
+      }
+    }
+  }
+  m_kdenorm = m_explore? m_counter : m_sum_weights;
+  m_old_kdenorm = m_kdenorm;
+  m_traj_line.rct = m_kbt * cvm::logn(m_sum_weights / m_counter);
+  m_traj_line.zed = m_zed;
+  m_traj_line.neff = (1 + m_sum_weights) * (1 + m_sum_weights) / (1 + m_sum_weights2);
+  m_traj_line.nker = m_kernels.size();
+  get_keyval(conf, "printTrajectoryFrequency", m_traj_output_frequency, cvm::cv_traj_freq);
+  m_cv.resize(num_variables(), 0);
+  showInfo();
+  return error_code;
+}
+
+/// Log a summary of the OPES settings currently stored in this bias and register its citations.
+void colvarbias_opes::showInfo() const {
+  auto printInfo = [&](const std::string& info, const std::string& val){ // "<name>: <info><val>\n"
+    cvm::log(this->name + ": " + info + val + "\n");
+  };
+  printInfo("temperature = ", cvm::to_str(m_kbt / cvm::proxy->boltzmann()));
+  printInfo("beta = ", cvm::to_str(1.0 / m_kbt));
+  printInfo("depositing new kernels with newHillFrequency = ", cvm::to_str(m_pace));
+  printInfo("expected barrier is ", cvm::to_str(m_barrier));
+  printInfo("using target distribution with biasfactor (gamma) = ", m_inf_biasfactor ? "inf" : cvm::to_str(m_biasfactor));
+  if (m_inf_biasfactor) {
+    cvm::log("  (thus a uniform flat target distribution, no well-tempering)\n");
+    cvm::log(this->name + ": " + "the equivalent bias temperature = inf\n");
+  } else {
+    printInfo("the equivalent bias temperature = ", cvm::to_str(cvm::proxy->target_temperature() * (m_biasfactor - 1)));
+  }
+  if (m_adaptive_sigma) {
+    printInfo("adaptive sigma will be used, with adaptiveSigmaStride = ", cvm::to_str(m_adaptive_sigma_stride));
+    const size_t x = (m_pace > 0) ? static_cast<size_t>(std::ceil(static_cast<cvm::real>(m_adaptive_sigma_stride) / m_pace)) : 0; // divide as reals: integer division would truncate before ceil
+    printInfo("  thus the first x kernel depositions will be skipped, x = adaptiveSigmaStride/newHillFrequency = ", cvm::to_str(x));
+  } else {
+    std::string sigmas;
+    for (size_t i = 0; i < num_variables(); ++i) {
+      sigmas += " " + cvm::to_str(m_sigma0[i]);
+    }
+    cvm::log(this->name + ": kernels have initial gaussianSigma = " + sigmas + "\n");
+  }
+  if (m_fixed_sigma) {
+    cvm::log(this->name + " fixedGaussianSigma: gaussianSigma will not decrease as the simulation proceeds\n");
+  }
+  printInfo("kernels are truncated with kernelCutoff = ", cvm::to_str(m_cutoff));
+  if (m_cutoff < 3.5) {
+    cvm::log(this->name + " +++ WARNING +++ probably kernels are truncated too much\n");
+  }
+  printInfo("the value at cutoff is = ", cvm::to_str(m_val_at_cutoff));
+  printInfo("regularization epsilon = ", cvm::to_str(m_epsilon));
+  if (m_val_at_cutoff > m_epsilon*(1+1e-6)) {
+    cvm::log(this->name + " +++ WARNING +++ the kernelCutoff might be too small for the given epsilon\n");
+  }
+  printInfo("kernels will be compressed when closer than compression_threshold = ", cvm::to_str(m_compression_threshold));
+  if (m_compression_threshold2 == 0) {
+    cvm::log(this->name + " +++ WARNING +++ kernels will never merge, expect slowdowns\n");
+  }
+  if (!m_recursive_merge) {
+    cvm::log(this->name + " -- RECURSIVE_MERGE_OFF: only one merge for each new kernel will be attempted. This is faster only if total number of kernels does not grow too much\n");
+  }
+  if (m_nlist) {
+    cvm::log(this->name + " neighborList: using neighbor list for kernels, with parameters: " + cvm::to_str(m_nlist_param[0]) + " " + cvm::to_str(m_nlist_param[1]) + "\n");
+    if (m_nlist_pace_reset) {
+      cvm::log(this->name + " neighborListNewHillReset: forcing the neighbor list to update every time when depositing a new hill\n");
+    }
+  }
+  if (m_no_zed) {
+    printInfo("noZed: using fixed normalization factor = ", cvm::to_str(m_zed));
+  }
+  if (comm == multiple_replicas && m_num_walkers > 1) {
+    cvm::log(this->name + " if multiple replicas are present, they will share the same bias\n");
+  }
+  if (m_num_threads > 1) {
+    printInfo("using multiple threads per simulation: ", cvm::to_str(m_num_threads));
+  }
+  cvm::main()->cite_feature("OPES");
+  if (m_adaptive_sigma || m_explore) {
+    cvm::main()->cite_feature("OPES explore or adaptive kernels");
+  }
+}
+
+/// Value of kernel G at point x; returns 0 once the scaled squared distance reaches m_cutoff2.
+cvm::real colvarbias_opes::evaluateKernel(const colvarbias_opes::kernel& G,
+                                          const std::vector<cvm::real>& x) const {
+  cvm::real scaled_dist2 = 0;
+  for (size_t d = 0; d < num_variables(); ++d) {
+    const cvm::real sigma2_d = G.m_sigma[d] * G.m_sigma[d];
+    scaled_dist2 += variables(d)->dist2(G.m_center[d], x[d]) / sigma2_d;
+    // The partial sum only grows, so the remaining variables can be skipped
+    if (scaled_dist2 >= m_cutoff2) return 0;
+  }
+  // Gaussian term shifted down by the stored value at the cutoff
+  return G.m_height * (std::exp(-0.5 * scaled_dist2) - m_val_at_cutoff);
+}
+
+/// Value of kernel G at point x; its derivative with respect to x is also subtracted
+/// from accumulated_derivative. dist is a caller-provided buffer overwritten per variable.
+cvm::real colvarbias_opes::evaluateKernel(
+  const colvarbias_opes::kernel& G, const std::vector<cvm::real>& x,
+  std::vector<cvm::real>& accumulated_derivative, std::vector<cvm::real>& dist) const {
+  cvm::real scaled_dist2 = 0;
+  for (size_t d = 0; d < num_variables(); ++d) {
+    // Half of dist2_lgrad, divided by the kernel width of this variable
+    dist[d] = 0.5 * variables(d)->dist2_lgrad(x[d], G.m_center[d]) / G.m_sigma[d];
+    scaled_dist2 += dist[d] * dist[d];
+    // Beyond the cutoff: no value and no derivative contribution
+    if (scaled_dist2 >= m_cutoff2) return 0;
+  }
+  const cvm::real kernel_value = G.m_height * (std::exp(-0.5 * scaled_dist2) - m_val_at_cutoff);
+  for (size_t d = 0; d < num_variables(); ++d) {
+    accumulated_derivative[d] -= kernel_value * dist[d] / G.m_sigma[d];
+  }
+  return kernel_value;
+}
+
+
+cvm::real colvarbias_opes::getProbAndDerivatives(
+  const std::vector<cvm::real>& cv, std::vector<cvm::real>& der_prob) const { // returns sum of kernel values at cv / m_kdenorm; der_prob is accumulated into, then divided by m_kdenorm
+  double prob = 0.0; // running (unnormalized) sum of kernel values
+  std::vector<cvm::real> dist(num_variables(), 0); // scratch buffer handed to evaluateKernel
+  if (!m_nlist) { // no neighbor list: loop over every kernel
+    if (m_num_threads == 1 || m_kernels.size() < 2 * m_num_threads) { // serial path, also taken when there are few kernels
+      for (size_t k = 0; k < m_kernels.size(); ++k) {
+        prob += evaluateKernel(m_kernels[k], cv, der_prob, dist);
+      }
+    } else {
+#if defined(_OPENMP)
+      #pragma omp parallel num_threads(m_num_threads)
+      {
+        std::vector<cvm::real> omp_deriv(der_prob.size(), 0); // per-thread derivative accumulator
+        std::vector<cvm::real> tmp_dist(num_variables()); // per-thread scratch buffer
+        #pragma omp for reduction(+:prob) nowait
+        for (int k = 0; k < static_cast<int>(m_kernels.size()); ++k) {
+          prob += evaluateKernel(m_kernels[k], cv, omp_deriv, tmp_dist);
+        }
+        #pragma omp critical
+        for (int i = 0; i < static_cast<int>(num_variables()); ++i) { // merge this thread's derivatives into the shared output
+          der_prob[i]+=omp_deriv[i];
+        }
+        #pragma omp single
+        for (int i = 0; i < static_cast<int>(num_variables()); ++i) { // one thread copies its scratch values into dist
+          dist[i] = tmp_dist[i];
+        }
+      }
+#elif defined(CMK_SMP) && defined(USE_CKLOOP)
+      // TODO: Test this once fine-grained parallelization is enabled
+      std::vector<std::vector<cvm::real>> derivs(m_num_threads, std::vector<cvm::real>(num_variables(), 0)); // one derivative accumulator per thread
+      std::vector<std::vector<cvm::real>> dists(m_num_threads, std::vector<cvm::real>(num_variables(), 0)); // one scratch buffer per thread
+      auto worker = [&](int start, int end, void* result){ // sums kernels with indices start..end (inclusive)
+        const int tid = cvm::proxy->smp_thread_id();
+        double tmp_prob = 0;
+        for (int i = start; i <= end; ++i) {
+          tmp_prob += evaluateKernel(m_kernels[i], cv, derivs[tid], dists[tid]);
+        }
+        *(double *)result = tmp_prob;
+      };
+      const size_t numChunks = m_kernels.size();
+      const size_t lowerRange = 0;
+      const size_t upperRange = numChunks - 1;
+      CkLoop_Parallelize(
+        numChunks, lowerRange, upperRange,
+        worker, &prob, CKLOOP_DOUBLE_SUM, NULL);
+      for (size_t i = 0; i < num_variables(); ++i) { // merge the per-thread accumulators
+        for (size_t j = 0; j < m_num_threads; ++j) {
+          if (j == 0) dist[i] = dists[j][i];
+          der_prob[i] += derivs[j][i];
+        }
+      }
+#else
+      cvm::error("multiple threads required in OPES, but this binary is not linked with a supported threading library.\n");
+#endif
+    }
+  } else { // neighbor list: loop only over the kernels listed in m_nlist_index
+    if (m_num_threads == 1 || m_nlist_index.size() < 2 * m_num_threads) { // serial path, also taken when the list is short
+      for (size_t nk = 0; nk < m_nlist_index.size(); ++nk) {
+        const size_t k = m_nlist_index[nk];
+        prob += evaluateKernel(m_kernels[k], cv, der_prob, dist);
+      }
+    } else {
+#if defined(_OPENMP)
+      #pragma omp parallel num_threads(m_num_threads)
+      {
+        std::vector<cvm::real> omp_deriv(der_prob.size(), 0); // per-thread derivative accumulator
+        std::vector<cvm::real> tmp_dist(num_variables()); // per-thread scratch buffer
+        #pragma omp for reduction(+:prob) nowait
+        for (int nk = 0; nk < static_cast<int>(m_nlist_index.size()); ++nk) {
+          const size_t k = m_nlist_index[nk];
+          prob += evaluateKernel(m_kernels[k], cv, omp_deriv, tmp_dist);
+        }
+        #pragma omp critical
+        for (int i = 0; i < static_cast<int>(num_variables()); ++i) { // merge this thread's derivatives into the shared output
+          der_prob[i]+=omp_deriv[i];
+        }
+        #pragma omp single
+        for (int i = 0; i < static_cast<int>(num_variables()); ++i) { // one thread copies its scratch values into dist
+          dist[i] = tmp_dist[i];
+        }
+      }
+#elif defined(CMK_SMP) && defined(USE_CKLOOP)
+      // TODO: Test this once fine-grained parallelization is enabled
+      std::vector<std::vector<cvm::real>> derivs(m_num_threads, std::vector<cvm::real>(num_variables(), 0)); // one derivative accumulator per thread
+      std::vector<std::vector<cvm::real>> dists(m_num_threads, std::vector<cvm::real>(num_variables(), 0)); // one scratch buffer per thread
+      auto worker = [&](int start, int end, void* result){ // sums list entries start..end (inclusive)
+        const int tid = cvm::proxy->smp_thread_id();
+        double tmp_prob = 0;
+        for (int i = start; i <= end; ++i) {
+          const size_t k = m_nlist_index[i];
+          tmp_prob += evaluateKernel(m_kernels[k], cv, derivs[tid], dists[tid]);
+        }
+        *(double *)result = tmp_prob;
+      };
+      const size_t numChunks = m_nlist_index.size();
+      const size_t lowerRange = 0;
+      const size_t upperRange = numChunks - 1;
+      CkLoop_Parallelize(
+        numChunks, lowerRange, upperRange,
+        worker, &prob, CKLOOP_DOUBLE_SUM, NULL);
+      for (size_t i = 0; i < num_variables(); ++i) { // merge the per-thread accumulators
+        for (size_t j = 0; j < m_num_threads; ++j) {
+          if (j == 0) dist[i] = dists[j][i];
+          der_prob[i] += derivs[j][i];
+        }
+      }
+#else
+      cvm::error("multiple threads required in OPES, but this binary is not linked with a supported threading library.\n");
+#endif
+    }
+  }
+  prob /= m_kdenorm; // normalize the value ...
+  for (size_t i = 0; i < num_variables(); ++i) {
+    der_prob[i] /= m_kdenorm; // ... and each derivative component
+  }
+  return prob;
+}
+
+/// Rebuild the neighbor list if required, then compute the bias energy and forces at m_cv.
+int colvarbias_opes::calculate_opes() {
+  if (m_nlist) {
+    ++m_nlist_steps;
+    if ((comm == multiple_replicas) && (cvm::step_absolute() % shared_freq == 0)) {
+      // Exchange steps between replicas always request a rebuild
+      m_nlist_update = true;
+    } else {
+      // Otherwise rebuild once any variable exceeds its allowed squared deviation from the list center
+      for (size_t d = 0; d < num_variables(); ++d) {
+        const cvm::real moved2 = variables(d)->dist2(m_cv[d], m_nlist_center[d]);
+        if (moved2 > m_nlist_param[1] * m_nlist_dev2[d]) {
+          m_nlist_update = true;
+          break;
+        }
+      }
+    }
+    if (m_nlist_update) updateNlist(m_cv);
+  }
+  std::vector<cvm::real> grad_prob(num_variables(), 0);
+  const cvm::real prob = getProbAndDerivatives(m_cv, grad_prob);
+  // Probability divided by m_zed, offset by m_epsilon
+  const cvm::real reg_prob = prob / m_zed + m_epsilon;
+  bias_energy = m_kbt * m_bias_prefactor * cvm::logn(reg_prob);
+  if (is_enabled(f_cvb_apply_force)) {
+    const cvm::real scale = -m_kbt * m_bias_prefactor / reg_prob;
+    for (size_t d = 0; d < num_variables(); ++d) {
+      colvar_forces[d] = scale * grad_prob[d] / m_zed;
+    }
+  }
+  return COLVARS_OK;
+}
+
+int colvarbias_opes::update_opes() {
+  if (m_adaptive_sigma) {
+    m_adaptive_counter++;
+    cvm::step_number tau = m_adaptive_sigma_stride;
+    if (m_adaptive_counter < m_adaptive_sigma_stride) tau = m_adaptive_counter;
+    for (size_t i = 0; i < num_variables(); ++i) {
+      // Welford's online algorithm for standard deviation
+      const cvm::real diff_i = 0.5 * variables(i)->dist2_lgrad(m_cv[i], m_av_cv[i]);
+      m_av_cv[i] += diff_i / tau;
+      m_av_M2[i] += diff_i * 0.5 * variables(i)->dist2_lgrad(m_cv[i], m_av_cv[i]);
+    }
+    if (m_adaptive_counter < m_adaptive_sigma_stride && m_counter == 1) {
+      return COLVARS_OK;;
+    }
+  }
+  if (cvm::step_absolute() % m_pace == 0) {
+    m_old_kdenorm = m_kdenorm;
+    m_delta_kernels.clear();
+    const size_t old_nker = m_kernels.size();
+    // TODO: how could I account for extra biases in Colvars?
+    const cvm::real log_weight = bias_energy / m_kbt;
+    cvm::real height = cvm::exp(log_weight);
+    cvm::real sum_heights = height;
+    cvm::real sum_heights2 = height * height;
+    if (m_num_walkers > 1) {
+      std::vector<cvm::real> replica_sum_heights(cvm::proxy->num_replicas() - 1, 0);
+      // Send all sum_heights to PE 0
+      if (cvm::proxy->replica_index() == 0) {
+        for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+          if (cvm::proxy->replica_comm_recv((char*)&(replica_sum_heights[p - 1]), sizeof(cvm::real), p) != sizeof(cvm::real)) {
+            return cvm::error("Error: receiving sum of weights from replica " + cvm::to_str(p));
+          }
+        }
+      } else {
+        if (cvm::proxy->replica_comm_send((char*)&sum_heights, sizeof(cvm::real), 0) != sizeof(cvm::real)) {
+          return cvm::error("Error: sending sum of weights to replica 0.");
+        }
+      }
+      cvm::proxy->replica_comm_barrier();
+      // PE 0 sum all sum_heights and broadcast
+      if (cvm::proxy->replica_index() == 0) {
+        for (auto it = replica_sum_heights.begin(); it != replica_sum_heights.end(); ++it) {
+          sum_heights += (*it);
+        }
+        for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+          if (cvm::proxy->replica_comm_send((char*)&sum_heights, sizeof(cvm::real), p) != sizeof(cvm::real)) {
+            return cvm::error("Error: sending sum of weights to replica " + cvm::to_str(p));
+          }
+        }
+      } else {
+        if (cvm::proxy->replica_comm_recv((char*)&sum_heights, sizeof(cvm::real), 0) != sizeof(cvm::real)) {
+          return cvm::error("Error: receiving sum of weights from replica 0.");
+        }
+      }
+      cvm::proxy->replica_comm_barrier();
+      // Send all sum_heights2 to PE 0
+      std::vector<cvm::real> replica_sum_heights2(cvm::proxy->num_replicas() - 1, 0);
+      if (cvm::proxy->replica_index() == 0) {
+        for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+          if (cvm::proxy->replica_comm_recv((char*)&(replica_sum_heights2[p - 1]), sizeof(cvm::real), p) != sizeof(cvm::real)) {
+            return cvm::error("Error: getting sum of weights2 from replica " + cvm::to_str(p));
+          }
+        }
+      } else {
+        if (cvm::proxy->replica_comm_send((char*)&sum_heights2, sizeof(cvm::real), 0) != sizeof(cvm::real)) {
+          return cvm::error("Error: sending sum of weights2 from replica.");
+        }
+      }
+      cvm::proxy->replica_comm_barrier();
+      // PE 0 sum all sum_heights2 and broadcast
+      if (cvm::proxy->replica_index() == 0) {
+        for (auto it = replica_sum_heights2.begin(); it != replica_sum_heights2.end(); ++it) {
+          sum_heights2 += (*it);
+        }
+        for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+          if (cvm::proxy->replica_comm_send((char*)&sum_heights2, sizeof(cvm::real), p) != sizeof(cvm::real)) {
+            return cvm::error("Error: sending sum of weights2 to replica " + cvm::to_str(p));
+          }
+        }
+      } else {
+        if (cvm::proxy->replica_comm_recv((char*)&sum_heights2, sizeof(cvm::real), 0) != sizeof(cvm::real)) {
+          return cvm::error("Error: receiving sum of weights2 from replica.");
+        }
+      }
+      cvm::proxy->replica_comm_barrier();
+    }
+    m_counter += m_num_walkers;
+    m_sum_weights += sum_heights;
+    m_sum_weights2 += sum_heights2;
+    m_neff = (1 + m_sum_weights) * (1 + m_sum_weights) / (1 + m_sum_weights2);
+    m_rct = m_kbt * cvm::logn(m_sum_weights / m_counter);
+    m_traj_line.neff = m_neff;
+    m_traj_line.rct = m_rct;
+    if (m_explore) {
+      m_kdenorm = m_counter;
+      height = 1.0;
+    } else {
+      m_kdenorm = m_sum_weights;
+    }
+    std::vector<cvm::real> sigma = m_sigma0;
+    if (m_adaptive_sigma) {
+      const cvm::real factor = m_explore ? 1.0 : m_biasfactor;
+      if (m_counter == 1 + m_num_walkers) {
+        for (size_t i = 0; i < num_variables(); ++i) {
+          m_av_M2[i] *= m_biasfactor;
+        }
+        for (size_t i = 0; i < num_variables(); ++i) {
+          m_sigma0[i] = std::sqrt(m_av_M2[i] / m_adaptive_counter / factor);
+        }
+        if (m_sigma_min.size() == 0) {
+          for (size_t i = 0; i < num_variables(); ++i) {
+            if (m_sigma0[i] < 1e-6) {
+              cvm::error("Adaptive sigma is suspiciously small for CV " + cvm::to_str(i) + "\nManually provide sigma or set a safe sigma_min to avoid possible issues\n");
+              return COLVARS_ERROR;
+            }
+          }
+        } else {
+          for (size_t i = 0; i < num_variables(); ++i) {
+            m_sigma0[i] = std::max(m_sigma0[i], m_sigma_min[i]);
+          }
+        }
+      }
+      for (size_t i = 0; i < num_variables(); ++i) {
+        sigma[i] = std::sqrt(m_av_M2[i] / m_adaptive_counter / factor);
+      }
+      if (m_sigma_min.size() == 0) {
+        bool sigma_less_than_threshold = false;
+        for (size_t i = 0; i < num_variables(); ++i) {
+          if (sigma[i] < 1e-6) {
+            cvm::log("The adaptive sigma is suspiciously small, you should set a safe sigma_min. 1e-6 will be used here\n");
+            sigma[i] = 1e-6;
+            sigma_less_than_threshold = true;
+          }
+        }
+        if (sigma_less_than_threshold) {
+          m_sigma_min.assign(num_variables(), 1e-6);
+        }
+      } else {
+        for (size_t i = 0; i < num_variables(); ++i) {
+          sigma[i] = std::max(sigma[i], m_sigma_min[i]);
+        }
+      }
+    }
+    if (!m_fixed_sigma) {
+      const cvm::real size = m_explore ? m_counter : m_neff;
+      const size_t ncv = num_variables();
+      const cvm::real s_rescaling = std::pow(size * (ncv + 2.0) / 4, -1.0 / (4.0 + ncv));
+      for (size_t i = 0; i < num_variables(); ++i) {
+        sigma[i] *= s_rescaling;
+      }
+      if (m_sigma_min.size() > 0) {
+        for (size_t i = 0; i < num_variables(); ++i) {
+          sigma[i] = std::max(sigma[i], m_sigma_min[i]);
+        }
+      }
+    }
+    // the height should be divided by sqrt(2*pi)*sigma0_,
+    // but this overall factor would be canceled when dividing by Zed
+    // thus we skip it altogether, but keep any other sigma rescaling
+    for (size_t i = 0; i < num_variables(); ++i) {
+      height *= (m_sigma0[i] / sigma[i]);
+    }
+    if (m_num_walkers == 1) {
+      addKernel(height, m_cv, sigma, log_weight);
+    } else {
+      std::vector<cvm::real> all_height(m_num_walkers, 0.0);
+      std::vector<cvm::real> all_center(m_num_walkers * num_variables(), 0.0);
+      std::vector<cvm::real> all_sigma(m_num_walkers * num_variables(), 0.0);
+      std::vector<cvm::real> all_logweight(m_num_walkers, 0.0);
+      const int my_replica = cvm::proxy->replica_index();
+
+      // Allgather of heights
+      if (my_replica == 0) {
+        all_height[0] = height;
+        for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+          if (cvm::proxy->replica_comm_recv((char*)&(all_height[p]), sizeof(decltype(all_height)::value_type), p) != sizeof(decltype(all_height)::value_type)) {
+            return cvm::error("Error: on receiving height on replica 0 from replica " + cvm::to_str(p));
+          }
+        }
+      } else {
+        if (cvm::proxy->replica_comm_send((char*)&height, sizeof(decltype(height)), 0) != sizeof(cvm::real)) {
+          return cvm::error("Error: on sending height to replica 0 from replica " + cvm::to_str(my_replica));
+        }
+      }
+      cvm::proxy->replica_comm_barrier();
+      // Broadcast heights
+      if (my_replica == 0) {
+        const int send_size = sizeof(decltype(all_height)::value_type) * all_height.size();
+        for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+          if (cvm::proxy->replica_comm_send((char*)all_height.data(), send_size, p) != send_size) {
+            return cvm::error("Error: on sending heights from replica 0 to replica " + cvm::to_str(p));
+          }
+        }
+      } else {
+        const int recv_size = sizeof(decltype(all_height)::value_type) * all_height.size();
+        if (cvm::proxy->replica_comm_recv((char*)all_height.data(), recv_size, 0) != recv_size) {
+          return cvm::error("Error: on receiving heights from replica 0 to replica " + cvm::to_str(my_replica));
+        }
+      }
+      cvm::proxy->replica_comm_barrier();
+
+      // Allgather of centers
+      if (my_replica == 0) {
+        std::copy(m_cv.begin(), m_cv.end(), all_center.begin());
+        const int recv_size = sizeof(decltype(m_cv)::value_type) * m_cv.size();
+        for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+          cvm::real* recv_start_ptr = &(all_center[p * m_cv.size()]);
+          if (cvm::proxy->replica_comm_recv((char*)recv_start_ptr, recv_size, p) != recv_size) {
+            return cvm::error("Error on receiving centers from replica 0 to replica " + cvm::to_str(p));
+          }
+        }
+      } else {
+        const int send_size = sizeof(decltype(m_cv)::value_type) * m_cv.size();
+        if (cvm::proxy->replica_comm_send((char*)m_cv.data(), send_size, 0) != send_size) {
+          return cvm::error("Error on sending centers to replica 0 from replica " + cvm::to_str(my_replica));
+        }
+      }
+      cvm::proxy->replica_comm_barrier();
+      // Broadcast centers
+      if (my_replica == 0) {
+        const int send_size = sizeof(decltype(all_center)::value_type) * all_center.size();
+        for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+          if (cvm::proxy->replica_comm_send((char*)all_center.data(), send_size, p) != send_size) {
+            return cvm::error("Error on sending centers from replica 0 to replica " + cvm::to_str(p));
+          }
+        }
+      } else {
+        const int recv_size = sizeof(decltype(all_center)::value_type) * all_center.size();
+        if (cvm::proxy->replica_comm_recv((char*)all_center.data(), recv_size, 0) != recv_size) {
+          return cvm::error("Error on receiving centers from replica 0 to replica " + cvm::to_str(my_replica));
+        }
+      }
+      cvm::proxy->replica_comm_barrier();
+
+      // Allgather of sigmas
+      if (my_replica == 0) {
+        std::copy(sigma.begin(), sigma.end(), all_sigma.begin());
+        const int recv_size = sizeof(decltype(sigma)::value_type) * sigma.size();
+        for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+          cvm::real* recv_start_ptr = &(all_sigma[p * m_cv.size()]);
+          if (cvm::proxy->replica_comm_recv((char*)recv_start_ptr, recv_size, p) != recv_size) {
+            return cvm::error("Error on receiving sigmas from replica 0 to replica " + cvm::to_str(p));
+          }
+        }
+      } else {
+        const int send_size = sizeof(decltype(sigma)::value_type) * sigma.size();
+        if (cvm::proxy->replica_comm_send((char*)sigma.data(), send_size, 0) != send_size) {
+          return cvm::error("Error on sending sigmas to replica 0 from replica " + cvm::to_str(my_replica));
+        }
+      }
+      cvm::proxy->replica_comm_barrier();
+      // Broadcast sigmas
+      if (my_replica == 0) {
+        const int send_size = sizeof(decltype(all_sigma)::value_type) * all_sigma.size();
+        for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+          if (cvm::proxy->replica_comm_send((char*)all_sigma.data(), send_size, p) != send_size) {
+            return cvm::error("Error on sending sigmas from replica 0 to replica " + cvm::to_str(p));
+          }
+        }
+      } else {
+        const int recv_size = sizeof(decltype(all_sigma)::value_type) * all_sigma.size();
+        if (cvm::proxy->replica_comm_recv((char*)all_sigma.data(), recv_size, 0) != recv_size) {
+          return cvm::error("Error on receiving sigmas from replica 0 to replica " + cvm::to_str(my_replica));
+        }
+      }
+      cvm::proxy->replica_comm_barrier();
+
+      // Allgather of logweights
+      if (my_replica == 0) {
+        all_logweight[0] = log_weight;
+        for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+          if (cvm::proxy->replica_comm_recv((char*)&(all_logweight[p]), sizeof(decltype(all_logweight)::value_type), p) != sizeof(decltype(all_logweight)::value_type)) {
+            return cvm::error("Error on receiving log_weight on replica 0 from replica " + cvm::to_str(p));
+          }
+        }
+      } else {
+        if (cvm::proxy->replica_comm_send((char*)&log_weight, sizeof(decltype(log_weight)), 0) != sizeof(cvm::real)) {
+          return cvm::error("Error on sending log_weight to replica 0 from replica " + cvm::to_str(my_replica));
+        }
+      }
+      cvm::proxy->replica_comm_barrier();
+      // Broadcast log_weight
+      if (my_replica == 0) {
+        const int send_size = sizeof(decltype(all_logweight)::value_type) * all_logweight.size();
+        for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+          if (cvm::proxy->replica_comm_send((char*)all_logweight.data(), send_size, p) != send_size) {
+            return cvm::error("Error on sending log_weight from replica 0 to replica " + cvm::to_str(p));
+          }
+        }
+      } else {
+        const int recv_size = sizeof(decltype(all_logweight)::value_type) * all_logweight.size();
+        if (cvm::proxy->replica_comm_recv((char*)all_logweight.data(), recv_size, 0) != recv_size) {
+          return cvm::error("Error on receiving log_weight from replica 0 to replica " + cvm::to_str(my_replica));
+        }
+      }
+      cvm::proxy->replica_comm_barrier();
+
+      if (m_nlist) {
+        std::vector<int> all_nlist_size(m_num_walkers);
+        const int my_replica = cvm::proxy->replica_index();
+        // Get the size of the neighbor list of each replica
+        if (my_replica == 0) {
+          all_nlist_size[0] = m_nlist_index.size();
+          for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+            if (cvm::proxy->replica_comm_recv((char*)&(all_nlist_size[p]), sizeof(int), p) != sizeof(int)) {
+              return cvm::error("Error on receiving neighbor list size from replica " + cvm::to_str(p));
+            }
+          }
+        } else {
+          const int nlist_size = m_nlist_index.size();
+          if (cvm::proxy->replica_comm_send((char*)&nlist_size, sizeof(int), 0) != sizeof(int)) {
+            return cvm::error("Error on sending neighbor list size from replica " + cvm::to_str(my_replica));
+          }
+        }
+        cvm::proxy->replica_comm_barrier();
+        // Broadcast the neighbor list sizes to all replicas
+        if (my_replica == 0) {
+          const int send_size = sizeof(int) * all_nlist_size.size();
+          for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+            if (cvm::proxy->replica_comm_send((char*)all_nlist_size.data(), send_size, p) != send_size) {
+              return cvm::error("Error on sending neighbor list sizes from replica 0 to replica " + cvm::to_str(p));
+            }
+          }
+        } else {
+          const int recv_size = sizeof(int) * all_nlist_size.size();
+          if (cvm::proxy->replica_comm_recv((char*)all_nlist_size.data(), recv_size, 0) != recv_size) {
+            return cvm::error("Error on receiving neighbor list sizes to replica " + cvm::to_str(my_replica));
+          }
+        }
+        cvm::proxy->replica_comm_barrier();
+        // Gather all neighbor lists from replicas
+        const int tot_size = std::accumulate(all_nlist_size.begin(), all_nlist_size.end(), 0);
+        if (tot_size > 0) {
+          // Allgatherv all neighbor lists from replicas
+          std::vector<size_t> all_nlist_index(tot_size);
+          if (my_replica == 0) {
+            std::vector<int> recv_start(m_num_walkers);
+            // Accumulative sum
+            recv_start[0] = 0;
+            std::partial_sum(all_nlist_size.begin(), all_nlist_size.end() - 1, recv_start.begin() + 1);
+            std::copy(m_nlist_index.begin(), m_nlist_index.end(), all_nlist_index.begin());
+            for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+              size_t* recv_start_ptr = &(all_nlist_index[recv_start[p]]);
+              const int recv_size = all_nlist_size[p] * sizeof(decltype(all_nlist_index)::value_type);
+              if (cvm::proxy->replica_comm_recv((char*)recv_start_ptr, recv_size, p) != recv_size) {
+                return cvm::error("Error on receiving neighbor list from replica " + cvm::to_str(p));
+              }
+            }
+          } else {
+            const int send_size = sizeof(decltype(m_nlist_index)::value_type) * m_nlist_index.size();
+            if (cvm::proxy->replica_comm_send((char*)m_nlist_index.data(), send_size, 0) != send_size) {
+              return cvm::error("Error on sending neighbor list from replica " + cvm::to_str(my_replica));
+            }
+          }
+          cvm::proxy->replica_comm_barrier();
+          // Broadcast the neighbor list
+          if (my_replica == 0) {
+            const int send_size = sizeof(decltype(all_nlist_index)::value_type) * tot_size;
+            for (int p = 1; p < cvm::proxy->num_replicas(); ++p) {
+              if (cvm::proxy->replica_comm_send((char*)all_nlist_index.data(), send_size, p) != send_size) {
+                return cvm::error("Error on sending total neighbor list to replica " + cvm::to_str(p));
+              }
+            }
+          } else {
+            const int recv_size = sizeof(decltype(all_nlist_index)::value_type) * tot_size;
+            if (cvm::proxy->replica_comm_recv((char*)all_nlist_index.data(), recv_size, 0) != recv_size) {
+              return cvm::error("Error on receiving total neighbor list on replica " + cvm::to_str(my_replica));
+            }
+          }
+          cvm::proxy->replica_comm_barrier();
+          // Deduplicate and sort the merged neighbor list
+          std::unordered_set<size_t> all_nlist_index_set;
+          for (auto it = all_nlist_index.cbegin(); it != all_nlist_index.cend(); ++it) {
+            all_nlist_index_set.insert(*it);
+          }
+          m_nlist_index.assign(all_nlist_index_set.begin(), all_nlist_index_set.end());
+          std::sort(m_nlist_index.begin(), m_nlist_index.end());
+        }
+      }
+      for (size_t w = 0; w < m_num_walkers; ++w) {
+        std::vector<cvm::real> center_w(
+          all_center.begin() + num_variables() * w,
+          all_center.begin() + num_variables() * (w + 1));
+        std::vector<cvm::real> sigma_w(
+          all_sigma.begin() + num_variables() * w,
+          all_sigma.begin() + num_variables() * (w + 1));
+        addKernel(all_height[w], center_w, sigma_w, all_logweight[w]);
+      }
+    }
+    m_nker = m_kernels.size();
+    m_traj_line.nker = m_nker;
+    if (m_nlist) {
+      m_nlker = m_nlist_index.size();
+      m_traj_line.nlker = m_nlker;
+      if (m_nlist_pace_reset) {
+        m_nlist_update = true;
+      }
+    }
+    if (!m_no_zed) {
+      cvm::real sum_uprob = 0;
+      const size_t ks = m_kernels.size();
+      const size_t ds = m_delta_kernels.size();
+      const int num_parallel = 1; // Always 1
+      const bool few_kernels = (ks * ks < (3 * ks * ds + 2 * ds * ds * num_parallel + 100));
+      if (few_kernels) {
+        if (m_num_threads == 1) {
+          for (size_t k = 0; k < m_kernels.size(); ++k) {
+            for (size_t kk = 0; kk < m_kernels.size(); ++kk) {
+              sum_uprob += evaluateKernel(m_kernels[kk], m_kernels[k].m_center);
+            }
+          }
+        } else {
+#if defined(_OPENMP)
+          #pragma omp parallel num_threads(m_num_threads)
+          {
+            #pragma omp for reduction(+:sum_uprob) nowait
+            for (int k = 0; k < static_cast<int>(m_kernels.size()); ++k) {
+              for (int kk = 0; kk < static_cast<int>(m_kernels.size()); ++kk) {
+                sum_uprob += evaluateKernel(m_kernels[kk], m_kernels[k].m_center);
+              }
+            }
+          }
+#elif defined(CMK_SMP) && defined(USE_CKLOOP)
+          // TODO: Does this work??
+          auto worker = [&](int start, int end, void* result) {
+            double tmp_prob = 0;
+            for (int i = start; i <= end; ++i) {
+              for (size_t kk = 0; kk < m_kernels.size(); ++kk) {
+                tmp_prob += evaluateKernel(m_kernels[kk], m_kernels[i].m_center);
+              }
+            }
+            *(double *)result = tmp_prob;
+          };
+          const size_t numChunks = m_kernels.size();
+          const size_t lowerRange = 0;
+          const size_t upperRange = numChunks - 1;
+          CkLoop_Parallelize(
+            numChunks, lowerRange, upperRange,
+            worker, &sum_uprob, CKLOOP_DOUBLE_SUM, NULL);
+#else
+          cvm::error("OPES cannot run because this binary is not linked with a supported threading library.\n");
+#endif
+        }
+        if (num_parallel > 1) {
+          return cvm::error("Unimplemented feature: OPES in parallel running.\n");
+        }
+      } else {
+        cvm::real delta_sum_uprob = 0;
+        if (!m_nlist) {
+          if (m_num_threads == 1) {
+            for (size_t i = 0; i < m_kernels.size(); ++i) {
+              for (size_t d = 0; d < m_delta_kernels.size(); ++d) {
+                const int sign = m_delta_kernels[d].m_height < 0 ? -1 : 1;
+                delta_sum_uprob += evaluateKernel(m_delta_kernels[d], m_kernels[i].m_center) + sign * evaluateKernel(m_kernels[i], m_delta_kernels[d].m_center);
+              }
+            }
+          } else {
+#if defined(_OPENMP)
+            #pragma omp parallel num_threads(m_num_threads)
+            {
+              #pragma omp for reduction(+:delta_sum_uprob) nowait
+              for (int i = 0; i < static_cast<int>(m_kernels.size()); ++i) {
+                for (int d = 0; d < static_cast<int>(m_delta_kernels.size()); ++d) {
+                  const int sign = m_delta_kernels[d].m_height < 0 ? -1 : 1;
+                  delta_sum_uprob += evaluateKernel(m_delta_kernels[d], m_kernels[i].m_center) + sign * evaluateKernel(m_kernels[i], m_delta_kernels[d].m_center);
+                }
+              }
+            }
+#elif defined(CMK_SMP) && defined(USE_CKLOOP)
+            auto worker = [&](int start, int end, void* result) {
+              double tmp_prob = 0;
+              for (int i = start; i <= end; ++i) {
+                for (size_t d = 0; d < m_delta_kernels.size(); ++d) {
+                  const int sign = m_delta_kernels[d].m_height < 0 ? -1 : 1;
+                  tmp_prob += evaluateKernel(m_delta_kernels[d], m_kernels[i].m_center) + sign * evaluateKernel(m_kernels[i], m_delta_kernels[d].m_center);
+                }
+              }
+              *(double *)result = tmp_prob;
+            };
+            const size_t numChunks = m_kernels.size();
+            const size_t lowerRange = 0;
+            const size_t upperRange = numChunks - 1;
+            CkLoop_Parallelize(
+              numChunks, lowerRange, upperRange,
+              worker, &delta_sum_uprob, CKLOOP_DOUBLE_SUM, NULL);
+#else
+            cvm::error("OPES cannot run because this binary is not linked with a supported threading library.\n");
+#endif
+          }
+        } else {
+          if (m_num_threads == 1) {
+            for (size_t i = 0; i < m_nlist_index.size(); ++i) {
+              const size_t k = m_nlist_index[i];
+              for (size_t d = 0; d < m_delta_kernels.size(); ++d) {
+                const double sign = m_delta_kernels[d].m_height < 0 ? -1 : 1;
+                delta_sum_uprob += evaluateKernel(m_delta_kernels[d], m_kernels[k].m_center) + sign * evaluateKernel(m_kernels[k], m_delta_kernels[d].m_center);
+              }
+            }
+          } else {
+#if defined(_OPENMP)
+            #pragma omp parallel num_threads(m_num_threads)
+            {
+              #pragma omp for reduction(+:delta_sum_uprob) nowait
+              for (int i = 0; i < static_cast<int>(m_nlist_index.size()); ++i) {
+                const size_t k = m_nlist_index[i];
+                for (int d = 0; d < static_cast<int>(m_delta_kernels.size()); ++d) {
+                  const double sign = m_delta_kernels[d].m_height < 0 ? -1 : 1;
+                  delta_sum_uprob += evaluateKernel(m_delta_kernels[d], m_kernels[k].m_center) + sign * evaluateKernel(m_kernels[k], m_delta_kernels[d].m_center);
+                }
+              }
+            }
+#elif defined(CMK_SMP) && defined(USE_CKLOOP)
+            auto worker = [&](int start, int end, void* result) {
+              double tmp_prob = 0;
+              for (int i = start; i <= end; ++i) {
+                const size_t k = m_nlist_index[i];
+                for (size_t d = 0; d < m_delta_kernels.size(); ++d) {
+                  const double sign = m_delta_kernels[d].m_height < 0 ? -1 : 1;
+                  tmp_prob += evaluateKernel(m_delta_kernels[d], m_kernels[k].m_center) + sign * evaluateKernel(m_kernels[k], m_delta_kernels[d].m_center);
+                }
+              }
+              *(double *)result = tmp_prob;
+            };
+            const size_t numChunks = m_nlist_index.size();
+            const size_t lowerRange = 0;
+            const size_t upperRange = numChunks - 1;
+            CkLoop_Parallelize(
+              numChunks, lowerRange, upperRange,
+              worker, &delta_sum_uprob, CKLOOP_DOUBLE_SUM, NULL);
+#else
+            cvm::error("OPES cannot run because this binary is not linked with a supported threading library.\n");
+#endif
+          }
+        }
+        if (num_parallel > 1) {
+          return cvm::error("Unimplemented feature: OPES in parallel running.\n");
+        }
+        if (m_num_threads == 1) {
+          for (size_t d = 0; d < m_delta_kernels.size(); ++d) {
+            for (size_t dd = 0; dd < m_delta_kernels.size(); ++dd) {
+              const int sign = m_delta_kernels[d].m_height < 0 ? -1 : 1;
+              delta_sum_uprob -= sign *evaluateKernel(m_delta_kernels[dd], m_delta_kernels[d].m_center);
+            }
+          }
+        } else {
+#if defined(_OPENMP)
+          #pragma omp parallel num_threads(m_num_threads)
+          {
+            #pragma omp for reduction(+:delta_sum_uprob)
+            for (int d = 0; d < static_cast<int>(m_delta_kernels.size()); ++d) {
+              for (int dd = 0; dd < static_cast<int>(m_delta_kernels.size()); ++dd) {
+                const int sign = m_delta_kernels[d].m_height < 0 ? -1 : 1;
+                delta_sum_uprob -= sign * evaluateKernel(m_delta_kernels[dd], m_delta_kernels[d].m_center);
+              }
+            }
+          }
+#elif defined(CMK_SMP) && defined(USE_CKLOOP)
+          auto worker = [&](int start, int end, void* result) {
+            double tmp_prob = 0;
+            for (int d = start; d <= end; ++d) {
+              for (size_t dd = 0; dd < m_delta_kernels.size(); ++dd) {
+                const int sign = m_delta_kernels[d].m_height < 0 ? -1 : 1;
+                tmp_prob += sign * evaluateKernel(m_delta_kernels[dd], m_delta_kernels[d].m_center);
+              }
+            }
+            *(double *)result = tmp_prob;
+          };
+          const size_t numChunks = m_delta_kernels.size();
+          const size_t lowerRange = 0;
+          const size_t upperRange = numChunks - 1;
+          double tmp = 0;
+          CkLoop_Parallelize(
+            numChunks, lowerRange, upperRange,
+            worker, &tmp, CKLOOP_DOUBLE_SUM, NULL);
+          delta_sum_uprob -= tmp;
+#else
+          cvm::error("OPES cannot run because this binary is not linked with a supported threading library.\n");
+#endif
+        }
+        sum_uprob = m_zed * m_old_kdenorm * old_nker + delta_sum_uprob;
+      }
+      m_zed = sum_uprob / m_kdenorm / m_kernels.size();
+      m_traj_line.zed = m_zed;
+    }
+    if (m_calc_work) {
+      std::vector<cvm::real> dummy(num_variables());
+      const cvm::real prob = getProbAndDerivatives(m_cv, dummy);
+      const cvm::real new_bias = m_kbt * m_bias_prefactor * cvm::logn(prob / m_zed + m_epsilon);
+      m_work += new_bias - bias_energy;
+      m_traj_line.work = m_work;
+    }
+  }
+  return COLVARS_OK;
+}
+
+// Copy the quantities that the state writer emits from the m_saved_* members
+// (zed, the two weight sums and the kernel list) out of their live
+// counterparts.  The copy is refreshed on steps that are a multiple of
+// cvm::restart_out_freq.
+void colvarbias_opes::save_state() {
+  // A restart frequency of zero must never reach the modulo below (integer
+  // division by zero is undefined behavior).  In that case the snapshot is
+  // refreshed on every call, so that a state written later still holds
+  // current data.
+  // NOTE(review): presumably zero means "no periodic restart output" -- confirm.
+  const bool no_restart_freq = (cvm::restart_out_freq == 0);
+  if (no_restart_freq || (cvm::step_absolute() % cvm::restart_out_freq == 0)) {
+    m_saved_zed = m_zed;
+    m_saved_sum_weights = m_sum_weights;
+    m_saved_sum_weights2 = m_sum_weights2;
+    m_saved_kernels = m_kernels;
+  }
+}
+
+// Per-step driver of the bias: refresh m_cv from the current values of the
+// variables, evaluate the bias with calculate_opes(), snapshot the state with
+// save_state() and, except on the first step, update the kernels with
+// update_opes().  The trajectory buffer and (when m_pmf_grid_on is set) the
+// PMF grid are fed on every step that did not fail earlier.
+// Returns the OR-combined error codes of the steps that were executed.
+int colvarbias_opes::update() {
+  int error_code = COLVARS_OK;
+  for (size_t i = 0; i < num_variables(); ++i) {
+    m_cv[i] = variables(i)->value();
+  }
+  error_code |= calculate_opes();
+  // NOTE: I don't think that calling dumpStateToFile() after update in
+  //       the PLUMED implementation is correct for step 0, so I save the
+  //       data after calculate() that does not modify the internal state
+  //       of the bias.
+  save_state();
+  if (error_code != COLVARS_OK) return error_code;
+  if (m_is_first_step) {
+    // NOTE: Colvars does not allow chained biases, so I have to implement
+    //       the PRINT here. Even if OPESmetad::update() is skipped we should
+    //       still call Print::update()
+    writeTrajBuffer();
+    if (m_pmf_grid_on) error_code |= collectSampleToPMFGrid();
+    m_is_first_step = false;
+    // Report what was accumulated above instead of an unconditional
+    // COLVARS_OK, so that a failure while sampling the grid is not lost.
+    return error_code;
+  }
+  error_code |= update_opes();
+  if (error_code != COLVARS_OK) return error_code;
+  writeTrajBuffer(); // Print::update()
+  if (m_pmf_grid_on) error_code |= collectSampleToPMFGrid();
+  return error_code;
+}
+
+// Accumulate the current sample into the reweighting grid, if there is one.
+// The bin of every PMF variable is looked up on the grid and, when the
+// resulting index is valid, exp(bias_energy / m_kbt) is added to that bin.
+// Always returns COLVARS_OK.
+int colvarbias_opes::collectSampleToPMFGrid() {
+  if (!m_reweight_grid) {
+    return COLVARS_OK;
+  }
+  const size_t num_pmf_cvs = m_pmf_cvs.size();
+  // Bin index of the current sample along each PMF variable
+  std::vector<int> bin_index(num_pmf_cvs, 0);
+  for (size_t d = 0; d < num_pmf_cvs; ++d) {
+    bin_index[d] = m_reweight_grid->current_bin_scalar(d);
+  }
+  const cvm::real sample_weight = cvm::exp(bias_energy / m_kbt);
+  // Samples falling outside of the grid are dropped
+  if (m_reweight_grid->index_ok(bin_index)) {
+    m_reweight_grid->acc_value(bin_index, sample_weight);
+  }
+  return COLVARS_OK;
+}
+
+// Write the restart state of this OPES bias to `os` and return `os`.
+//
+// OST is either a formatted text stream or cvm::memory_stream; for the latter
+// (formatted == false) none of the braces, blanks, newlines or stream
+// manipulators are emitted and the values are streamed back to back.
+// The fields are written in the order that read_state_data_template_()
+// consumes them: biasfactor, epsilon, kernel_cutoff, compression_threshold,
+// zed, sum_weights, sum_weights2, counter, the adaptive-sigma statistics (only
+// when m_adaptive_sigma is set), num_hills, the hills themselves and, when
+// m_pmf_grid_on is set, the raw probability grid.
+// zed, the weight sums and the hills are taken from the m_saved_* members,
+// not from the live values.
+template <typename OST> OST& colvarbias_opes::write_state_data_template_(OST &os) const {
+  const bool formatted = !std::is_same<OST, cvm::memory_stream>::value;
+  // Remember the caller's format flags so that the scientific notation
+  // selected below can be undone before returning.
+  const std::ios_base::fmtflags f = os.flags();
+  if (formatted) {
+    os.setf(std::ios::scientific, std::ios::floatfield);
+  }
+  write_state_data_key(os, "opes_metad_" + this->name);
+  // The three helpers below write one "key value" record each; they differ
+  // only in the type of the value.
+  auto printFieldReal = [&](const std::string& s, cvm::real x){
+    write_state_data_key(os, s, false);
+    if (formatted) os << std::setprecision(cvm::en_prec) << std::setw(cvm::en_width);
+    os << x;
+    if (formatted) os << "\n";
+  };
+  auto printFieldULL = [&](const std::string& s, unsigned long long x){
+    write_state_data_key(os, s, false);
+    if (formatted) os << std::setprecision(cvm::en_prec) << std::setw(cvm::en_width);
+    os << x;
+    if (formatted) os << "\n";
+  };
+  auto printFieldString = [&](const std::string& s, const std::string& x){
+    write_state_data_key(os, s, false);
+    if (formatted) os << std::setprecision(cvm::en_prec) << std::setw(cvm::en_width);
+    os << x;
+    if (formatted) os << "\n";
+  };
+  // The bias factor is stored as text so that an infinite value can be
+  // spelled "inf".
+  std::ostringstream oss;
+  if (m_inf_biasfactor) {
+    oss << "inf";
+  } else {
+    oss << m_biasfactor;
+  }
+  printFieldString("biasfactor", oss.str());
+  printFieldReal("epsilon", m_epsilon);
+  printFieldReal("kernel_cutoff", cvm::sqrt(m_cutoff2));
+  printFieldReal("compression_threshold", m_compression_threshold);
+  printFieldReal("zed", m_saved_zed);
+  printFieldReal("sum_weights", m_saved_sum_weights);
+  printFieldReal("sum_weights2", m_saved_sum_weights2);
+  printFieldULL("counter", m_counter);
+  if (m_adaptive_sigma) {
+    printFieldULL("adaptive_counter", m_adaptive_counter);
+    for (size_t i = 0; i < num_variables(); ++i) {
+      printFieldReal("sigma0_" + variables(i)->name, m_sigma0[i]);
+      printFieldReal("av_cv_" + variables(i)->name, m_av_cv[i]);
+      printFieldReal("av_M2_" + variables(i)->name, m_av_M2[i]);
+    }
+  }
+  printFieldULL("num_hills", m_saved_kernels.size());
+  write_state_data_key(os, "hills", false);
+  if (formatted) os << "{\n";
+  // One record per hill: index, center components, sigma components, height
+  for (size_t k = 0; k < m_saved_kernels.size(); ++k) {
+    if (formatted) os << "{ ";
+    // The reader extracts the index as unsigned long long: write that exact
+    // type, so that an unformatted record has the same width on platforms
+    // where size_t is narrower.
+    os << static_cast<unsigned long long>(k);
+    if (formatted) os << " ";
+    for (size_t i = 0; i < num_variables(); ++i) {
+      os << m_saved_kernels[k].m_center[i];
+      if (formatted) os << " ";
+    }
+    for (size_t i = 0; i < num_variables(); ++i) {
+      os << m_saved_kernels[k].m_sigma[i];
+      if (formatted) os << " ";
+    }
+    os << m_saved_kernels[k].m_height;
+    if (formatted) os << " }\n";
+  }
+  if (formatted) os << "}\n";
+  // Restore the float notation saved at entry.  The one-argument setf() only
+  // ORs bits into the current flags and would leave "scientific" switched on;
+  // the masked form clears the floatfield bits first.
+  if (formatted) os.setf(f, std::ios::floatfield);
+  if (m_pmf_grid_on) {
+    write_state_data_key(os, "probability_grid");
+    m_reweight_grid->write_raw(os, 8);
+  }
+  return os;
+}
+
+// Formatted-stream entry point: forwards to write_state_data_template_() and
+// turns any std::exception thrown there into a cvm::error() call.
+// Returns the stream in either case.
+std::ostream& colvarbias_opes::write_state_data(std::ostream &os) {
+  try {
+    return write_state_data_template_<std::ostream>(os);
+  } catch (const std::exception& exc) {
+    cvm::error(exc.what());
+    return os;
+  }
+}
+
+// cvm::memory_stream entry point: forwards to write_state_data_template_()
+// and turns any std::exception thrown there into a cvm::error() call.
+// Returns the stream in either case.
+cvm::memory_stream& colvarbias_opes::write_state_data(cvm::memory_stream& os) {
+  try {
+    return write_state_data_template_<cvm::memory_stream>(os);
+  } catch (const std::exception& exc) {
+    cvm::error(exc.what());
+    return os;
+  }
+}
+
+// Read the restart state of this OPES bias from `is` and return `is`.
+//
+// IST is either a formatted text stream or cvm::memory_stream; for the latter
+// (formatted == false) the brace tokens around the hills are not expected.
+// The parameters stored in the state (bias factor, epsilon, kernel cutoff,
+// compression threshold) are only compared against the current ones: a
+// warning is logged when they differ and the current values stay in use.
+// zed, the weight sums, the counters, the adaptive-sigma statistics, the
+// kernels and (when m_pmf_grid_on is set) the probability grid are loaded.
+// Throws std::runtime_error when a key or token is not the expected one;
+// std::stod() may throw as well if the stored bias factor is not a number.
+template <typename IST> IST& colvarbias_opes::read_state_data_template_(IST &is) {
+  bool const formatted = !std::is_same<IST, cvm::memory_stream>::value;
+  std::string tmp_name;
+  is >> tmp_name;
+  if (tmp_name.rfind("opes_metad_", 0) != 0) {
+    throw std::runtime_error("Unknown action name: " + tmp_name + "\n");
+  }
+  // Extract the next key and insist that it is the one named by s
+  auto expectField = [&](const std::string& s){
+    std::string field;
+    is >> field;
+    if (field.compare(s) != 0) {
+      throw std::runtime_error("Expect field \"" + s + "\" , but got \"" + field + "\"\n");
+    }
+  };
+  auto readFieldString = [&](const std::string& s, std::string& x){
+    expectField(s);
+    is >> x;
+  };
+  auto readFieldReal = [&](const std::string& s, cvm::real& x){
+    expectField(s);
+    is >> x;
+  };
+  auto readFieldULL = [&](const std::string& s, unsigned long long& x){
+    expectField(s);
+    is >> x;
+  };
+  std::string old_biasfactor_str;
+  readFieldString("biasfactor", old_biasfactor_str);
+  const bool old_inf_biasfactor =
+    old_biasfactor_str == "inf" || old_biasfactor_str == "-inf" || old_biasfactor_str == "+inf" ||
+    old_biasfactor_str == "INF" || old_biasfactor_str == "-INF" || old_biasfactor_str == "+INF";
+  const cvm::real old_biasfactor = old_inf_biasfactor ?
+    std::numeric_limits<cvm::real>::infinity() : std::stod(old_biasfactor_str);
+  // The current settings win: m_inf_biasfactor is deliberately left alone, so
+  // that it keeps agreeing with m_biasfactor.  A finite/infinite mismatch is
+  // checked explicitly, because the relative test cannot detect it when the
+  // current factor is infinite (the comparison is then inf > inf).
+  if ((old_inf_biasfactor != m_inf_biasfactor) ||
+      (!old_inf_biasfactor &&
+       (std::abs(old_biasfactor - m_biasfactor) > 1e-6 * m_biasfactor))) {
+    cvm::log("WARNING: previous bias factor was " + cvm::to_str(old_biasfactor) +
+             " while now it is " + cvm::to_str(m_biasfactor) +
+             " (the new one is used).\n");
+  }
+  cvm::real old_epsilon;
+  readFieldReal("epsilon", old_epsilon);
+  if (std::abs(old_epsilon - m_epsilon) > 1e-6 * m_epsilon) {
+    cvm::log("WARNING: previous epsilon was " + cvm::to_str(old_epsilon) +
+             " while now it is " + cvm::to_str(m_epsilon) +
+             " (the new one is used).\n");
+  }
+  cvm::real old_cutoff;
+  readFieldReal("kernel_cutoff", old_cutoff);
+  if (std::abs(old_cutoff - m_cutoff) > 1e-6 * m_cutoff) {
+    cvm::log("WARNING: previous cutoff was " + cvm::to_str(old_cutoff) +
+             " while now it is " + cvm::to_str(m_cutoff) +
+             " (the new one is used).\n");
+  }
+  m_cutoff2 = m_cutoff * m_cutoff;
+  cvm::real old_compression_threshold;
+  readFieldReal("compression_threshold", old_compression_threshold);
+  if (std::abs(old_compression_threshold - m_compression_threshold) > 1e-6 * m_compression_threshold) {
+    cvm::log("WARNING: previous compression threshold was " + cvm::to_str(old_compression_threshold) +
+             " while now it is " + cvm::to_str(m_compression_threshold) +
+             " (the new one is used).\n");
+  }
+  m_compression_threshold2 = m_compression_threshold * m_compression_threshold;
+  readFieldReal("zed", m_zed);
+  readFieldReal("sum_weights", m_sum_weights);
+  readFieldReal("sum_weights2", m_sum_weights2);
+  unsigned long long tmp_counter = 1;
+  readFieldULL("counter", tmp_counter);
+  m_counter = tmp_counter;
+  if (m_adaptive_sigma) {
+    readFieldULL("adaptive_counter", tmp_counter);
+    m_adaptive_counter = tmp_counter;
+    for (size_t i = 0; i < num_variables(); ++i) {
+      readFieldReal("sigma0_" + variables(i)->name, m_sigma0[i]);
+      readFieldReal("av_cv_" + variables(i)->name, m_av_cv[i]);
+      readFieldReal("av_M2_" + variables(i)->name, m_av_M2[i]);
+    }
+  }
+  unsigned long long kernel_size = 0;
+  readFieldULL("num_hills", kernel_size);
+  // Size the kernel list unconditionally.  If it were left untouched for a
+  // state without hills, kernels already held by this bias would survive and
+  // the loop below would look for hill records that are not in the stream.
+  m_kernels.resize(kernel_size);
+  read_state_data_key(is, "hills");
+  // Brace tokens only exist in formatted states
+  auto consume = [&](const std::string& expected_token){
+    if (formatted) {
+      std::string field;
+      is >> field;
+      if (field.compare(expected_token) != 0) {
+        throw std::runtime_error("Expect " + expected_token + " but got " + field + "\n");
+      }
+    }
+  };
+  consume("{");
+  // One record per hill: index, center components, sigma components, height
+  for (size_t k = 0; k < m_kernels.size(); ++k) {
+    consume("{");
+    unsigned long long tmp_k = 0;
+    is >> tmp_k;
+    if (formatted && k != tmp_k) {
+      throw std::runtime_error("Corrupt hill data\n");
+    }
+    kernel current_kernel;
+    current_kernel.m_center.resize(num_variables());
+    current_kernel.m_sigma.resize(num_variables());
+    for (size_t i = 0; i < num_variables(); ++i) {
+      is >> current_kernel.m_center[i];
+    }
+    for (size_t i = 0; i < num_variables(); ++i) {
+      is >> current_kernel.m_sigma[i];
+    }
+    is >> current_kernel.m_height;
+    m_kernels[k] = current_kernel;
+    consume("}");
+  }
+  consume("}");
+  if (m_pmf_grid_on) {
+    read_state_data_key(is, "probability_grid");
+    m_reweight_grid->read_raw(is);
+  }
+  // Quantities derived from what was just loaded
+  m_kdenorm = m_explore ? m_counter : m_sum_weights;
+  m_traj_line.rct = m_kbt * cvm::logn(m_sum_weights / m_counter);
+  m_traj_line.zed = m_zed;
+  m_traj_line.neff = (1 + m_sum_weights) * (1 + m_sum_weights) / (1 + m_sum_weights2);
+  m_traj_line.nker = m_kernels.size();
+  showInfo();
+  return is;
+}
+
+// Formatted-stream entry point: forwards to read_state_data_template_() and
+// turns any std::exception thrown there into a cvm::error() call.
+// Returns the stream in either case.
+std::istream& colvarbias_opes::read_state_data(std::istream &is) {
+  try {
+    return read_state_data_template_<std::istream>(is);
+  } catch (const std::exception& exc) {
+    cvm::error(exc.what());
+    return is;
+  }
+}
+
+// cvm::memory_stream entry point: forwards to read_state_data_template_()
+// and turns any std::exception thrown there into a cvm::error() call.
+// Returns the stream in either case.
+cvm::memory_stream& colvarbias_opes::read_state_data(cvm::memory_stream &is) {
+  try {
+    return read_state_data_template_<cvm::memory_stream>(is);
+  } catch (const std::exception& exc) {
+    cvm::error(exc.what());
+    return is;
+  }
+}
+
+// Add a kernel through the three-argument addKernel() overload, then append
+// one record describing it to m_kernels_output: the simulation time, the
+// center components, the sigma components, the height and the log-weight,
+// each right-aligned in a 24-character column.  The format flags of the
+// stream are put back afterwards.
+void colvarbias_opes::addKernel(const double height, const std::vector<cvm::real>& center, const std::vector<cvm::real>& sigma, const double logweight) {
+  addKernel(height, center, sigma);
+  const std::ios_base::fmtflags saved_flags = m_kernels_output.flags();
+  const size_t num_cvs = num_variables();
+  // simulation time in ps
+  m_kernels_output << std::right << std::setw(24)
+                   << (cvm::step_absolute() * cvm::dt()) * 1e-3;
+  for (size_t d = 0; d < num_cvs; ++d) {
+    m_kernels_output << " " << std::setw(24) << std::setprecision(16) << center[d];
+  }
+  for (size_t d = 0; d < num_cvs; ++d) {
+    m_kernels_output << " " << std::setw(24) << std::setprecision(16) << sigma[d];
+  }
+  m_kernels_output << " " << std::setw(24) << std::setprecision(16) << height
+                   << " " << std::setw(24) << std::setprecision(16) << logweight
+                   << std::endl;
+  m_kernels_output.flags(saved_flags);
+}
+
+// Add a kernel to m_kernels, merging it into an existing one when
+// compression is enabled (m_compression_threshold2 != 0) and
+// getMergeableKernel() returns a valid index.
+//
+// Every change is mirrored in m_delta_kernels: a kernel that is about to be
+// modified is recorded with its height negated, followed by the kernel that
+// results from the merge; a kernel that matches nothing is recorded as is.
+// With m_recursive_merge, the merged kernel is in turn merged with further
+// mergeable kernels; each such merge removes one entry from m_kernels and,
+// when the neighbor list is in use, from m_nlist_index as well.
+void colvarbias_opes::addKernel(const double height, const std::vector<cvm::real>& center, const std::vector<cvm::real>& sigma) {
+  bool no_match = true;
+  if (m_compression_threshold2 != 0) {
+    // An index equal to m_kernels.size() means "nothing to merge with"
+    size_t taker_k = getMergeableKernel(center, m_kernels.size());
+    if (taker_k < m_kernels.size()) {
+      no_match = false;
+      m_delta_kernels.emplace_back(-1 * m_kernels[taker_k].m_height, m_kernels[taker_k].m_center, m_kernels[taker_k].m_sigma);
+      mergeKernels(m_kernels[taker_k], kernel(height, center, sigma));
+      m_delta_kernels.push_back(m_kernels[taker_k]);
+      if (m_recursive_merge) {
+        size_t giver_k = taker_k;
+        taker_k = getMergeableKernel(m_kernels[giver_k].m_center, giver_k);
+        while (taker_k < m_kernels.size()) {
+          // Replace the last delta (the kernel merged so far) by the negated
+          // kernel that is about to absorb it
+          m_delta_kernels.pop_back();
+          m_delta_kernels.emplace_back(-1 * m_kernels[taker_k].m_height, m_kernels[taker_k].m_center, m_kernels[taker_k].m_sigma);
+          // Keep the kernel with the lower index and erase the other one
+          if (taker_k > giver_k) std::swap(taker_k, giver_k);
+          mergeKernels(m_kernels[taker_k], m_kernels[giver_k]);
+          m_delta_kernels.push_back(m_kernels[taker_k]);
+          m_kernels.erase(m_kernels.begin() + giver_k);
+          if (m_nlist) {
+            // Mirror the erase in the neighbor list: drop the entry of the
+            // erased kernel and shift every larger index down by one.  This
+            // works on the stored values, so it does not rely on the list
+            // being sorted, and nothing is erased when the kernel is absent
+            // (an error is still reported in that case).
+            bool found_giver = false;
+            for (auto it = m_nlist_index.begin(); it != m_nlist_index.end(); ) {
+              if (*it == giver_k) {
+                it = m_nlist_index.erase(it);
+                found_giver = true;
+              } else {
+                if (*it > giver_k) --(*it);
+                ++it;
+              }
+            }
+            if (found_giver == false) {
+              cvm::error("problem with merging and nlist\n");
+            }
+          }
+          giver_k = taker_k;
+          taker_k = getMergeableKernel(m_kernels[giver_k].m_center, giver_k);
+        }
+      }
+    }
+  }
+  if (no_match) {
+    m_kernels.emplace_back(height, center, sigma);
+    m_delta_kernels.emplace_back(height, center, sigma);
+    if (m_nlist) m_nlist_index.push_back(m_kernels.size() - 1);
+  }
+}
+
+void colvarbias_opes::mergeKernels(kernel& k1, const kernel& k2) const {
+  // Fold k2 into k1 in place: heights add, each center becomes the height-weighted
+  // mean, and each sigma^2 the weighted mean of (sigma^2 + center^2) minus mean^2.
+  const double h_sum = k1.m_height + k2.m_height;
+  for (size_t d = 0; d < k1.m_center.size(); ++d) {
+    const bool periodic = variables(d)->is_enabled(f_cv_periodic);
+    // Periodic CV: re-express k1's center relative to k2's via dist2_lgrad before averaging.
+    if (periodic) k1.m_center[d] = k2.m_center[d] + 0.5 * variables(d)->dist2_lgrad(k1.m_center[d], k2.m_center[d]).real_value;
+    const cvm::real mean = (k1.m_height * k1.m_center[d] + k2.m_height * k2.m_center[d]) / h_sum;
+    const cvm::real m2_k1 = k1.m_height * (k1.m_sigma[d] * k1.m_sigma[d] + k1.m_center[d] * k1.m_center[d]);
+    const cvm::real m2_k2 = k2.m_height * (k2.m_sigma[d] * k2.m_sigma[d] + k2.m_center[d] * k2.m_center[d]);
+    const cvm::real var = (m2_k1 + m2_k2) / h_sum - mean * mean;
+    if (periodic) {
+      colvarvalue wrapped(mean);
+      variables(d)->wrap(wrapped);
+      k1.m_center[d] = wrapped.real_value; // mean after the variable's wrap()
+    } else {
+      k1.m_center[d] = mean;
+    }
+    k1.m_sigma[d] = cvm::sqrt(var);
+  }
+  k1.m_height = h_sum;
+}
+
+size_t colvarbias_opes::getMergeableKernel(const std::vector<cvm::real>& giver_center, const size_t giver_k) const {
+  // Return the index of the kernel (other than giver_k) closest to giver_center in
+  // sigma-scaled squared distance if below m_compression_threshold2, else m_kernels.size().
+  size_t min_k = m_kernels.size();
+  cvm::real min_norm2 = m_compression_threshold2;
+  const int num_parallel = 1; // fixed to 1 here; only feeds the check at the end of the function
+  if (!m_nlist) {
+    if (m_num_threads == 1) {
+      for (size_t k = 0; k < m_kernels.size(); ++k) {
+        if (k == giver_k) continue;
+        double norm2 = 0;
+        for (size_t i = 0; i < num_variables(); ++i) {
+          norm2 += variables(i)->dist2(giver_center[i], m_kernels[k].m_center[i]) / (m_kernels[k].m_sigma[i] * m_kernels[k].m_sigma[i]);
+          if (norm2 >= min_norm2) break; // already worse than the best candidate
+        }
+        if (norm2 < min_norm2) {
+          min_norm2 = norm2;
+          min_k = k;
+        }
+      }
+    } else {
+#if defined(_OPENMP)
+      #pragma omp parallel num_threads(m_num_threads)
+      {
+        // Per-thread minimum, reduced under the critical section below
+        size_t min_k_omp = min_k;
+        cvm::real min_norm2_omp = m_compression_threshold2;
+        #pragma omp for nowait
+        for (int k = 0; k < static_cast<int>(m_kernels.size()); ++k) {
+          if (k == static_cast<int>(giver_k)) continue;
+          double norm2 = 0;
+          for (int i = 0; i < static_cast<int>(num_variables()); ++i) {
+            norm2 += variables(i)->dist2( giver_center[i], m_kernels[k].m_center[i]) / (m_kernels[k].m_sigma[i] * m_kernels[k].m_sigma[i]);
+            if (norm2 >= min_norm2_omp) break;
+          }
+          if (norm2 < min_norm2_omp) {
+            min_norm2_omp = norm2;
+            min_k_omp = static_cast<size_t>(k);
+          }
+        }
+        #pragma omp critical
+        {
+          if (min_norm2_omp < min_norm2) {
+            min_norm2 = min_norm2_omp;
+            min_k = min_k_omp;
+          }
+        }
+      }
+#elif defined(CMK_SMP) && defined(USE_CKLOOP)
+      // NOTE: No existing reduction type for finding the minimum, so I have
+      //       to use such a workaround.
+      std::vector<size_t> min_k_smp(m_num_threads, min_k);
+      std::vector<cvm::real> min_norm2_smp(m_num_threads, m_compression_threshold2);
+      auto worker = [&](int start, int end, void* unused) {
+        const int tid = cvm::proxy->smp_thread_id();
+        for (int k = start; k <= end; ++k) {
+          if (static_cast<size_t>(k) == giver_k) continue;
+          double norm2 = 0;
+          for (size_t i = 0; i < num_variables(); ++i) {
+            norm2 += variables(i)->dist2( giver_center[i], m_kernels[k].m_center[i]) / (m_kernels[k].m_sigma[i] * m_kernels[k].m_sigma[i]);
+            if (norm2 >= min_norm2_smp[tid]) break;
+          }
+          if (norm2 < min_norm2_smp[tid]) {
+            min_norm2_smp[tid] = norm2;
+            min_k_smp[tid] = k;
+          }
+        }
+      };
+      const size_t numChunks = m_kernels.size();
+      const size_t lowerRange = 0;
+      const size_t upperRange = numChunks - 1;
+      CkLoop_Parallelize(
+        numChunks, lowerRange, upperRange,
+        worker, NULL, CKLOOP_NONE, NULL);
+      const auto it_min = std::min_element(min_norm2_smp.begin(), min_norm2_smp.end());
+      min_norm2 = *it_min;
+      min_k = min_k_smp[std::distance(min_norm2_smp.begin(), it_min)];
+#else
+      cvm::error("OPES cannot run because this binary is not linked with a supported threading library.\n");
+#endif
+    }
+  } else {
+    // Neighbor-list variant: only kernels listed in m_nlist_index are candidates
+    if (m_num_threads == 1) {
+      for (size_t nk = 0; nk < m_nlist_index.size(); ++nk) {
+        const size_t k = m_nlist_index[nk];
+        if (k == giver_k) continue;
+        double norm2 = 0;
+        for (size_t i = 0; i < num_variables(); ++i) {
+          norm2 += variables(i)->dist2(giver_center[i], m_kernels[k].m_center[i]) / (m_kernels[k].m_sigma[i] * m_kernels[k].m_sigma[i]);
+          if (norm2 >= min_norm2) break;
+        }
+        if (norm2 < min_norm2) {
+          min_norm2 = norm2;
+          min_k = k;
+        }
+      }
+    } else {
+#if defined(_OPENMP)
+      #pragma omp parallel num_threads(m_num_threads)
+      {
+        size_t min_k_omp = min_k;
+        cvm::real min_norm2_omp = m_compression_threshold2;
+        #pragma omp for nowait
+        for (int nk = 0; nk < static_cast<int>(m_nlist_index.size()); ++nk) {
+          const size_t k = m_nlist_index[nk];
+          if (k == giver_k) continue;
+          double norm2 = 0;
+          for (int i = 0; i < static_cast<int>(num_variables()); ++i) {
+            norm2 += variables(i)->dist2(giver_center[i], m_kernels[k].m_center[i]) / (m_kernels[k].m_sigma[i] * m_kernels[k].m_sigma[i]);
+            if (norm2 >= min_norm2_omp) break;
+          }
+          if (norm2 < min_norm2_omp) {
+            min_norm2_omp = norm2;
+            min_k_omp = k;
+          }
+        }
+        #pragma omp critical
+        {
+          if (min_norm2_omp < min_norm2) {
+            min_norm2 = min_norm2_omp;
+            min_k = min_k_omp;
+          }
+        }
+      }
+#elif defined(CMK_SMP) && defined(USE_CKLOOP)
+      // NOTE: No existing reduction type for finding the minimum, so I have
+      //       to use such a workaround.
+      std::vector<size_t> min_k_smp(m_num_threads, min_k);
+      std::vector<cvm::real> min_norm2_smp(m_num_threads, m_compression_threshold2);
+      auto worker = [&](int start, int end, void* unused) {
+        const int tid = cvm::proxy->smp_thread_id();
+        for (int nk = start; nk <= end; ++nk) {
+          const size_t k = m_nlist_index[nk];
+          if (k == giver_k) continue;
+          double norm2 = 0;
+          for (size_t i = 0; i < num_variables(); ++i) {
+            norm2 += variables(i)->dist2( giver_center[i], m_kernels[k].m_center[i]) / (m_kernels[k].m_sigma[i] * m_kernels[k].m_sigma[i]);
+            if (norm2 >= min_norm2_smp[tid]) break;
+          }
+          if (norm2 < min_norm2_smp[tid]) {
+            min_norm2_smp[tid] = norm2;
+            min_k_smp[tid] = k;
+          }
+        }
+      };
+      const size_t numChunks = m_nlist_index.size();
+      const size_t lowerRange = 0;
+      const size_t upperRange = numChunks - 1;
+      CkLoop_Parallelize(
+        numChunks, lowerRange, upperRange,
+        worker, NULL, CKLOOP_NONE, NULL);
+      const auto it_min = std::min_element(min_norm2_smp.begin(), min_norm2_smp.end());
+      min_norm2 = *it_min;
+      min_k = min_k_smp[std::distance(min_norm2_smp.begin(), it_min)];
+#else
+      cvm::error("OPES cannot run because this binary is not linked with a supported threading library.\n");
+#endif
+    }
+  }
+  if (num_parallel > 1) {
+    cvm::error("The Colvars OPES implementation does not support running OPES in parallel across nodes.\n");
+  }
+  return min_k;
+}
+
+std::string const colvarbias_opes::traj_file_name(const std::string& suffix) const {
+  // Builds "<output_prefix>.colvars.<bias name>[.<replica_id>]<suffix>";
+  // the replica part is added only when comm is not single_replica.
+  std::string fname = cvm::output_prefix() + ".colvars." + this->name;
+  if (comm != single_replica) fname += "." + replica_id;
+  fname += suffix;
+  return fname;
+}
+
+int colvarbias_opes::write_output_files() {
+  // Flush the buffered kernel and trajectory lines to their files (headers on the
+  // first call only) and, if enabled, recompute and write the PMF grids.
+  int error_code = COLVARS_OK;
+  // NOTE(review): function-level flag, shared by every colvarbias_opes object in a thread -- confirm intended.
+  thread_local static bool firsttime = true;
+  // Write the kernels
+  const std::string kernels_filename = traj_file_name(".kernels.dat");
+  std::ostream& os_kernels = cvm::proxy->output_stream(kernels_filename, "kernels file");
+  const std::ios_base::fmtflags format_kernels = os_kernels.flags();
+  if (firsttime) {
+    os_kernels << "#! FIELDS time ";
+    for (size_t i = 0; i < num_variables(); ++i) {
+      os_kernels << variables(i)->name + " ";
+    }
+    for (size_t i = 0; i < num_variables(); ++i) {
+      os_kernels << "sigma_" + variables(i)->name + " ";
+    }
+    os_kernels << "height logweight\n";
+    // Make sure the action name compatible with the script in https://github.com/invemichele/opes/blob/master/postprocessing/State_from_Kernels.py
+    if (m_explore) os_kernels << "#! SET action OPES_METAD_EXPLORE_kernels\n";
+    else os_kernels << "#! SET action OPES_METAD_kernels\n";
+    if (m_inf_biasfactor) os_kernels << "#! SET biasfactor " << "inf" << "\n";
+    else os_kernels << "#! SET biasfactor " << m_biasfactor << "\n";
+    os_kernels << "#! SET epsilon " << m_epsilon << "\n";
+    os_kernels << "#! SET kernel_cutoff " << m_cutoff << "\n";
+    os_kernels << "#! SET compression_threshold " << m_compression_threshold << "\n";
+    for (size_t i = 0; i < num_variables(); ++i) {
+      if (variables(i)->is_enabled(f_cv_periodic)) {
+        if (variables(i)->is_enabled(f_cv_lower_boundary)) {
+          os_kernels << "#! SET min_" + variables(i)->name + " " << variables(i)->lower_boundary.real_value << "\n";
+        }
+        if (variables(i)->is_enabled(f_cv_upper_boundary)) {
+          os_kernels << "#! SET max_" + variables(i)->name + " " << variables(i)->upper_boundary.real_value << "\n";
+        }
+      }
+    }
+  }
+  os_kernels << m_kernels_output.str();
+  os_kernels.flags(format_kernels); // restore (flags() replaces; setf() would only OR bits in)
+  error_code |= cvm::proxy->flush_output_stream(kernels_filename);
+  m_kernels_output.str("");
+  m_kernels_output.clear();
+
+  // Write the trajectory
+  const std::string traj_filename = traj_file_name(".misc.traj");
+  std::ostream& os_traj = cvm::proxy->output_stream(traj_filename, "trajectory of various OPES properties");
+  const std::ios_base::fmtflags format_traj = os_traj.flags();
+  if (firsttime) {
+    os_traj << "#! FIELDS time ";
+    for (size_t i = 0; i < num_variables(); ++i) {
+      os_traj << variables(i)->name + " ";
+    }
+    os_traj << this->name + ".bias ";
+    os_traj << this->name + ".rct ";
+    if (!m_no_zed) os_traj << this->name + ".zed ";
+    os_traj << this->name + ".neff ";
+    if (m_calc_work) os_traj << this->name + ".work "; // same condition as the column written by writeTrajBuffer()
+    os_traj << this->name + ".nker ";
+    if (m_nlist) os_traj << this->name + ".nlker ";
+    if (m_nlist) os_traj << this->name + ".nlsteps ";
+    os_traj << "\n";
+    for (size_t i = 0; i < num_variables(); ++i) {
+      if (variables(i)->is_enabled(f_cv_lower_boundary)) {
+        os_traj << "#! SET min_" + variables(i)->name + " " << variables(i)->lower_boundary.real_value << "\n";
+      }
+      if (variables(i)->is_enabled(f_cv_upper_boundary)) {
+        os_traj << "#! SET max_" + variables(i)->name + " " << variables(i)->upper_boundary.real_value << "\n";
+      }
+    }
+  }
+  os_traj << m_traj_oss.str();
+  os_traj.flags(format_traj);
+  error_code |= cvm::proxy->flush_output_stream(traj_filename);
+  m_traj_oss.str("");
+  m_traj_oss.clear();
+  if (firsttime) firsttime = false;
+  if (m_pmf_grid_on) {
+    error_code |= computePMF();
+    const std::string pmf_filename = traj_file_name(".pmf");
+    error_code |= writePMF(m_pmf_grid, pmf_filename, false);
+    if (comm == multiple_replicas && m_pmf_shared) {
+      if (cvm::proxy->replica_index() == 0) { // only replica 0 writes the global PMF
+        const std::string global_pmf_filename = traj_file_name(".global.pmf");
+        error_code |= writePMF(m_global_pmf_grid, global_pmf_filename, false);
+      }
+    }
+    if (m_pmf_hist_freq > 0 && cvm::step_absolute() % m_pmf_hist_freq == 0) {
+      const std::string pmf_hist_filename = traj_file_name(".hist.pmf");
+      error_code |= writePMF(m_pmf_grid, pmf_hist_filename, true); // history file is kept open between writes
+      if (comm == multiple_replicas && m_pmf_shared) {
+        if (cvm::proxy->replica_index() == 0) {
+          const std::string global_hist_pmf_filename = traj_file_name(".global.hist.pmf");
+          error_code |= writePMF(m_global_pmf_grid, global_hist_pmf_filename, true);
+        }
+      }
+    }
+  }
+  // To prevent the case that one replica exits earlier and then destroys all streams
+  if (comm == multiple_replicas) cvm::proxy->replica_comm_barrier();
+  return error_code;
+}
+
+void hist_to_pmf(const cvm::real kbt, const colvar_grid_scalar *hist, std::unique_ptr<colvar_grid_scalar>& pmf) {
+  // Histogram -> PMF: -kbt*ln(p/sum), shifted so that the most probable bin is at zero.
+  cvm::real norm_factor = 0;
+  cvm::real max_prob = 0;
+  auto& prob_data = hist->data;
+  for (auto it = prob_data.begin(); it != prob_data.end(); ++it) {
+    norm_factor += (*it);
+    if ((*it) > max_prob) max_prob = (*it);
+  }
+  if (norm_factor > 0) {
+    const cvm::real min_pmf = (max_prob > 0) ? -1.0 * kbt * cvm::logn(max_prob / norm_factor) : 0;
+    auto& pmf_data = pmf->data;
+    // Maximum over sampled bins only: unsampled bins may still hold an earlier call's fill value.
+    cvm::real max_pmf = 0;
+    for (size_t i = 0; i < pmf_data.size(); ++i) {
+      if (prob_data[i] > 0) {
+        pmf_data[i] = -1.0 * kbt * cvm::logn(prob_data[i] / norm_factor) - min_pmf;
+        if (pmf_data[i] > max_pmf) max_pmf = pmf_data[i];
+      }
+    }
+    for (size_t i = 0; i < pmf_data.size(); ++i) {
+      if (!(prob_data[i] > 0)) pmf_data[i] = max_pmf; // unsampled bins get the current maximum
+    }
+  }
+}
+
+int colvarbias_opes::computePMF() { // Rebuild m_pmf_grid (and, on replica 0, m_global_pmf_grid) from the reweighting histograms
+  // Shared-PMF mode: replica 0 receives the reweighting histogram of every other replica
+  if (comm == multiple_replicas && m_pmf_shared) {
+    const size_t samples_n = m_reweight_grid->raw_data_num();
+    const int msg_size = samples_n * sizeof(cvm::real); // message size in bytes; NOTE(review): narrowed to int
+    std::vector<cvm::real> buffer;
+    if (cvm::main()->proxy->replica_index() == 0) {
+      buffer.resize(samples_n * (cvm::proxy->num_replicas() - 1)); // one slot of samples_n values per sender
+      for (int p = 1; p < cvm::proxy->num_replicas(); p++) {
+        const size_t start_pos = (p - 1) * samples_n;
+        if (cvm::proxy->replica_comm_recv((char*)&(buffer[start_pos]), msg_size, p) != msg_size) {
+          return cvm::error("Error getting shared OPES reweighting histogram from replica " + cvm::to_str(p));
+        }
+      }
+    } else {
+      if (cvm::proxy->replica_comm_send((char*)(&(m_reweight_grid->data[0])), msg_size, 0) != msg_size) {
+        return cvm::error("Error sending shared OPES reweighting histogram from replica " + cvm::to_str(cvm::main()->proxy->replica_index()));
+      }
+    }
+    cvm::proxy->replica_comm_barrier();
+    // Accumulate the global histogram; it is filled on replica 0 only (nothing is sent back here)
+    auto& global_data = m_global_reweight_grid->data;
+    if (cvm::main()->proxy->replica_index() == 0) {
+      global_data = m_reweight_grid->data; // start from replica 0's own samples
+      // Sum the samples on PE 0
+      for (int p = 1; p < cvm::proxy->num_replicas(); p++) {
+        const size_t start_pos = (p - 1) * samples_n;
+        for (size_t i = 0 ; i < samples_n; ++i) {
+          global_data[i] += buffer[start_pos+i];
+        }
+      }
+    }
+  }
+  // Convert this replica's own histogram into its PMF
+  hist_to_pmf(m_kbt, m_reweight_grid.get(), m_pmf_grid);
+  if (comm == multiple_replicas && m_pmf_shared) {
+    if (cvm::main()->proxy->replica_index() == 0) {
+      hist_to_pmf(m_kbt, m_global_reweight_grid.get(), m_global_pmf_grid); // global PMF exists on replica 0 only
+    }
+  }
+  if (comm == multiple_replicas) {
+    cvm::proxy->replica_comm_barrier();
+  }
+  return COLVARS_OK;
+}
+
+int colvarbias_opes::writePMF(const std::unique_ptr<colvar_grid_scalar>& pmf_grid, const std::string &filename, bool keep_open) {
+  // Write pmf_grid with write_multicol() to the proxy stream named `filename`;
+  // the stream is flushed when keep_open is true and closed otherwise.
+  std::ostream& os = cvm::proxy->output_stream(filename, "output stream of " + filename);
+  if (!os) return COLVARS_FILE_ERROR;
+  pmf_grid->write_multicol(os);
+  if (keep_open) {
+    cvm::proxy->flush_output_stream(filename);
+  } else {
+    cvm::proxy->close_output_stream(filename);
+  }
+  return COLVARS_OK;
+}
+
+void colvarbias_opes::writeTrajBuffer() {
+  // Append one record to m_traj_oss on steps that are multiples of m_traj_output_frequency.
+  if (!(m_traj_output_frequency > 0) || cvm::step_absolute() % m_traj_output_frequency != 0) return;
+  const auto width = cvm::cv_width;
+  m_traj_oss << std::right << std::scientific << std::setprecision(cvm::cv_prec);
+  m_traj_oss << " " << std::setw(width) << (cvm::step_absolute() * cvm::dt()) * 1e-3;
+  for (size_t d = 0; d < num_variables(); ++d) {
+    m_traj_oss << " " << std::setw(width) << variables(d)->value().real_value;
+  }
+  m_traj_oss << " " << std::setw(width) << bias_energy;
+  m_traj_oss << " " << std::setw(width) << m_traj_line.rct;
+  if (!m_no_zed) m_traj_oss << " " << std::setw(width) << m_traj_line.zed;
+  m_traj_oss << " " << std::setw(width) << m_traj_line.neff;
+  if (m_calc_work) m_traj_oss << " " << std::setw(width) << m_traj_line.work;
+  m_traj_oss << " " << m_traj_line.nker; // counters are written without a field width
+  if (m_nlist) m_traj_oss << " " << m_traj_line.nlker << " " << m_traj_line.nlsteps;
+  m_traj_oss << "\n";
+}
+
+void colvarbias_opes::updateNlist(const std::vector<cvm::real>& center) {
+  // Rebuild m_nlist_index with every kernel whose sigma-scaled squared distance from
+  // `center` is within m_nlist_param[0] * m_cutoff2, then refresh m_nlist_dev2 and counters.
+  if (m_kernels.empty()) return;
+  m_nlist_center = center;
+  m_nlist_index.clear();
+  if (m_num_threads == 1 || m_kernels.size() < 2 * m_num_threads) {
+    for (size_t k = 0; k < m_kernels.size(); ++k) {
+      cvm::real norm2_k = 0;
+      for (size_t i = 0; i < num_variables(); ++i) {
+        norm2_k += variables(i)->dist2(m_nlist_center[i], m_kernels[k].m_center[i]) / (m_kernels[k].m_sigma[i] * m_kernels[k].m_sigma[i]);
+      }
+      if (norm2_k <= m_nlist_param[0] * m_cutoff2) m_nlist_index.push_back(k);
+    }
+  } else {
+#if defined (_OPENMP)
+    #pragma omp parallel num_threads(m_num_threads)
+    {
+      std::vector<size_t> private_nlist_index; // per-thread hits, appended under the critical section
+      #pragma omp for nowait
+      for (int k = 0; k < static_cast<int>(m_kernels.size()); ++k) {
+        cvm::real norm2_k = 0;
+        for (int i = 0; i < static_cast<int>(num_variables()); ++i) {
+          norm2_k += variables(i)->dist2(m_nlist_center[i], m_kernels[k].m_center[i]) / (m_kernels[k].m_sigma[i] * m_kernels[k].m_sigma[i]);
+        }
+        if (norm2_k <= m_nlist_param[0] * m_cutoff2) {
+          private_nlist_index.push_back(static_cast<size_t>(k));
+        }
+      }
+      #pragma omp critical
+      m_nlist_index.insert(m_nlist_index.end(), private_nlist_index.begin(), private_nlist_index.end());
+    }
+#elif defined(CMK_SMP) && defined(USE_CKLOOP)
+    std::vector<std::vector<size_t>> private_nlist_index(m_num_threads);
+    auto worker = [&](int start, int end, void* unused){
+      const int tid = cvm::proxy->smp_thread_id();
+      for (int k = start; k <= end; ++k) {
+        cvm::real norm2_k = 0;
+        for (size_t i = 0; i < num_variables(); ++i) {
+          norm2_k += variables(i)->dist2(m_nlist_center[i], m_kernels[k].m_center[i]) / (m_kernels[k].m_sigma[i] * m_kernels[k].m_sigma[i]);
+        }
+        if (norm2_k <= m_nlist_param[0] * m_cutoff2) {
+          private_nlist_index[tid].push_back(k);
+        }
+      }
+    };
+    const size_t numChunks = m_kernels.size();
+    const size_t lowerRange = 0;
+    const size_t upperRange = numChunks - 1;
+    CkLoop_Parallelize(
+      numChunks, lowerRange, upperRange,
+      worker, NULL, CKLOOP_NONE, NULL);
+    for (size_t j = 0; j < m_num_threads; ++j) {
+      m_nlist_index.insert(m_nlist_index.end(), private_nlist_index[j].begin(), private_nlist_index[j].end());
+    }
+#else
+    cvm::error("OPES cannot run because this binary is not linked with a supported threading library.\n");
+#endif
+    if (m_recursive_merge) { // threads append in arbitrary order; restore ascending order
+      std::sort(m_nlist_index.begin(), m_nlist_index.end());
+    }
+  }
+  std::vector<cvm::real> dev2(num_variables(), 0);
+  for (size_t k = 0; k < m_nlist_index.size(); ++k) {
+    for (size_t i = 0; i < num_variables(); ++i) {
+      dev2[i] += variables(i)->dist2(m_nlist_center[i], m_kernels[m_nlist_index[k]].m_center[i]);
+    }
+  }
+  for (size_t i = 0; i < num_variables(); ++i) {
+    if (m_nlist_index.empty()) {
+      m_nlist_dev2[i] = m_kernels.back().m_sigma[i] * m_kernels.back().m_sigma[i]; // fallback: sigma^2 of the newest kernel
+    } else {
+      m_nlist_dev2[i] = dev2[i] / m_nlist_index.size(); // mean squared distance of listed kernels
+    }
+  }
+  m_traj_line.nlker = m_nlist_index.size();
+  m_traj_line.nlsteps = m_nlist_steps;
+  m_nlist_steps = 0;
+  m_nlist_update = false;
+}
diff --git a/lib/colvars/colvarbias_opes.h b/lib/colvars/colvarbias_opes.h
new file mode 100644
index 0000000000..0c52ba2413
--- /dev/null
+++ b/lib/colvars/colvarbias_opes.h
@@ -0,0 +1,176 @@
+#ifndef COLVARBIAS_OPES_H
+#define COLVARBIAS_OPES_H
+
+// This code is mainly adapted from the PLUMED opes module, which uses the
+// LGPLv3 license as shown below:
+/* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+   Copyright (c) 2020-2021 of Michele Invernizzi.
+
+   This file is part of the OPES plumed module.
+
+   The OPES plumed module is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   The OPES plumed module is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with plumed.  If not, see <http://www.gnu.org/licenses/>.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+
+#include "colvarbias.h"
+
+#include <vector>
+#include <memory>
+
+// OPES_METAD implementation: swiped from OPESmetad.cpp of PLUMED
+class colvarbias_opes: public colvarbias {
+public:
+  /// The Gaussian kernel data structure
+  struct kernel {
+    cvm::real m_height;
+    std::vector<cvm::real> m_center; ///< one entry per variable
+    std::vector<cvm::real> m_sigma;  ///< one entry per variable
+    kernel() {} // note: leaves m_height uninitialized
+    kernel(cvm::real h, const std::vector<cvm::real>& c,
+           const std::vector<cvm::real>& s):
+      m_height(h), m_center(c), m_sigma(s) {}
+  };
+  /// Communication between different replicas
+  enum Communication {
+    /// One replica (default)
+    single_replica,
+    /// Hills added concurrently by several replicas
+    multiple_replicas
+  };
+  /// Constructor
+  colvarbias_opes(char const *key);
+  /// Initializer
+  int init(std::string const &conf) override;
+  /// Per-timestep update
+  int update() override;
+  /// Save the state to a text file for restarting
+  std::ostream &write_state_data(std::ostream &os) override;
+  /// Read the state from a text file for restarting
+  std::istream &read_state_data(std::istream &is) override;
+  /// Save the state to a binary file for restarting
+  cvm::memory_stream &write_state_data(cvm::memory_stream &os) override;
+  /// Read the state from a binary file for restarting
+  cvm::memory_stream &read_state_data(cvm::memory_stream &is) override;
+  /// Write to files at restart steps
+  int write_output_files() override;
+private:
+  int update_opes();
+  int calculate_opes();
+  void save_state();
+  cvm::real getProbAndDerivatives(const std::vector<cvm::real>& cv, std::vector<cvm::real>& der_prob) const;
+  cvm::real evaluateKernel(const kernel& G, const std::vector<cvm::real>& x) const;
+  cvm::real evaluateKernel(const kernel& G, const std::vector<cvm::real>& x, std::vector<cvm::real>& accumulated_derivative, std::vector<cvm::real>& dist) const;
+  void addKernel(const double height, const std::vector<cvm::real>& center, const std::vector<cvm::real>& sigma, const double logweight); ///< add a kernel and log it to m_kernels_output
+  void addKernel(const double height, const std::vector<cvm::real>& center, const std::vector<cvm::real>& sigma); ///< add a kernel, merging it if compression applies
+  size_t getMergeableKernel(const std::vector<cvm::real>& giver_center, const size_t giver_k) const; ///< index of the kernel to merge with, or m_kernels.size() if none
+  void mergeKernels(kernel& k1, const kernel& k2) const; ///< merge k2 into k1 in place
+  void updateNlist(const std::vector<cvm::real>& center); ///< rebuild the kernel neighbor list around center
+  struct traj_line { ///< values written per record by writeTrajBuffer()
+    double rct;
+    double zed;
+    double neff;
+    double work;
+    size_t nker;
+    size_t nlker;
+    size_t nlsteps;
+  };
+  void writeTrajBuffer(); ///< append one record to m_traj_oss at the trajectory output frequency
+  void showInfo() const;
+  template <typename OST> OST &write_state_data_template_(OST &os) const;
+  template <typename IST> IST &read_state_data_template_(IST &os);
+  std::string const traj_file_name(const std::string& suffix) const; ///< output file name for this bias (and replica)
+  int collectSampleToPMFGrid();
+  int computePMF(); ///< convert the reweighting histogram(s) into PMF grid(s)
+  int writePMF(const std::unique_ptr<colvar_grid_scalar>& pmf_grid, const std::string &filename, bool keep_open);
+private:
+  cvm::real m_kbt;
+  cvm::real m_barrier;
+  cvm::real m_biasfactor;
+  cvm::real m_bias_prefactor;
+  cvm::real m_temperature;
+  cvm::step_number m_pace;
+  cvm::step_number m_adaptive_sigma_stride;
+  cvm::step_number m_adaptive_counter;
+  unsigned long long m_counter;
+  cvm::real m_compression_threshold;
+  cvm::real m_compression_threshold2; ///< merge threshold on the scaled squared distance; 0 disables merging
+  bool m_adaptive_sigma;
+  bool m_fixed_sigma;
+  bool m_no_zed;
+  // Neighbor-list and merging options
+  bool m_nlist;
+  bool m_recursive_merge;
+  std::vector<cvm::real> m_nlist_param;
+  std::vector<cvm::real> m_sigma0;
+  std::vector<cvm::real> m_sigma_min;
+  cvm::real m_epsilon;
+  cvm::real m_sum_weights;
+  cvm::real m_sum_weights2;
+  cvm::real m_cutoff;
+  cvm::real m_cutoff2;
+  cvm::real m_zed;
+  cvm::real m_old_kdenorm;
+  cvm::real m_kdenorm;
+  cvm::real m_val_at_cutoff;
+  cvm::real m_rct;
+  cvm::real m_neff;
+  std::vector<kernel> m_kernels;
+  std::vector<kernel> m_delta_kernels; ///< kernel changes recorded by addKernel() (negated old kernels and new/merged ones)
+  std::vector<cvm::real> m_av_cv;
+  std::vector<cvm::real> m_av_M2;
+  std::ostringstream m_kernels_output; ///< kernel records buffered until write_output_files()
+  std::vector<cvm::real> m_nlist_center; ///< center passed to the last updateNlist()
+  std::vector<size_t> m_nlist_index; ///< indices into m_kernels of the kernels in the neighbor list
+  std::vector<cvm::real> m_nlist_dev2; ///< per-variable mean squared distance of listed kernels from m_nlist_center
+  size_t m_nlist_steps; ///< reset to 0 by updateNlist()
+  bool m_nlist_update; ///< cleared by updateNlist()
+  bool m_nlist_pace_reset;
+  size_t m_nker;
+  bool m_calc_work;
+  cvm::real m_work;
+  /// Communication between different replicas
+  Communication comm;
+  /// \brief Identifier for this replica
+  std::string            replica_id;
+  size_t m_num_walkers;
+  size_t shared_freq;
+  size_t m_num_threads; ///< thread count used by the OpenMP / CkLoop loops
+  size_t m_nlker;
+  // Buffered output: records accumulate in memory and are flushed
+  // to the kernels and trajectory files by write_output_files()
+  std::string m_kernels_output_headers;
+  cvm::step_number m_traj_output_frequency;
+  traj_line m_traj_line;
+  std::ostringstream m_traj_oss; ///< trajectory records buffered until write_output_files()
+  bool m_is_first_step;
+  std::vector<cvm::real> m_cv;
+  // For saving states
+  decltype(m_zed) m_saved_zed;
+  decltype(m_sum_weights) m_saved_sum_weights;
+  decltype(m_sum_weights2) m_saved_sum_weights2;
+  decltype(m_kernels) m_saved_kernels;
+  // PMF grid from reweighting
+  bool m_pmf_grid_on;
+  std::vector<colvar*> m_pmf_cvs;
+  std::string grid_conf;
+  std::shared_ptr<colvar_grid_scalar> m_reweight_grid; ///< this replica's reweighting histogram
+  std::unique_ptr<colvar_grid_scalar> m_pmf_grid; ///< PMF computed from m_reweight_grid
+  cvm::step_number m_pmf_hist_freq; ///< step interval for the ".hist.pmf" output (0 disables it)
+  bool m_pmf_shared; // shared PMF among replicas
+  std::unique_ptr<colvar_grid_scalar> m_global_reweight_grid; ///< histogram summed over replicas (filled on replica 0)
+  std::unique_ptr<colvar_grid_scalar> m_global_pmf_grid; ///< PMF computed from m_global_reweight_grid on replica 0
+  bool m_explore; ///< selects the OPES_METAD_EXPLORE action name in the kernels file
+  bool m_inf_biasfactor; ///< when set, the kernels file reports the bias factor as "inf"
+};
+
+#endif // COLVARBIAS_OPES_H
diff --git a/lib/colvars/colvarcomp.cpp b/lib/colvars/colvarcomp.cpp
index e6729f43a7..9a056f7dd3 100644
--- a/lib/colvars/colvarcomp.cpp
+++ b/lib/colvars/colvarcomp.cpp
@@ -261,7 +261,6 @@ int colvar::cvc::init_dependencies() {
     require_feature_children(f_cvc_explicit_gradient, f_ag_explicit_gradient);
 
     init_feature(f_cvc_inv_gradient, "inverse_gradient", f_type_dynamic);
-    require_feature_self(f_cvc_inv_gradient, f_cvc_gradient);
 
     init_feature(f_cvc_debug_gradient, "debug_gradient", f_type_user);
     require_feature_self(f_cvc_debug_gradient, f_cvc_gradient);
@@ -525,7 +524,7 @@ void colvar::cvc::calc_force_invgrads()
 
 void colvar::cvc::calc_Jacobian_derivative()
 {
-  cvm::error("Error: calculation of inverse gradients is not implemented "
+  cvm::error("Error: calculation of Jacobian derivatives is not implemented "
              "for colvar components of type \""+function_type()+"\".\n",
              COLVARS_NOT_IMPLEMENTED);
 }
@@ -533,8 +532,10 @@ void colvar::cvc::calc_Jacobian_derivative()
 
 void colvar::cvc::calc_fit_gradients()
 {
-  for (size_t ig = 0; ig < atom_groups.size(); ig++) {
-    atom_groups[ig]->calc_fit_gradients();
+  if (is_enabled(f_cvc_explicit_gradient)) {
+    for (size_t ig = 0; ig < atom_groups.size(); ig++) {
+      atom_groups[ig]->calc_fit_gradients();
+    }
   }
 }
 
diff --git a/lib/colvars/colvarcomp.h b/lib/colvars/colvarcomp.h
index 334fdc1f6e..53755576c2 100644
--- a/lib/colvars/colvarcomp.h
+++ b/lib/colvars/colvarcomp.h
@@ -233,8 +233,14 @@ public:
 
   /// Forcibly set value of CVC - useful for driving an external coordinate,
   /// eg. lambda dynamics
-  inline void set_value(colvarvalue const &new_value) {
+  inline void set_value(colvarvalue const &new_value, bool now=false) {
     x = new_value;
+    // Cache value to be communicated to back-end between time steps
+    cvm::proxy->set_alch_lambda(x.real_value);
+    if (now) {
+      // If requested (e.g. upon restarting), sync to back-end
+      cvm::proxy->send_alch_lambda();
+    }
   }
 
 protected:
@@ -1212,9 +1218,11 @@ protected:
   // No atom groups needed
 public:
   alch_lambda();
+  int init_alchemy(int time_step_factor);
   virtual ~alch_lambda() {}
   virtual void calc_value();
-  virtual void calc_gradients();
+  virtual void calc_force_invgrads();
+  virtual void calc_Jacobian_derivative();
   virtual void apply_force(colvarvalue const &force);
 };
 
diff --git a/lib/colvars/colvarcomp_alchlambda.cpp b/lib/colvars/colvarcomp_alchlambda.cpp
index a175ea43e8..29168d87ce 100644
--- a/lib/colvars/colvarcomp_alchlambda.cpp
+++ b/lib/colvars/colvarcomp_alchlambda.cpp
@@ -20,22 +20,46 @@ colvar::alch_lambda::alch_lambda()
 {
   set_function_type("alchLambda");
 
-  disable(f_cvc_explicit_gradient);
-  disable(f_cvc_gradient);
+  provide(f_cvc_explicit_gradient, false);
+  provide(f_cvc_gradient, false); // Cannot apply forces on this CVC
+  provide(f_cvc_collect_atom_ids, false);
+
+  provide(f_cvc_inv_gradient); // Projected force is TI derivative
+  provide(f_cvc_Jacobian);     // Zero
 
   x.type(colvarvalue::type_scalar);
-  // Query initial value from back-end
+
+  // Query initial value from back-end; will be overwritten if restarting from a state file
   cvm::proxy->get_alch_lambda(&x.real_value);
 }
 
 
+int colvar::alch_lambda::init_alchemy(int factor)
+{
+  // We need calculation every time step
+  // default in Tinker-HP and NAMD2, must be enforced in NAMD3
+  // Also checks back-end settings, i.e. that alchemy is enabled
+  // (in NAMD3: alchType TI, computeEnergies at the right frequency)
+
+  // Forbid MTS until fully implemented
+  if (factor != 1) {
+    return cvm::error("Error: timeStepFactor > 1 is not yet supported for alchemical variables.");
+  }
+  cvm::proxy->request_alch_energy_freq(factor);
+
+  return COLVARS_OK;
+}
+
+
 void colvar::alch_lambda::calc_value()
 {
-  // Special workflow:
-  // at the beginning of the timestep we get a force instead of calculating the value
+  // By default, follow external parameter
+  // This might get overwritten by driving extended dynamics
+  // (in apply_force() below)
+  cvm::proxy->get_alch_lambda(&x.real_value);
 
   cvm::proxy->get_dE_dlambda(&ft.real_value);
-  ft.real_value *= -1.0; // Energy derivative to force
+  ft.real_value *= -1.0; // Convert energy derivative to force
 
   // Include any force due to bias on Flambda
   ft.real_value += cvm::proxy->indirect_lambda_biasing_force;
@@ -43,19 +67,24 @@ void colvar::alch_lambda::calc_value()
 }
 
 
-void colvar::alch_lambda::calc_gradients()
+void colvar::alch_lambda::calc_force_invgrads()
 {
+  // All the work is done in calc_value()
+}
+
+
+void colvar::alch_lambda::calc_Jacobian_derivative()
+{
+  jd = 0.0;
 }
 
 
 void colvar::alch_lambda::apply_force(colvarvalue const & /* force */)
 {
-  // new value will be cached and sent at end of timestep
-  cvm::proxy->set_alch_lambda(x.real_value);
+  // Forces, if any, are applied in colvar::update_extended_Lagrangian()
 }
 
 
-
 colvar::alch_Flambda::alch_Flambda()
 {
   set_function_type("alch_Flambda");
diff --git a/lib/colvars/colvarcomp_angles.cpp b/lib/colvars/colvarcomp_angles.cpp
index 56894e9f5c..0e7aed65ed 100644
--- a/lib/colvars/colvarcomp_angles.cpp
+++ b/lib/colvars/colvarcomp_angles.cpp
@@ -267,74 +267,22 @@ void colvar::dihedral::calc_value()
 
 void colvar::dihedral::calc_gradients()
 {
-  cvm::rvector A = cvm::rvector::outer(r12, r23);
-  cvm::real   rA = A.norm();
-  cvm::rvector B = cvm::rvector::outer(r23, r34);
-  cvm::real   rB = B.norm();
-  cvm::rvector C = cvm::rvector::outer(r23, A);
-  cvm::real   rC = C.norm();
+  // Eqs. (27i) ~ (27l) from https://doi.org/10.1002/(SICI)1096-987X(19960715)17:9<1132::AID-JCC5>3.0.CO;2-T.
 
-  cvm::real const cos_phi = (A*B)/(rA*rB);
-  cvm::real const sin_phi = (C*B)/(rC*rB);
+  const cvm::rvector A = cvm::rvector::outer(r12, r23);
+  const cvm::rvector B = cvm::rvector::outer(r23, r34);
+  const cvm::real   nG = r23.norm();
+  const cvm::real   A2 = A.norm2();
+  const cvm::real   B2 = B.norm2();
 
-  cvm::rvector f1, f2, f3;
-
-  rB = 1.0/rB;
-  B *= rB;
-
-  if (cvm::fabs(sin_phi) > 0.1) {
-    rA = 1.0/rA;
-    A *= rA;
-    cvm::rvector const dcosdA = rA*(cos_phi*A-B);
-    cvm::rvector const dcosdB = rB*(cos_phi*B-A);
-    // rA = 1.0;
-
-    cvm::real const K = (1.0/sin_phi) * (180.0/PI);
-
-        f1 = K * cvm::rvector::outer(r23, dcosdA);
-        f3 = K * cvm::rvector::outer(dcosdB, r23);
-        f2 = K * (cvm::rvector::outer(dcosdA, r12)
-                   +  cvm::rvector::outer(r34, dcosdB));
-  }
-  else {
-    rC = 1.0/rC;
-    C *= rC;
-    cvm::rvector const dsindC = rC*(sin_phi*C-B);
-    cvm::rvector const dsindB = rB*(sin_phi*B-C);
-    // rC = 1.0;
-
-    cvm::real    const K = (-1.0/cos_phi) * (180.0/PI);
-
-    f1.x = K*((r23.y*r23.y + r23.z*r23.z)*dsindC.x
-              - r23.x*r23.y*dsindC.y
-              - r23.x*r23.z*dsindC.z);
-    f1.y = K*((r23.z*r23.z + r23.x*r23.x)*dsindC.y
-              - r23.y*r23.z*dsindC.z
-              - r23.y*r23.x*dsindC.x);
-    f1.z = K*((r23.x*r23.x + r23.y*r23.y)*dsindC.z
-              - r23.z*r23.x*dsindC.x
-              - r23.z*r23.y*dsindC.y);
-
-    f3 = cvm::rvector::outer(dsindB, r23);
-    f3 *= K;
-
-    f2.x = K*(-(r23.y*r12.y + r23.z*r12.z)*dsindC.x
-              +(2.0*r23.x*r12.y - r12.x*r23.y)*dsindC.y
-              +(2.0*r23.x*r12.z - r12.x*r23.z)*dsindC.z
-              +dsindB.z*r34.y - dsindB.y*r34.z);
-    f2.y = K*(-(r23.z*r12.z + r23.x*r12.x)*dsindC.y
-              +(2.0*r23.y*r12.z - r12.y*r23.z)*dsindC.z
-              +(2.0*r23.y*r12.x - r12.y*r23.x)*dsindC.x
-              +dsindB.x*r34.z - dsindB.z*r34.x);
-    f2.z = K*(-(r23.x*r12.x + r23.y*r12.y)*dsindC.z
-              +(2.0*r23.z*r12.x - r12.z*r23.x)*dsindC.x
-              +(2.0*r23.z*r12.y - r12.z*r23.y)*dsindC.y
-              +dsindB.y*r34.x - dsindB.x*r34.y);
-  }
+  const cvm::real     K = 180.0/PI;
+  const cvm::rvector f1 = K * nG / A2 * A;
+  const cvm::rvector f2 = K * ((r12 * r23 / (A2 * nG)) * A + (r34 * r23 / (B2 * nG)) * B);
+  const cvm::rvector f3 = K * nG / B2 * B;
 
   group1->set_weighted_gradient(-f1);
-  group2->set_weighted_gradient(-f2 + f1);
-  group3->set_weighted_gradient(-f3 + f2);
+  group2->set_weighted_gradient( f2 + f1);
+  group3->set_weighted_gradient(-f3 - f2);
   group4->set_weighted_gradient(f3);
 }
 
diff --git a/lib/colvars/colvarcomp_distances.cpp b/lib/colvars/colvarcomp_distances.cpp
index 319190c385..6de68264c3 100644
--- a/lib/colvars/colvarcomp_distances.cpp
+++ b/lib/colvars/colvarcomp_distances.cpp
@@ -384,32 +384,30 @@ void colvar::distance_dir::apply_force(colvarvalue const &force)
   cvm::real const iprod = force.rvector_value * x.rvector_value;
   cvm::rvector const force_tang = force.rvector_value - iprod * x.rvector_value;
 
-  if (!group1->noforce)
-    group1->apply_force(-1.0 * force_tang);
-
-  if (!group2->noforce)
-    group2->apply_force(       force_tang);
+  if (!group1->noforce) {
+    group1->apply_force(-1.0 / dist_v.norm() * force_tang);
+  }
+  if (!group2->noforce) {
+    group2->apply_force( 1.0 / dist_v.norm() * force_tang);
+  }
 }
 
 
-cvm::real colvar::distance_dir::dist2(colvarvalue const &x1,
-                                      colvarvalue const &x2) const
+cvm::real colvar::distance_dir::dist2(colvarvalue const &x1, colvarvalue const &x2) const
 {
-  return (x1.rvector_value - x2.rvector_value).norm2();
+  return x1.dist2(x2);
 }
 
 
-colvarvalue colvar::distance_dir::dist2_lgrad(colvarvalue const &x1,
-                                              colvarvalue const &x2) const
+colvarvalue colvar::distance_dir::dist2_lgrad(colvarvalue const &x1, colvarvalue const &x2) const
 {
-  return colvarvalue((x1.rvector_value - x2.rvector_value), colvarvalue::type_unit3vectorderiv);
+  return x1.dist2_grad(x2);
 }
 
 
-colvarvalue colvar::distance_dir::dist2_rgrad(colvarvalue const &x1,
-                                              colvarvalue const &x2) const
+colvarvalue colvar::distance_dir::dist2_rgrad(colvarvalue const &x1, colvarvalue const &x2) const
 {
-  return colvarvalue((x2.rvector_value - x1.rvector_value), colvarvalue::type_unit3vectorderiv);
+  return x2.dist2_grad(x1);
 }
 
 
@@ -1005,7 +1003,7 @@ void colvar::rmsd::calc_Jacobian_derivative()
     for (size_t ia = 0; ia < atoms->size(); ia++) {
 
       // Gradient of optimal quaternion wrt current Cartesian position
-      atoms->rot_deriv->calc_derivative_wrt_group1(ia, nullptr, &dq);
+      atoms->rot_deriv->calc_derivative_wrt_group1<false, true, false>(ia, nullptr, &dq);
 
       g11 = 2.0 * (atoms->rot.q)[1]*dq[1];
       g22 = 2.0 * (atoms->rot.q)[2]*dq[2];
@@ -1304,7 +1302,7 @@ void colvar::eigenvector::calc_Jacobian_derivative()
     // Gradient of optimal quaternion wrt current Cartesian position
     // trick: d(R^-1)/dx = d(R^t)/dx = (dR/dx)^t
     // we can just transpose the derivatives of the direct matrix
-    atoms->rot_deriv->calc_derivative_wrt_group1(ia, nullptr, &dq_1);
+    atoms->rot_deriv->calc_derivative_wrt_group1<false, true, false>(ia, nullptr, &dq_1);
 
     g11 = 2.0 * quat0[1]*dq_1[1];
     g22 = 2.0 * quat0[2]*dq_1[2];
@@ -1403,11 +1401,12 @@ void colvar::cartesian::apply_force(colvarvalue const &force)
   size_t ia, j;
   if (!atoms->noforce) {
     cvm::rvector f;
+    auto ag_force = atoms->get_group_force_object();
     for (ia = 0; ia < atoms->size(); ia++) {
       for (j = 0; j < dim; j++) {
         f[axes[j]] = force.vector1d_value[dim*ia + j];
       }
-      (*atoms)[ia].apply_force(f);
+      ag_force.add_atom_force(ia, f);
     }
   }
 }
diff --git a/lib/colvars/colvarcomp_protein.cpp b/lib/colvars/colvarcomp_protein.cpp
index f782095148..832005e2a7 100644
--- a/lib/colvars/colvarcomp_protein.cpp
+++ b/lib/colvars/colvarcomp_protein.cpp
@@ -28,34 +28,58 @@ colvar::alpha_angles::alpha_angles()
 int colvar::alpha_angles::init(std::string const &conf)
 {
   int error_code = cvc::init(conf);
+  if (error_code != COLVARS_OK) return error_code;
 
   std::string segment_id;
-  get_keyval(conf, "psfSegID", segment_id, std::string("MAIN"));
-
   std::vector<int> residues;
-  {
-    std::string residues_conf = "";
-    key_lookup(conf, "residueRange", &residues_conf);
+
+  bool b_use_index_groups = false;
+  cvm::atom_group group_CA, group_N, group_O;
+
+  std::string residues_conf = "";
+  std::string prefix;
+
+  // residueRange is mandatory for the topology-based case
+  if (key_lookup(conf, "residueRange", &residues_conf)) {
     if (residues_conf.size()) {
       std::istringstream is(residues_conf);
       int initial, final;
       char dash;
       if ( (is >> initial) && (initial > 0) &&
-           (is >> dash) && (dash == '-') &&
-           (is >> final) && (final > 0) ) {
+          (is >> dash) && (dash == '-') &&
+          (is >> final) && (final > 0) ) {
         for (int rnum = initial; rnum <= final; rnum++) {
           residues.push_back(rnum);
         }
       }
     } else {
-      error_code |=
-          cvm::error("Error: no residues defined in \"residueRange\".\n", COLVARS_INPUT_ERROR);
+      return cvm::error("Error: no residues defined in \"residueRange\".\n", COLVARS_INPUT_ERROR);
     }
-  }
 
-  if (residues.size() < 5) {
-    error_code |= cvm::error("Error: not enough residues defined in \"residueRange\".\n",
-                             COLVARS_INPUT_ERROR);
+    if (residues.size() < 5) {
+      return cvm::error("Error: not enough residues defined in \"residueRange\".\n", COLVARS_INPUT_ERROR);
+    }
+    get_keyval(conf, "psfSegID", segment_id, std::string("MAIN"));
+
+  } else {
+    b_use_index_groups = true;
+    get_keyval(conf, "prefix", prefix, "alpha_");
+
+    // Not all groups are mandatory, parse silently
+    group_CA.add_index_group(prefix + "CA", true);
+    group_N.add_index_group(prefix + "N", true);
+    group_O.add_index_group(prefix + "O", true);
+    int na = group_CA.size();
+    int nn = group_N.size();
+    int no = group_O.size();
+    if ((nn != 0 || no != 0) && (nn != no)) {
+      return cvm::error("Error: If either is provided, atom groups " + prefix + "N and " + prefix + "O must have the same number of atoms.",
+                        COLVARS_INPUT_ERROR);
+    }
+    if (nn != 0 && na != 0 && nn != na) {
+      return cvm::error("Error: If both are provided, atom groups " + prefix + "N and " + prefix + "CA must have the same number of atoms.",
+                        COLVARS_INPUT_ERROR);
+    }
   }
 
   std::string const &sid    = segment_id;
@@ -64,8 +88,7 @@ int colvar::alpha_angles::init(std::string const &conf)
 
   get_keyval(conf, "hBondCoeff", hb_coeff, hb_coeff);
   if ((hb_coeff < 0.0) || (hb_coeff > 1.0)) {
-    error_code |=
-        cvm::error("Error: hBondCoeff must be defined between 0 and 1.\n", COLVARS_INPUT_ERROR);
+    return cvm::error("Error: hBondCoeff must be defined between 0 and 1.\n", COLVARS_INPUT_ERROR);
   }
 
 
@@ -73,14 +96,29 @@ int colvar::alpha_angles::init(std::string const &conf)
   get_keyval(conf, "angleTol", theta_tol, theta_tol);
 
   if (hb_coeff < 1.0) {
-
-    for (size_t i = 0; i < residues.size()-2; i++) {
-      theta.push_back(new colvar::angle(cvm::atom(r[i  ], "CA", sid),
-                                        cvm::atom(r[i+1], "CA", sid),
-                                        cvm::atom(r[i+2], "CA", sid)));
-      register_atom_group(theta.back()->atom_groups[0]);
-      register_atom_group(theta.back()->atom_groups[1]);
-      register_atom_group(theta.back()->atom_groups[2]);
+    if (b_use_index_groups) {
+      if (group_CA.size() < 5) {
+        return cvm::error("Not enough atoms (" + cvm::to_str(group_CA.size()) + ") in index group \"" + prefix + "CA\"",
+                          COLVARS_INPUT_ERROR);
+      }
+      for (size_t i = 0; i < group_CA.size()-2; i++) {
+        // Note: the angle constructor constructs copies of the atom objects
+        theta.push_back(new colvar::angle(group_CA[i],
+                                          group_CA[i+1],
+                                          group_CA[i+2]));
+        register_atom_group(theta.back()->atom_groups[0]);
+        register_atom_group(theta.back()->atom_groups[1]);
+        register_atom_group(theta.back()->atom_groups[2]);
+      }
+    } else {
+      for (size_t i = 0; i < residues.size()-2; i++) {
+        theta.push_back(new colvar::angle(cvm::atom(r[i  ], "CA", sid),
+                                          cvm::atom(r[i+1], "CA", sid),
+                                          cvm::atom(r[i+2], "CA", sid)));
+        register_atom_group(theta.back()->atom_groups[0]);
+        register_atom_group(theta.back()->atom_groups[1]);
+        register_atom_group(theta.back()->atom_groups[2]);
+      }
     }
 
   } else {
@@ -93,14 +131,27 @@ int colvar::alpha_angles::init(std::string const &conf)
     get_keyval(conf, "hBondExpDenom", ed, ed);
 
     if (hb_coeff > 0.0) {
-
-      for (size_t i = 0; i < residues.size()-4; i++) {
-        hb.push_back(new colvar::h_bond(cvm::atom(r[i  ], "O",  sid),
-                                        cvm::atom(r[i+4], "N",  sid),
-                                        r0, en, ed));
-        register_atom_group(hb.back()->atom_groups[0]);
+      if (b_use_index_groups) {
+        if (group_N.size() < 5) {
+          return cvm::error("Not enough atoms (" + cvm::to_str(group_N.size()) + ") in index group \"" + prefix + "N\"",
+                            COLVARS_INPUT_ERROR);
+        }
+        for (size_t i = 0; i < group_N.size()-4; i++) {
+          // Note: we need to call the atom copy constructor here because
+          // the h_bond constructor does not make copies of the provided atoms
+          hb.push_back(new colvar::h_bond(cvm::atom(group_O[i]),
+                                          cvm::atom(group_N[i+4]),
+                                          r0, en, ed));
+          register_atom_group(hb.back()->atom_groups[0]);
+        }
+      } else {
+        for (size_t i = 0; i < residues.size()-4; i++) {
+          hb.push_back(new colvar::h_bond(cvm::atom(r[i  ], "O",  sid),
+                                          cvm::atom(r[i+4], "N",  sid),
+                                          r0, en, ed));
+          register_atom_group(hb.back()->atom_groups[0]);
+        }
       }
-
     } else {
       cvm::log("The hBondCoeff specified will disable the hydrogen bond terms.\n");
     }
@@ -290,41 +341,62 @@ int colvar::dihedPC::init(std::string const &conf)
   if (cvm::debug())
     cvm::log("Initializing dihedral PC object.\n");
 
+  bool b_use_index_groups = false;
   std::string segment_id;
-  get_keyval(conf, "psfSegID", segment_id, std::string("MAIN"));
-
   std::vector<int> residues;
-  {
-    std::string residues_conf = "";
-    key_lookup(conf, "residueRange", &residues_conf);
+  size_t n_residues;
+  std::string residues_conf = "";
+  std::string prefix;
+  cvm::atom_group group_CA, group_N, group_C;
+
+  // residueRange is mandatory for the topology-based case
+  if (key_lookup(conf, "residueRange", &residues_conf)) {
     if (residues_conf.size()) {
       std::istringstream is(residues_conf);
       int initial, final;
       char dash;
       if ( (is >> initial) && (initial > 0) &&
-           (is >> dash) && (dash == '-') &&
-           (is >> final) && (final > 0) ) {
+          (is >> dash) && (dash == '-') &&
+          (is >> final) && (final > 0) ) {
         for (int rnum = initial; rnum <= final; rnum++) {
           residues.push_back(rnum);
         }
       }
     } else {
-      error_code |=
-          cvm::error("Error: no residues defined in \"residueRange\".\n", COLVARS_INPUT_ERROR);
+      return cvm::error("Error: no residues defined in \"residueRange\".\n", COLVARS_INPUT_ERROR);
     }
-  }
+    n_residues = residues.size();
+    get_keyval(conf, "psfSegID", segment_id, std::string("MAIN"));
 
-  if (residues.size() < 2) {
+  } else {
+
+    b_use_index_groups = true;
+    get_keyval(conf, "prefix", prefix, "dihed_");
+
+    // All three groups are required
+    group_CA.add_index_group(prefix + "CA");
+    group_N.add_index_group(prefix + "N");
+    group_C.add_index_group(prefix + "C");
+    int na = group_CA.size();
+    int nn = group_N.size();
+    int nc = group_C.size();
+    if ((nn != na || na != nc)) {
+      return cvm::error("Error: atom groups " + prefix + "N, " + prefix + "CA, and " + prefix +
+                        "C must have the same number of atoms.", COLVARS_INPUT_ERROR);
+    }
+    n_residues = nn;
+  }
+  if (n_residues < 2) {
     error_code |=
-        cvm::error("Error: dihedralPC requires at least two residues.\n", COLVARS_INPUT_ERROR);
+      cvm::error("Error: dihedralPC requires at least two residues.\n", COLVARS_INPUT_ERROR);
   }
 
   std::string const &sid    = segment_id;
   std::vector<int> const &r = residues;
 
   std::string vecFileName;
-  int         vecNumber;
   if (get_keyval(conf, "vectorFile", vecFileName, vecFileName)) {
+    int vecNumber;
     get_keyval(conf, "vectorNumber", vecNumber, 0);
     if (vecNumber < 1) {
       error_code |=
@@ -339,9 +411,8 @@ int colvar::dihedPC::init(std::string const &conf)
     }
 
     // TODO: adapt to different formats by setting this flag
-    bool eigenvectors_as_columns = true;
-
-    if (eigenvectors_as_columns) {
+    // bool eigenvectors_as_columns = true;
+    // if (eigenvectors_as_columns) {
       // Carma-style dPCA file
       std::string line;
       cvm::real c;
@@ -352,9 +423,7 @@ int colvar::dihedPC::init(std::string const &conf)
         for (int i=0; i<vecNumber; i++) ls >> c;
         coeffs.push_back(c);
       }
-    }
-/*  TODO Uncomment this when different formats are recognized
-    else {
+    /* } else { // Uncomment this when different formats are recognized
       // Eigenvectors as lines
       // Skip to the right line
       for (int i = 1; i<vecNumber; i++)
@@ -380,28 +449,42 @@ int colvar::dihedPC::init(std::string const &conf)
     get_keyval(conf, "vector", coeffs, coeffs);
   }
 
-  if ( coeffs.size() != 4 * (residues.size() - 1)) {
+  if ( coeffs.size() != 4 * (n_residues - 1)) {
     error_code |= cvm::error("Error: wrong number of coefficients: " + cvm::to_str(coeffs.size()) +
-                             ". Expected " + cvm::to_str(4 * (residues.size() - 1)) +
+                             ". Expected " + cvm::to_str(4 * (n_residues - 1)) +
                              " (4 coeffs per residue, minus one residue).\n",
                              COLVARS_INPUT_ERROR);
   }
 
-  for (size_t i = 0; i < residues.size()-1; i++) {
+  for (size_t i = 0; i < n_residues-1; i++) {
     // Psi
-    theta.push_back(new colvar::dihedral(cvm::atom(r[i  ], "N", sid),
-                                         cvm::atom(r[i  ], "CA", sid),
-                                         cvm::atom(r[i  ], "C", sid),
-                                         cvm::atom(r[i+1], "N", sid)));
+    if (b_use_index_groups) {
+      theta.push_back(new colvar::dihedral( group_N[i],
+                                            group_CA[i],
+                                            group_C[i],
+                                            group_N[i+1]));
+    } else {
+      theta.push_back(new colvar::dihedral(cvm::atom(r[i  ], "N", sid),
+                                           cvm::atom(r[i  ], "CA", sid),
+                                           cvm::atom(r[i  ], "C", sid),
+                                           cvm::atom(r[i+1], "N", sid)));
+    }
     register_atom_group(theta.back()->atom_groups[0]);
     register_atom_group(theta.back()->atom_groups[1]);
     register_atom_group(theta.back()->atom_groups[2]);
     register_atom_group(theta.back()->atom_groups[3]);
     // Phi (next res)
-    theta.push_back(new colvar::dihedral(cvm::atom(r[i  ], "C", sid),
-                                         cvm::atom(r[i+1], "N", sid),
-                                         cvm::atom(r[i+1], "CA", sid),
-                                         cvm::atom(r[i+1], "C", sid)));
+    if (b_use_index_groups) {
+      theta.push_back(new colvar::dihedral(group_C[i],
+                                           group_N[i+1],
+                                           group_CA[i+1],
+                                           group_C[i+1]));
+    } else {
+      theta.push_back(new colvar::dihedral(cvm::atom(r[i  ], "C", sid),
+                                           cvm::atom(r[i+1], "N", sid),
+                                           cvm::atom(r[i+1], "CA", sid),
+                                           cvm::atom(r[i+1], "C", sid)));
+    }
     register_atom_group(theta.back()->atom_groups[0]);
     register_atom_group(theta.back()->atom_groups[1]);
     register_atom_group(theta.back()->atom_groups[2]);
diff --git a/lib/colvars/colvarcomp_rotations.cpp b/lib/colvars/colvarcomp_rotations.cpp
index a04ace851a..766a0870d1 100644
--- a/lib/colvars/colvarcomp_rotations.cpp
+++ b/lib/colvars/colvarcomp_rotations.cpp
@@ -137,11 +137,14 @@ void colvar::orientation::apply_force(colvarvalue const &force)
   if (!atoms->noforce) {
     rot_deriv_impl->prepare_derivative(rotation_derivative_dldq::use_dq);
     cvm::vector1d<cvm::rvector> dq0_2;
+    auto ag_force = atoms->get_group_force_object();
     for (size_t ia = 0; ia < atoms->size(); ia++) {
-      rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2);
-      for (size_t i = 0; i < 4; i++) {
-        (*atoms)[ia].apply_force(FQ[i] * dq0_2[i]);
-      }
+      rot_deriv_impl->calc_derivative_wrt_group2<false, true, false>(ia, nullptr, &dq0_2);
+      const auto f_ia = FQ[0] * dq0_2[0] +
+                        FQ[1] * dq0_2[1] +
+                        FQ[2] * dq0_2[2] +
+                        FQ[3] * dq0_2[3];
+      ag_force.add_atom_force(ia, f_ia);
     }
   }
 }
@@ -205,7 +208,7 @@ void colvar::orientation_angle::calc_gradients()
   rot_deriv_impl->prepare_derivative(rotation_derivative_dldq::use_dq);
   cvm::vector1d<cvm::rvector> dq0_2;
   for (size_t ia = 0; ia < atoms->size(); ia++) {
-    rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2);
+    rot_deriv_impl->calc_derivative_wrt_group2<false, true, false>(ia, nullptr, &dq0_2);
     (*atoms)[ia].grad = (dxdq0 * dq0_2[0]);
   }
 }
@@ -265,7 +268,7 @@ void colvar::orientation_proj::calc_gradients()
   rot_deriv_impl->prepare_derivative(rotation_derivative_dldq::use_dq);
   cvm::vector1d<cvm::rvector> dq0_2;
   for (size_t ia = 0; ia < atoms->size(); ia++) {
-    rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2);
+    rot_deriv_impl->calc_derivative_wrt_group2<false, true, false>(ia, nullptr, &dq0_2);
     (*atoms)[ia].grad = (dxdq0 * dq0_2[0]);
   }
 }
@@ -314,7 +317,7 @@ void colvar::tilt::calc_gradients()
   cvm::vector1d<cvm::rvector> dq0_2;
   for (size_t ia = 0; ia < atoms->size(); ia++) {
     (*atoms)[ia].grad = cvm::rvector(0.0, 0.0, 0.0);
-    rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2);
+    rot_deriv_impl->calc_derivative_wrt_group2<false, true, false>(ia, nullptr, &dq0_2);
     for (size_t iq = 0; iq < 4; iq++) {
       (*atoms)[ia].grad += (dxdq[iq] * dq0_2[iq]);
     }
@@ -351,7 +354,7 @@ void colvar::spin_angle::calc_gradients()
   cvm::vector1d<cvm::rvector> dq0_2;
   for (size_t ia = 0; ia < atoms->size(); ia++) {
     (*atoms)[ia].grad = cvm::rvector(0.0, 0.0, 0.0);
-    rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2);
+    rot_deriv_impl->calc_derivative_wrt_group2<false, true, false>(ia, nullptr, &dq0_2);
     for (size_t iq = 0; iq < 4; iq++) {
       (*atoms)[ia].grad += (dxdq[iq] * dq0_2[iq]);
     }
@@ -399,7 +402,7 @@ void colvar::euler_phi::calc_gradients()
   rot_deriv_impl->prepare_derivative(rotation_derivative_dldq::use_dq);
   cvm::vector1d<cvm::rvector> dq0_2;
   for (size_t ia = 0; ia < atoms->size(); ia++) {
-    rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2);
+    rot_deriv_impl->calc_derivative_wrt_group2<false, true, false>(ia, nullptr, &dq0_2);
     (*atoms)[ia].grad = (dxdq0 * dq0_2[0]) +
                         (dxdq1 * dq0_2[1]) +
                         (dxdq2 * dq0_2[2]) +
@@ -448,7 +451,7 @@ void colvar::euler_psi::calc_gradients()
   rot_deriv_impl->prepare_derivative(rotation_derivative_dldq::use_dq);
   cvm::vector1d<cvm::rvector> dq0_2;
   for (size_t ia = 0; ia < atoms->size(); ia++) {
-    rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2);
+    rot_deriv_impl->calc_derivative_wrt_group2<false, true, false>(ia, nullptr, &dq0_2);
     (*atoms)[ia].grad = (dxdq0 * dq0_2[0]) +
                         (dxdq1 * dq0_2[1]) +
                         (dxdq2 * dq0_2[2]) +
@@ -495,7 +498,7 @@ void colvar::euler_theta::calc_gradients()
   rot_deriv_impl->prepare_derivative(rotation_derivative_dldq::use_dq);
   cvm::vector1d<cvm::rvector> dq0_2;
   for (size_t ia = 0; ia < atoms->size(); ia++) {
-    rot_deriv_impl->calc_derivative_wrt_group2(ia, nullptr, &dq0_2);
+    rot_deriv_impl->calc_derivative_wrt_group2<false, true, false>(ia, nullptr, &dq0_2);
     (*atoms)[ia].grad = (dxdq0 * dq0_2[0]) +
                         (dxdq1 * dq0_2[1]) +
                         (dxdq2 * dq0_2[2]) +
diff --git a/lib/colvars/colvarcomp_torchann.cpp b/lib/colvars/colvarcomp_torchann.cpp
new file mode 100644
index 0000000000..7b83baf9b6
--- /dev/null
+++ b/lib/colvars/colvarcomp_torchann.cpp
@@ -0,0 +1,233 @@
+// -*- c++ -*-
+
+// This file is part of the Collective Variables module (Colvars).
+// The original version of Colvars and its updates are located at:
+// https://github.com/Colvars/colvars
+// Please update all Colvars source files before making any changes.
+// If you wish to distribute your changes, please submit them to the
+// Colvars repository at GitHub.
+
+#include "colvar.h"
+#include "colvarcomp.h"
+#include "colvarmodule.h"
+#include "colvarparse.h"
+#include "colvarvalue.h"
+
+#include "colvarcomp_torchann.h"
+
+
+#ifdef COLVARS_TORCH
+
+colvar::torchANN::torchANN()
+{
+  set_function_type("torchANN");
+  provide(f_cvc_periodic);
+}
+
+colvar::torchANN::~torchANN() {}
+
+
+int colvar::torchANN::init(std::string const &conf) {
+
+  int error_code = linearCombination::init(conf);
+
+  std::string model_file ;
+  get_keyval(conf, "modelFile", model_file, std::string(""));
+  try {
+    nn = torch::jit::load(model_file);
+    nn.to(torch::kCPU);
+    cvm::log("torch model loaded.") ;
+  } catch (const std::exception & e) {
+    return cvm::error("Error: couldn't load libtorch model (see below).\n" + cvm::to_str(e.what()),
+                      COLVARS_INPUT_ERROR);
+  }
+
+  auto const legacy_keyword = get_keyval(conf, "m_output_index", m_output_index, m_output_index);
+  if (legacy_keyword) {
+    cvm::log("Warning: m_output_index is a deprecated keyword, please use output_component instead.\n");
+  }
+  get_keyval(conf, "output_component", m_output_index, m_output_index);
+
+  get_keyval(conf, "doubleInputTensor", use_double_input, use_double_input);
+  //get_keyval(conf, "useGPU", use_gpu, false);
+
+  cvc_indices.resize(cv.size(),0);
+
+  size_t num_inputs = 0;
+  // compute total number of inputs of neural network
+  for (size_t i_cv = 0; i_cv < cv.size(); ++i_cv)
+  {
+    num_inputs += cv[i_cv]->value().size() ;
+    if (i_cv < cv.size() - 1)
+      cvc_indices[i_cv+1] = num_inputs;
+  }
+  cvm::log("Input dimension of model: " + cvm::to_str(num_inputs));
+
+  // initialize the input tensor
+  auto options = torch::TensorOptions().dtype(torch::kFloat32).requires_grad(true);
+
+  /*
+  if (use_gpu) {
+    if (torch::cuda::is_available()) {
+      try {
+        nn.to(torch::kCUDA);
+      } catch(const std::exception & e) {
+        cvm::error("Failed to move model to GPU.");
+        use_gpu = false;
+      }
+    } else {
+      use_gpu = false;
+      cvm::log("GPU not available.");
+    }
+  }
+
+  if (use_gpu) {
+    options = options.device(torch::kCUDA);
+    if (use_double_input) {
+      cvm::log("Data type reset to Float for GPU computation!");
+      use_double_input = false;
+    }
+  }
+  */
+
+  if (use_double_input) {  // set type to double
+    options = options.dtype(torch::kFloat64);
+    nn.to(torch::kFloat64);
+    cvm::log("Model's dtype: kFloat64.");
+  } else {
+    cvm::log("Model's dtype: kFloat32.");
+  }
+
+  input_tensor = torch::zeros({1,(long int) num_inputs}, options);
+
+  try { // test the model
+    std::vector<torch::jit::IValue> inputs={input_tensor};
+    nn_outputs = nn.forward(inputs).toTensor()[0][m_output_index];
+    cvm::log("Evaluating model with zero tensor succeeded.");
+  } catch (const std::exception & e) {
+    error_code |= cvm::error("Error: evaluating model with zero tensor failed (see below).\n" +
+                                 cvm::to_str(e.what()),
+                             COLVARS_INPUT_ERROR);
+  }
+
+  return error_code;
+}
+
+
+void colvar::torchANN::calc_value() {
+
+  for (size_t i_cv = 0; i_cv < cv.size(); ++i_cv)
+    cv[i_cv]->calc_value();
+
+  /*
+  if (use_gpu)
+    input_tensor = input_tensor.to(torch::kCPU);
+  */
+
+  // set input tensor with no_grad
+  {
+    torch::NoGradGuard no_grad;
+    size_t l = 0;
+    for (size_t i_cv = 0; i_cv < cv.size(); ++i_cv) {
+      const colvarvalue& current_cv_value = cv[i_cv]->value();
+      if (current_cv_value.type() == colvarvalue::type_scalar) {
+        input_tensor[0][l++] = cv[i_cv]->sup_coeff * (cvm::pow(current_cv_value.real_value, cv[i_cv]->sup_np));
+      } else {
+        for (size_t j_elem = 0; j_elem < current_cv_value.size(); ++j_elem)
+          input_tensor[0][l++] = cv[i_cv]->sup_coeff * current_cv_value[j_elem];
+      }
+    }
+  }
+
+  /*
+  if (use_gpu)
+    input_tensor = input_tensor.to(torch::kCUDA);
+  */
+
+  std::vector<torch::jit::IValue> inputs={input_tensor};
+
+  // evaluate the value of function
+  nn_outputs = nn.forward(inputs).toTensor()[0][m_output_index];
+
+  input_grad = torch::autograd::grad({nn_outputs}, {input_tensor})[0][0];
+
+  /*
+  if (use_gpu)
+    input_grad = input_grad.to(torch::kCPU);
+  */
+
+  x = nn_outputs.item<double>() ;
+
+  this->wrap(x);
+
+}
+
+void colvar::torchANN::calc_gradients() {
+  for (size_t i_cv = 0; i_cv < cv.size(); ++i_cv) {
+    cv[i_cv]->calc_gradients();
+    if (cv[i_cv]->is_enabled(f_cvc_explicit_gradient)) {
+      const cvm::real factor_polynomial = getPolynomialFactorOfCVGradient(i_cv);
+      // get the initial index of this cvc
+      size_t l = cvc_indices[i_cv];
+      for (size_t j_elem = 0; j_elem < cv[i_cv]->value().size(); ++j_elem) {
+        // get derivative of neural network wrt its input
+        const cvm::real factor = input_grad[l+j_elem].item<double>();
+        for (size_t k_ag = 0 ; k_ag < cv[i_cv]->atom_groups.size(); ++k_ag) {
+          for (size_t l_atom = 0; l_atom < (cv[i_cv]->atom_groups)[k_ag]->size(); ++l_atom) {
+            (*(cv[i_cv]->atom_groups)[k_ag])[l_atom].grad = factor_polynomial * factor * (*(cv[i_cv]->atom_groups)[k_ag])[l_atom].grad;
+          }
+        }
+      }
+    }
+  }
+}
+
+void colvar::torchANN::apply_force(colvarvalue const &force) {
+
+  for (size_t i_cv = 0; i_cv < cv.size(); ++i_cv) {
+    // If this CV uses explicit gradients, then atomic gradients are already calculated
+    // We can apply the force to atom groups directly
+    if (cv[i_cv]->is_enabled(f_cvc_explicit_gradient)) {
+      for (size_t k_ag = 0 ; k_ag < cv[i_cv]->atom_groups.size(); ++k_ag) {
+        (cv[i_cv]->atom_groups)[k_ag]->apply_colvar_force(force.real_value);
+      }
+    } else {
+      const colvarvalue& current_cv_value = cv[i_cv]->value();
+      colvarvalue cv_force(current_cv_value);
+      cv_force.reset();
+      const cvm::real factor_polynomial = getPolynomialFactorOfCVGradient(i_cv);
+      // get the initial index of this cvc
+      size_t l = cvc_indices[i_cv];
+      for (size_t j_elem = 0; j_elem < current_cv_value.size(); ++j_elem) {
+        cv_force[j_elem] = factor_polynomial * input_grad[l+j_elem].item<double>() * force.real_value;
+      }
+      cv[i_cv]->apply_force(cv_force);
+    }
+  }
+}
+
+
+#else
+
+colvar::torchANN::torchANN()
+{
+  set_function_type("torchANN");
+}
+
+colvar::torchANN::~torchANN() {}
+
+int colvar::torchANN::init(std::string const &conf) {
+
+  return cvm::error(
+          "torchANN requires the libtorch library, but it is not enabled during compilation.\n"
+          "Please refer to the Compilation Notes section of the Colvars manual for more "
+          "information.\n",
+          COLVARS_NOT_IMPLEMENTED);
+
+}
+
+void colvar::torchANN::calc_value()
+{
+}
+
+#endif
diff --git a/lib/colvars/colvarcomp_torchann.h b/lib/colvars/colvarcomp_torchann.h
new file mode 100644
index 0000000000..ae241edbcc
--- /dev/null
+++ b/lib/colvars/colvarcomp_torchann.h
@@ -0,0 +1,63 @@
+// -*- c++ -*-
+
+// This file is part of the Collective Variables module (Colvars).
+// The original version of Colvars and its updates are located at:
+// https://github.com/Colvars/colvars
+// Please update all Colvars source files before making any changes.
+// If you wish to distribute your changes, please submit them to the
+// Colvars repository at GitHub.
+//
+#ifndef COLVARCOMP_TORCH_H
+#define COLVARCOMP_TORCH_H
+
+// Declaration of torchann
+
+#include <memory>
+
+#include "colvar.h"
+#include "colvarcomp.h"
+#include "colvarmodule.h"
+
+#ifdef COLVARS_TORCH
+
+#include <torch/torch.h>
+#include <torch/script.h>
+
+class colvar::torchANN
+  : public colvar::linearCombination
+{
+protected:
+    torch::jit::script::Module nn;
+    /// Index of the nn output component
+    size_t m_output_index = 0;
+    bool use_double_input = false;
+    //bool use_gpu;
+    /// 1d tensor, concatenation of the values of the sub-cvcs
+    torch::Tensor input_tensor;
+    torch::Tensor nn_outputs;
+    torch::Tensor input_grad;
+    /// Initial index of each sub-cvc in input_tensor
+    std::vector<int> cvc_indices;
+public:
+    torchANN();
+    virtual ~torchANN();
+    virtual int init(std::string const &conf);
+    virtual void calc_value();
+    virtual void calc_gradients();
+    virtual void apply_force(colvarvalue const &force);
+};
+
+#else
+// Stub declaration used when COLVARS_TORCH is not defined
+class colvar::torchANN
+  : public colvar::cvc
+{
+public:
+    torchANN();
+    virtual ~torchANN();
+    virtual int init(std::string const &conf);
+    virtual void calc_value();
+};
+#endif // COLVARS_TORCH checking
+
+#endif
diff --git a/lib/colvars/colvardeps.cpp b/lib/colvars/colvardeps.cpp
index 46b7917569..3ba3209000 100644
--- a/lib/colvars/colvardeps.cpp
+++ b/lib/colvars/colvardeps.cpp
@@ -92,6 +92,8 @@ void colvardeps::restore_children_deps() {
 
 void colvardeps::provide(int feature_id, bool truefalse) {
   feature_states[feature_id].available = truefalse;
+  // A feature that is made unavailable must not be left enabled
+  if (!truefalse) disable(feature_id);
 }
 
 
@@ -123,8 +125,9 @@ bool colvardeps::get_keyval_feature(colvarparse *cvp,
 
 
 int colvardeps::enable(int feature_id,
-                       bool dry_run /* default: false */,
-                       bool toplevel /* default: true */)
+                       bool dry_run  /* default: false */,
+                       bool toplevel /* default: true */,
+                       bool error    /*default: false */)
 {
   int res;
   size_t i, j;
@@ -137,9 +140,12 @@ int colvardeps::enable(int feature_id,
   feature *f = features()[feature_id];
   feature_state *fs = &feature_states[feature_id];
 
+  // dry_run can be true because the parent object is not active; when error messages
+  // must be displayed nonetheless, error is set to true
+
   if (cvm::debug()) {
     cvm::log("DEPS: " + description +
-      (dry_run ? " testing " : " enabling ") +
+      (dry_run ? " testing " : " enabling ") +  (error ? " [error] " : "") +
       "\"" + f->description +"\"\n");
   }
 
@@ -159,7 +165,7 @@ int colvardeps::enable(int feature_id,
     (is_dynamic(feature_id) ? "Dynamic" : "User-controlled");
 
   if (!fs->available) {
-    if (!dry_run) {
+    if (!dry_run || error) {
       if (toplevel) {
         cvm::error("Error: " + feature_type_descr + " feature unavailable: \""
           + f->description + "\" in " + description + ".\n");
@@ -172,7 +178,7 @@ int colvardeps::enable(int feature_id,
   }
 
   if (!toplevel && !is_dynamic(feature_id)) {
-    if (!dry_run) {
+    if (!dry_run || error) {
       cvm::log(feature_type_descr + " feature \"" + f->description
         + "\" cannot be enabled automatically in " + description + ".\n");
       if (is_user(feature_id)) {
@@ -189,7 +195,7 @@ int colvardeps::enable(int feature_id,
     if (cvm::debug())
       cvm::log(f->description + " requires exclude " + g->description + "\n");
     if (is_enabled(f->requires_exclude[i])) {
-      if (!dry_run) {
+      if (!dry_run || error) {
         cvm::log("Feature \"" + f->description + "\" is incompatible with \""
         + g->description + "\" in " + description + ".\n");
         if (toplevel) {
@@ -204,10 +210,14 @@ int colvardeps::enable(int feature_id,
   for (i=0; i<f->requires_self.size(); i++) {
     if (cvm::debug())
       cvm::log(f->description + " requires self " + features()[f->requires_self[i]]->description + "\n");
-    res = enable(f->requires_self[i], dry_run, false);
+    res = enable(f->requires_self[i], dry_run, false, error);
     if (res != COLVARS_OK) {
-      if (!dry_run) {
-        cvm::log("...required by \"" + f->description + "\" in " + description + "\n");
+      if (!dry_run || error) {
+        if (toplevel) {
+          cvm::log("Cannot enable \"" + f->description + "\" in " + description + "\n");
+        } else {
+          cvm::log("...required by \"" + f->description + "\" in " + description + "\n");
+        }
         if (toplevel) {
           cvm::error("Error: Failed dependency in " + description + ".\n");
         }
@@ -225,11 +235,11 @@ int colvardeps::enable(int feature_id,
       int g = f->requires_alt[i][j];
       if (cvm::debug())
         cvm::log(f->description + " requires alt " + features()[g]->description + "\n");
-      res = enable(g, true, false);  // see if available
+      res = enable(g, true, false, error);  // see if available
       if (res == COLVARS_OK) {
         ok = true;
-        if (!dry_run) {
-          enable(g, false, false); // Require again, for real
+        if (!dry_run || error) {
+          enable(g, false, false, error); // Require again, for real
           fs->alternate_refs.push_back(g); // We remember we enabled this
           // so we can free it if this feature gets disabled
         }
@@ -245,7 +255,7 @@ int colvardeps::enable(int feature_id,
         for (j=0; j<f->requires_alt[i].size(); j++) {
           int g = f->requires_alt[i][j];
           cvm::log(cvm::to_str(j+1) + ". " + features()[g]->description + "\n");
-          enable(g, false, false); // Just for printing error output
+          enable(g, false, false, true); // Just for printing error output
         }
         cvm::decrease_depth();
         cvm::log("-----------------------------------------\n");
@@ -264,10 +274,14 @@ int colvardeps::enable(int feature_id,
   for (i=0; i<f->requires_children.size(); i++) {
     int g = f->requires_children[i];
     for (j=0; j<children.size(); j++) {
-      res = children[j]->enable(g, dry_run || !is_enabled(), false);
+      res = children[j]->enable(g, dry_run || !is_enabled(), false, error);
       if (res != COLVARS_OK) {
-        if (!dry_run) {
-          cvm::log("...required by \"" + f->description + "\" in " + description + "\n");
+        if (!dry_run || error) {
+          if (toplevel) {
+            cvm::log("Cannot enable \"" + f->description + "\" in " + description + "\n");
+          } else {
+            cvm::log("...required by \"" + f->description + "\" in " + description + "\n");
+          }
           if (toplevel) {
             cvm::error("Error: Failed dependency in " + description + ".\n");
           }
diff --git a/lib/colvars/colvardeps.h b/lib/colvars/colvardeps.h
index 1bd304b545..92e7a88326 100644
--- a/lib/colvars/colvardeps.h
+++ b/lib/colvars/colvardeps.h
@@ -198,7 +198,9 @@ public:
   /// \param toplevel False if this is called as part of a chain of dependency resolution.
   /// This is used to diagnose failed dependencies by displaying the full stack:
   /// only the toplevel dependency will throw a fatal error.
-  int enable(int f, bool dry_run = false, bool toplevel = true);
+  /// \param error Recursively enable, printing error messages along the way
+  /// Necessary when propagating errors across alternate dependencies
+  int enable(int f, bool dry_run = false, bool toplevel = true, bool error = false);
 
   /// Disable a feature, decrease the reference count of its dependencies
   /// and recursively disable them as applicable
@@ -255,6 +257,8 @@ public:
     f_cvb_scale_biasing_force,
     /// \brief whether this bias is applied to one or more ext-Lagrangian colvars
     f_cvb_extended,
+    /// Process this bias's data in parallel over multiple CPU threads
+    f_cvb_smp,
     f_cvb_ntot
   };
 
@@ -263,8 +267,11 @@ public:
     f_cv_active,
     /// \brief Colvar is awake (active on its own accord) this timestep
     f_cv_awake,
-    /// \brief Gradients are calculated and temporarily stored, so
-    /// that external forces can be applied
+    /// \brief External force can be applied, either to atoms or to an
+    /// extended DOF
+    f_cv_apply_force,
+    /// \brief Gradients are calculated and temporarily stored,
+    /// so that external forces can be propagated to atoms
     f_cv_gradient,
     /// \brief Collect atomic gradient data from all cvcs into vector
     /// atomic_gradient
@@ -277,7 +284,10 @@ public:
     /// forces on the inverse gradient
     f_cv_total_force,
     /// \brief Calculate total force from atomic forces
+    /// or get it from the back-end for an external parameter
     f_cv_total_force_calc,
+    /// \brief Total force is that of current time step
+    f_cv_total_force_current_step,
     /// \brief Subtract the applied force from the total force
     f_cv_subtract_applied_force,
     /// \brief Estimate Jacobian derivative
@@ -289,8 +299,10 @@ public:
     /// center with fictitious mass; bias forces will be applied to
     /// the center
     f_cv_extended_Lagrangian,
-    /// \brief An extended variable that sets an external variable in the
-    /// back-end (eg. an alchemical coupling parameter for lambda-dynamics)
+    /// \brief A variable that constrains or follows an external parameter
+    /// in the back-end (eg. an alchemical coupling parameter for lambda-dynamics)
+    /// If extended Lagrangian, then we drive the external parameter
+    /// Otherwise we follow it
     /// Can have a single component
     f_cv_external,
     /// \brief The extended system coordinate undergoes Langevin dynamics
diff --git a/lib/colvars/colvargrid.cpp b/lib/colvars/colvargrid.cpp
index 11693a7587..ad42966943 100644
--- a/lib/colvars/colvargrid.cpp
+++ b/lib/colvars/colvargrid.cpp
@@ -24,15 +24,14 @@ colvar_grid_count::colvar_grid_count()
   mult = 1;
 }
 
-colvar_grid_count::colvar_grid_count(std::vector<int> const &nx_i,
-                                     size_t const &def_count)
-  : colvar_grid<size_t>(nx_i, def_count, 1)
+colvar_grid_count::colvar_grid_count(std::vector<colvar *>  &colvars,
+                                     std::string config)
+  : colvar_grid<size_t>(colvars, 0, 1, false, nullptr, config)
 {}
 
 colvar_grid_count::colvar_grid_count(std::vector<colvar *>  &colvars,
-                                     size_t const &def_count,
-                                     bool margin)
-  : colvar_grid<size_t>(colvars, def_count, 1, margin)
+                                     std::shared_ptr<const colvar_grid_params> params)
+  : colvar_grid<size_t>(colvars, 0, 1, false, params)
 {}
 
 std::string colvar_grid_count::get_state_params() const
@@ -132,13 +131,17 @@ colvar_grid_scalar::colvar_grid_scalar(colvar_grid_scalar const &g)
 {
 }
 
-colvar_grid_scalar::colvar_grid_scalar(std::vector<int> const &nx_i)
-  : colvar_grid<cvm::real>(nx_i, 0.0, 1), samples(NULL)
+colvar_grid_scalar::colvar_grid_scalar(std::vector<colvar *> &colvars,
+                                       std::shared_ptr<const colvar_grid_params> params,
+                                       bool add_extra_bin,
+                                       std::string config)
+  : colvar_grid<cvm::real>(colvars, 0.0, 1, add_extra_bin, params, config), samples(NULL)
 {
 }
 
-colvar_grid_scalar::colvar_grid_scalar(std::vector<colvar *> &colvars, bool margin)
-  : colvar_grid<cvm::real>(colvars, 0.0, 1, margin), samples(NULL)
+colvar_grid_scalar::colvar_grid_scalar(std::string const &filename)
+  : colvar_grid<cvm::real>(filename, 1),
+    samples(nullptr)
 {
 }
 
@@ -330,89 +333,37 @@ cvm::real colvar_grid_scalar::grid_rmsd(colvar_grid_scalar const &other_grid) co
 
 
 colvar_grid_gradient::colvar_grid_gradient()
-  : colvar_grid<cvm::real>(), samples(NULL), full_samples(0), min_samples(0)
+  : colvar_grid<cvm::real>(), samples(NULL), full_samples(0), min_samples(0) // keep smoothing parameters initialized
 {}
 
 
-colvar_grid_gradient::colvar_grid_gradient(std::vector<int> const &nx_i)
-  : colvar_grid<cvm::real>(nx_i, 0.0, nx_i.size()), samples(NULL), full_samples(0), min_samples(0)
-{}
+// colvar_grid_gradient::colvar_grid_gradient(std::vector<colvar *> &colvars, std::string config)
+//   : colvar_grid<cvm::real>(colvars, 0.0, colvars.size(), false, nullptr, config), samples(NULL)
+// {}
 
+// colvar_grid_gradient::colvar_grid_gradient(std::vector<colvar *> &colvars,
+//                                            std::shared_ptr<colvar_grid_count> samples_in)
+//   : colvar_grid<cvm::real>(colvars, 0.0, colvars.size(), false, samples_in), samples(samples_in)
+// {
+//   if (samples_in)
+//     samples_in->has_parent_data = true;
+// }
 
-colvar_grid_gradient::colvar_grid_gradient(std::vector<colvar *> &colvars)
-  : colvar_grid<cvm::real>(colvars, 0.0, colvars.size()), samples(NULL), full_samples(0), min_samples(0)
-{}
-
-
-colvar_grid_gradient::colvar_grid_gradient(std::vector<colvar *> &colvars, std::shared_ptr<colvar_grid_count> samples_in)
-  : colvar_grid<cvm::real>(colvars, 0.0, colvars.size()), samples(samples_in), full_samples(0), min_samples(0)
+colvar_grid_gradient::colvar_grid_gradient(std::vector<colvar *> &colvars,
+                                           std::shared_ptr<colvar_grid_count> samples_in,
+                                           std::shared_ptr<const colvar_grid_params> params,
+                                           std::string config)
+  : colvar_grid<cvm::real>(colvars, 0.0, colvars.size(), false, params, config),
+    samples(samples_in), full_samples(0), min_samples(0) // smoothing parameters must not stay uninitialized
 {
-  samples_in->has_parent_data = true;
+  if (samples_in) samples_in->has_parent_data = true; // samples_in may be null (default argument)
 }
 
 
-colvar_grid_gradient::colvar_grid_gradient(std::string &filename)
-  : colvar_grid<cvm::real>(),
-    samples(NULL)
+colvar_grid_gradient::colvar_grid_gradient(std::string const &filename)
+  : colvar_grid<cvm::real>(filename, 0), // mult_i = 0: gradient multiplicity (mult = nd)
+    samples(nullptr), full_samples(0), min_samples(0)
 {
-  std::istream &is = cvm::main()->proxy->input_stream(filename,
-                                                      "gradient file");
-  if (!is) {
-    return;
-  }
-
-  // Data in the header: nColvars, then for each
-  // xiMin, dXi, nPoints, periodic flag
-
-  std::string  hash;
-  size_t i;
-
-  if ( !(is >> hash) || (hash != "#") ) {
-    cvm::error("Error reading grid at position "+
-                cvm::to_str(static_cast<size_t>(is.tellg()))+
-                " in stream(read \"" + hash + "\")\n");
-    return;
-  }
-
-  is >> nd;
-
-  if (nd > 50) {
-    cvm::error("Error: excessive number of dimensions in file \""+
-               filename+"\".  Please ensure that the file is not corrupt.\n",
-               COLVARS_INPUT_ERROR);
-    return;
-  }
-
-  mult = nd;
-  std::vector<cvm::real> lower_in(nd), widths_in(nd);
-  std::vector<int>       nx_in(nd);
-  std::vector<int>       periodic_in(nd);
-
-  for (i = 0; i < nd; i++ ) {
-    if ( !(is >> hash) || (hash != "#") ) {
-      cvm::error("Error reading grid at position "+
-                  cvm::to_str(static_cast<size_t>(is.tellg()))+
-                  " in stream(read \"" + hash + "\")\n");
-      return;
-    }
-
-    is >> lower_in[i] >> widths_in[i] >> nx_in[i] >> periodic_in[i];
-  }
-
-  this->setup(nx_in, 0., mult);
-
-  widths = widths_in;
-
-  for (i = 0; i < nd; i++ ) {
-    lower_boundaries.push_back(colvarvalue(lower_in[i]));
-    periodic.push_back(static_cast<bool>(periodic_in[i]));
-  }
-
-  // Reset the istream for read_multicol, which expects the whole file
-  is.clear();
-  is.seekg(0);
-  read_multicol(is);
-  cvm::main()->proxy->close_input_stream(filename);
 }
 
 std::string colvar_grid_gradient::get_state_params() const
@@ -586,12 +537,13 @@ cvm::real colvar_grid_gradient::grid_rmsd(colvar_grid_gradient const &other_grid
 }
 
 
-integrate_potential::integrate_potential(std::vector<colvar *> &colvars, std::shared_ptr<colvar_grid_gradient> gradients)
-  : colvar_grid_scalar(colvars, true),
+integrate_potential::integrate_potential(std::vector<colvar *> &colvars,
+                                         std::shared_ptr<colvar_grid_gradient> gradients)
+  : colvar_grid_scalar(colvars, gradients, true),
     b_smoothed(false),
     gradients(gradients)
 {
-  // parent class colvar_grid_scalar is constructed with margin option set to true
+  // parent class colvar_grid_scalar is constructed with add_extra_bin option set to true
   // hence PMF grid is wider than gradient grid if non-PBC
 
   if (nd > 1) {
diff --git a/lib/colvars/colvargrid.h b/lib/colvars/colvargrid.h
index 4cbbb10961..697b46a560 100644
--- a/lib/colvars/colvargrid.h
+++ b/lib/colvars/colvargrid.h
@@ -19,17 +19,13 @@
 #include "colvarparse.h"
 
 
-/// \brief Grid of values of a function of several collective
-/// variables \param T The data type
-///
-/// Only scalar colvars supported so far: vector colvars are treated as arrays
-template <class T> class colvar_grid : public colvarparse {
-
-  //protected:
-public: // TODO create accessors for these after all instantiations work
+/// \brief Unified base class for grid of values of a function of several collective
+/// variables
+class colvar_grid_params  {
 
+public:
   /// Number of dimensions
-  size_t nd;
+  size_t nd = 0;
 
   /// Number of points along each dimension
   std::vector<int> nx;
@@ -37,6 +33,27 @@ public: // TODO create accessors for these after all instantiations work
   /// Cumulative number of points along each dimension
   std::vector<int> nxc;
 
+  /// Lower boundaries of the colvars in this grid
+  std::vector<colvarvalue>  lower_boundaries;
+
+  /// Upper boundaries of the colvars in this grid
+  std::vector<colvarvalue>  upper_boundaries;
+
+  /// Widths of the colvars in this grid
+  std::vector<cvm::real>    widths;
+};
+
+
+/// \brief Grid of values of a function of several collective
+/// variables \param T The data type
+///
+/// Only scalar colvars supported so far: vector colvars are treated as arrays
+/// All common, type-independent members are collected in the base class colvar_grid_params
+template <class T> class colvar_grid : public colvar_grid_params, public colvarparse {
+
+  //protected:
+public: // TODO create accessors for these after all instantiations work
+
   /// \brief Multiplicity of each datum (allow the binning of
   /// non-scalar types such as atomic gradients)
   size_t mult;
@@ -73,13 +90,6 @@ public: // TODO create accessors for these after all instantiations work
   }
 
 public:
-
-  /// Lower boundaries of the colvars in this grid
-  std::vector<colvarvalue> lower_boundaries;
-
-  /// Upper boundaries of the colvars in this grid
-  std::vector<colvarvalue> upper_boundaries;
-
   /// Whether some colvars are periodic
   std::vector<bool>        periodic;
 
@@ -89,9 +99,6 @@ public:
   /// Whether some colvars have hard upper boundaries
   std::vector<bool>        hard_upper_boundaries;
 
-  /// Widths of the colvars in this grid
-  std::vector<cvm::real>   widths;
-
   /// True if this is a count grid related to another grid of data
   bool has_parent_data;
 
@@ -218,19 +225,15 @@ public:
   /// \brief "Almost copy-constructor": only copies configuration
   /// parameters from another grid, but doesn't reallocate stuff;
   /// setup() must be called after that;
-  colvar_grid(colvar_grid<T> const &g) : colvarparse(),
-                                         nd(g.nd),
-                                         nx(g.nx),
+  colvar_grid(colvar_grid<T> const &g) : colvar_grid_params(g), // copies the base-class grid parameters
+                                         colvarparse(),
                                          mult(g.mult),
                                          data(),
                                          cv(g.cv),
                                          use_actual_value(g.use_actual_value),
-                                         lower_boundaries(g.lower_boundaries),
-                                         upper_boundaries(g.upper_boundaries),
                                          periodic(g.periodic),
                                          hard_lower_boundaries(g.hard_lower_boundaries),
                                          hard_upper_boundaries(g.hard_upper_boundaries),
-                                         widths(g.widths),
                                          has_parent_data(false),
                                          has_data(false)
   {}
@@ -247,22 +250,31 @@ public:
     this->setup(nx_i, t, mult_i);
   }
 
-  /// \brief Constructor from a vector of colvars
+  /// \brief Constructor from a vector of colvars, with an optional template grid or grid config string
   /// \param add_extra_bin requests that non-periodic dimensions are extended
   /// by 1 bin to accommodate the integral (PMF) of another gridded quantity (gradient)
   colvar_grid(std::vector<colvar *> const &colvars,
               T const &t = T(),
               size_t mult_i = 1,
-              bool add_extra_bin = false)
+              bool add_extra_bin = false,
+              std::shared_ptr<const colvar_grid_params> params = nullptr,
+              std::string config = std::string())
     : has_parent_data(false), has_data(false)
   {
     (void) t;
-    this->init_from_colvars(colvars, mult_i, add_extra_bin);
+    this->init_from_colvars(colvars, mult_i, add_extra_bin, params, config);
   }
 
+  /// \brief Constructor from a multicol file
+  /// \param filename multicol file containing data to be read
+  /// \param mult_i multiplicity of the data - if 0, assume gradient multiplicity (mult = nd)
+  colvar_grid(std::string const &filename, size_t mult_i = 1);
+
   int init_from_colvars(std::vector<colvar *> const &colvars,
                         size_t mult_i = 1,
-                        bool add_extra_bin = false)
+                        bool add_extra_bin = false,
+                        std::shared_ptr<const colvar_grid_params> params = nullptr,
+                        std::string config = std::string())
   {
     if (cvm::debug()) {
       cvm::log("Reading grid configuration from collective variables.\n");
@@ -279,8 +291,7 @@ public:
                " collective variables, multiplicity = "+cvm::to_str(mult_i)+".\n");
     }
 
-    for (i =  0; i < cv.size(); i++) {
-
+    for (i =  0; i < nd; i++) {
       if (cv[i]->value().type() != colvarvalue::type_scalar) {
         cvm::error("Colvar grids can only be automatically "
                    "constructed for scalar variables.  "
@@ -298,7 +309,6 @@ public:
       widths.push_back(cv[i]->width);
       hard_lower_boundaries.push_back(cv[i]->is_enabled(colvardeps::f_cv_hard_lower_boundary));
       hard_upper_boundaries.push_back(cv[i]->is_enabled(colvardeps::f_cv_hard_upper_boundary));
-      periodic.push_back(cv[i]->periodic_boundaries());
 
       // By default, get reported colvar value (for extended Lagrangian colvars)
       use_actual_value.push_back(false);
@@ -310,22 +320,55 @@ public:
         use_actual_value[i-1] = true;
       }
 
+      // This needs to work if the boundaries are undefined in the colvars
+      lower_boundaries.push_back(cv[i]->lower_boundary);
+      upper_boundaries.push_back(cv[i]->upper_boundary);
+    }
+
+    // Replace widths and boundaries with optional custom configuration
+    if (!config.empty()) {
+      this->parse_params(config);
+      this->check_keywords(config, "grid");
+
+      if (params) {
+        cvm::error("Error: init_from_colvars was passed both a grid config and a template grid.", COLVARS_BUG_ERROR);
+        return COLVARS_BUG_ERROR;
+      }
+    } else if (params) {
+      // Match grid sizes with template
+
+      if (params->nd != nd) {
+        cvm::error("Trying to initialize grid from template with wrong dimension (" +
+                    cvm::to_str(params->nd) + " instead of " +
+                    cvm::to_str(this->nd) + ").");
+        return COLVARS_ERROR;
+      }
+
+      widths =params->widths;
+      lower_boundaries =params->lower_boundaries;
+      upper_boundaries =params->upper_boundaries;
+    }
+
+    // Only now can we determine periodicity
+    for (i =  0; i < nd; i++) {
+      periodic.push_back(cv[i]->periodic_boundaries(lower_boundaries[i].real_value,
+                                                    upper_boundaries[i].real_value));
+
       if (add_extra_bin) {
+        // Shift the grid by half the bin width (values at edges instead of center of bins)
+        lower_boundaries[i] -= 0.5 * widths[i];
+
         if (periodic[i]) {
-          // Shift the grid by half the bin width (values at edges instead of center of bins)
-          lower_boundaries.push_back(cv[i]->lower_boundary.real_value - 0.5 * widths[i]);
-          upper_boundaries.push_back(cv[i]->upper_boundary.real_value - 0.5 * widths[i]);
+          // Just shift
+          upper_boundaries[i] -= 0.5 * widths[i];
         } else {
-          // Make this grid larger by one bin width
-          lower_boundaries.push_back(cv[i]->lower_boundary.real_value - 0.5 * widths[i]);
-          upper_boundaries.push_back(cv[i]->upper_boundary.real_value + 0.5 * widths[i]);
+          // Widen grid by one bin width
+          upper_boundaries[i] += 0.5 * widths[i];
         }
-      } else {
-        lower_boundaries.push_back(cv[i]->lower_boundary);
-        upper_boundaries.push_back(cv[i]->upper_boundary);
       }
     }
 
+    // Reset grid sizes based on widths and boundaries
     this->init_from_boundaries();
     return this->setup();
   }
@@ -966,14 +1009,12 @@ public:
   virtual ~colvar_grid_count()
   {}
 
-  /// Constructor
-  colvar_grid_count(std::vector<int> const &nx_i,
-                    size_t const           &def_count = 0);
-
-  /// Constructor from a vector of colvars
+  /// Constructor from a vector of colvars or a config string
   colvar_grid_count(std::vector<colvar *>  &colvars,
-                    size_t const           &def_count = 0,
-                    bool                   add_extra_bin = false);
+                    std::shared_ptr<const colvar_grid_params> params = nullptr);
+
+  colvar_grid_count(std::vector<colvar *>  &colvars,
+                    std::string            config);
 
   /// Increment the counter at given position
   inline void incr_count(std::vector<int> const &ix)
@@ -1255,12 +1296,14 @@ public:
   /// Destructor
   virtual ~colvar_grid_scalar();
 
-  /// Constructor from specific sizes arrays
-  colvar_grid_scalar(std::vector<int> const &nx_i);
-
   /// Constructor from a vector of colvars
   colvar_grid_scalar(std::vector<colvar *> &colvars,
-                     bool add_extra_bin = false);
+                     std::shared_ptr<const colvar_grid_params> params = nullptr,
+                     bool add_extra_bin = false,
+                     std::string config = std::string());
+
+  /// Constructor from a multicol file
+  colvar_grid_scalar(std::string const &filename);
 
   /// Accumulate the value
   inline void acc_value(std::vector<int> const &ix,
@@ -1334,8 +1377,8 @@ public:
 
   /// \brief Return the gradient of the scalar field from finite differences
   /// Input coordinates are those of gradient grid, shifted wrt scalar grid
-  /// Should not be called on edges of scalar grid, provided the latter has margins
-  /// wrt gradient grid
+  /// Should not be called on edges of scalar grid, provided the latter has
+  /// margins (extra bins) wrt gradient grid
   inline void vector_gradient_finite_diff( const std::vector<int> &ix0, std::vector<cvm::real> &grad)
   {
     cvm::real A0, A1;
@@ -1566,17 +1609,21 @@ public:
   virtual ~colvar_grid_gradient()
   {}
 
-  /// Constructor from specific sizes arrays
-  colvar_grid_gradient(std::vector<int> const &nx_i);
+  // /// Constructor from specific sizes arrays
+  // colvar_grid_gradient(std::vector<int> const &nx_i);
 
-  /// Constructor from a vector of colvars
-  colvar_grid_gradient(std::vector<colvar *>  &colvars);
+  // /// Constructor from a vector of colvars
+  // colvar_grid_gradient(std::vector<colvar *>  &colvars,
+  //                      std::string config = std::string());
 
   /// Constructor from a multicol file
-  colvar_grid_gradient(std::string &filename);
+  colvar_grid_gradient(std::string const &filename);
 
   /// Constructor from a vector of colvars and a pointer to the count grid
-  colvar_grid_gradient(std::vector<colvar *> &colvars, std::shared_ptr<colvar_grid_count> samples_in);
+  colvar_grid_gradient(std::vector<colvar *> &colvars,
+                       std::shared_ptr<colvar_grid_count> samples_in = nullptr,
+                       std::shared_ptr<const colvar_grid_params> params = nullptr,
+                       std::string config = std::string());
 
   /// Parameters for smoothing data with low sampling
   int full_samples;
@@ -1829,7 +1876,8 @@ class integrate_potential : public colvar_grid_scalar
   {}
 
   /// Constructor from a vector of colvars + gradient grid
-  integrate_potential(std::vector<colvar *> &colvars, std::shared_ptr<colvar_grid_gradient> gradients);
+  integrate_potential(std::vector<colvar *> &colvars,
+                      std::shared_ptr<colvar_grid_gradient> gradients);
 
   /// Constructor from a gradient grid (for processing grid files without a Colvars config)
   integrate_potential(std::shared_ptr<colvar_grid_gradient> gradients);
diff --git a/lib/colvars/colvargrid_def.h b/lib/colvars/colvargrid_def.h
index fa6531271b..96075e1ffe 100644
--- a/lib/colvars/colvargrid_def.h
+++ b/lib/colvars/colvargrid_def.h
@@ -22,6 +22,62 @@
 #include "colvars_memstream.h"
 
 
+template <class T>
+colvar_grid<T>::colvar_grid(std::string const &filename, size_t mult_i)
+  : mult(mult_i), has_parent_data(false), has_data(false)
+{
+  // Build a grid from a multicol file.  Data in the header: nColvars, then for
+  // each colvar xiMin, dXi, nPoints, periodic flag.  mult_i == 0 selects mult = nd.
+  std::istream &is = cvm::main()->proxy->input_stream(filename, "multicol grid file");
+  if (!is) {
+    return;
+  }
+  std::string  hash;
+  size_t i;
+  if ( !(is >> hash) || (hash != "#") ) {
+    cvm::error("Error reading grid at position "+
+                cvm::to_str(static_cast<size_t>(is.tellg()))+
+                " in stream(read \"" + hash + "\")\n");
+    return;
+  }
+
+  is >> nd;
+  // Reject implausible dimension counts before sizing any vector from nd
+  if (nd > 50) {
+    cvm::error("Error: excessive number of dimensions in file \""+
+               filename+"\".  Please ensure that the file is not corrupt.\n",
+               COLVARS_INPUT_ERROR);
+    return;
+  }
+  mult = (mult_i == 0) ? nd : mult_i;
+
+  std::vector<cvm::real> lower_in(nd), widths_in(nd);
+  std::vector<int>       nx_in(nd), periodic_in(nd);
+  for (i = 0; i < nd; i++ ) {
+    if ( !(is >> hash) || (hash != "#") ) {
+      cvm::error("Error reading grid at position "+
+                  cvm::to_str(static_cast<size_t>(is.tellg()))+
+                  " in stream(read \"" + hash + "\")\n");
+      return;
+    }
+    is >> lower_in[i] >> widths_in[i] >> nx_in[i] >> periodic_in[i];
+  }
+
+  this->setup(nx_in, 0., mult);
+  widths = widths_in;
+  for (i = 0; i < nd; i++ ) {
+    lower_boundaries.push_back(colvarvalue(lower_in[i]));
+    periodic.push_back(static_cast<bool>(periodic_in[i]));
+  }
+
+  // Reset the istream for read_multicol, which expects the whole file
+  is.clear();
+  is.seekg(0);
+  read_multicol(is);
+  cvm::main()->proxy->close_input_stream(filename);
+}
+
+
 template <class T, class IST> IST &read_restart_template_(colvar_grid<T> &g, IST &is)
 {
   auto const start_pos = is.tellg();
@@ -203,14 +259,16 @@ template <class T> int colvar_grid<T>::parse_params(std::string const &conf,
                           lower_boundaries, lower_boundaries, colvarparse::parse_silent);
   colvarparse::get_keyval(conf, "upper_boundaries",
                           upper_boundaries, upper_boundaries, colvarparse::parse_silent);
+  // plural form is used in state file
+  colvarparse::get_keyval(conf, "widths", widths, widths, colvarparse::parse_silent);
 
   // camel case keywords are used in config file
-  colvarparse::get_keyval(conf, "lowerBoundaries",
+  colvarparse::get_keyval(conf, "lowerBoundary",
                           lower_boundaries, lower_boundaries, parse_mode);
-  colvarparse::get_keyval(conf, "upperBoundaries",
+  colvarparse::get_keyval(conf, "upperBoundary",
                           upper_boundaries, upper_boundaries, parse_mode);
 
-  colvarparse::get_keyval(conf, "widths", widths, widths, parse_mode);
+  colvarparse::get_keyval(conf, "width", widths, widths, parse_mode);
 
   // only used in state file
   colvarparse::get_keyval(conf, "sizes", nx, nx, colvarparse::parse_silent);
diff --git a/lib/colvars/colvarmodule.cpp b/lib/colvars/colvarmodule.cpp
index 25b1efe209..34485d7883 100644
--- a/lib/colvars/colvarmodule.cpp
+++ b/lib/colvars/colvarmodule.cpp
@@ -24,6 +24,7 @@
 #include "colvarbias_histogram_reweight_amd.h"
 #include "colvarbias_meta.h"
 #include "colvarbias_restraint.h"
+#include "colvarbias_opes.h"
 #include "colvarscript.h"
 #include "colvaratoms.h"
 #include "colvarcomp.h"
@@ -109,23 +110,23 @@ colvarmodule::colvarmodule(colvarproxy *proxy_in)
            "  https://doi.org/10.1080/00268976.2013.813594\n"
            "as well as all other papers listed below for individual features used.\n");
 
-#if (__cplusplus >= 201103L)
-  cvm::log("This version was built with the C++11 standard or higher.\n");
-#else
-  cvm::log("This version was built without the C++11 standard: some features are disabled.\n"
-    "Please see the following link for details:\n"
-    "  https://colvars.github.io/README-c++11.html\n");
-#endif
-
   cvm::log("Summary of compile-time features available in this build:\n");
 
-  if (proxy->check_smp_enabled() == COLVARS_NOT_IMPLEMENTED) {
-    cvm::log("  - SMP parallelism: not available\n");
+  std::string cxx_lang_msg("  - C++ language version: " + cvm::to_str(__cplusplus));
+#if defined(_WIN32) && !defined(__CYGWIN__)
+  cxx_lang_msg += std::string(" (warning: may not be accurate for this build)");
+#endif
+  cxx_lang_msg += std::string("\n");
+  cvm::log(cxx_lang_msg);
+
+  if (proxy->check_replicas_enabled() == COLVARS_NOT_IMPLEMENTED) {
+    cvm::log("  - Multiple replicas: not available\n");
   } else {
-    if (proxy->check_smp_enabled() == COLVARS_OK) {
-      cvm::log("  - SMP parallelism: enabled (num. threads = " + to_str(proxy->smp_num_threads()) + ")\n");
+    if (proxy->check_replicas_enabled() == COLVARS_OK) {
+      cvm::log("  - Multiple replicas: enabled (replica number " +
+               to_str(proxy->replica_index() + 1) + " of " + to_str(proxy->num_replicas()) + ")\n");
     } else {
-      cvm::log("  - SMP parallelism: available, but not enabled\n");
+      cvm::log("  - Multiple replicas: available, but not (yet) enabled\n");
     }
   }
 
@@ -201,6 +202,20 @@ std::vector<int> *colvarmodule::variables_active_smp_items()
 }
 
 
+int colvarmodule::calc_component_smp(int i)
+{
+  colvar *x = (*(variables_active_smp()))[i];
+  int x_item = (*(variables_active_smp_items()))[i];
+  if (cvm::debug()) {
+    cvm::log("Thread "+cvm::to_str(proxy->smp_thread_id())+"/"+
+             cvm::to_str(proxy->smp_num_threads())+
+             ": calc_component_smp(), i = "+cvm::to_str(i)+", cv = "+
+             x->name+", cvc = "+cvm::to_str(x_item)+"\n");
+  }
+  return x->calc_cvcs(x_item, 1);
+}
+
+
 std::vector<colvarbias *> *colvarmodule::biases_active()
 {
   return &(biases_active_);
@@ -387,8 +402,26 @@ int colvarmodule::parse_global_params(std::string const &conf)
     }
   }
 
-  if (parse->get_keyval(conf, "smp", proxy->b_smp_active, proxy->b_smp_active)) {
-    if (proxy->b_smp_active == false) {
+  std::string smp;
+  if (parse->get_keyval(conf, "smp", smp, "cvcs")) {
+    if (smp == "cvcs" || smp == "on" || smp == "yes") {
+      if (proxy->set_smp_mode(colvarproxy_smp::smp_mode_t::cvcs) != COLVARS_OK) {
+        cvm::error("Colvars component-based parallelism is not implemented.\n");
+        return COLVARS_INPUT_ERROR;
+      } else {
+        cvm::log("SMP parallelism will be applied to Colvars components.\n");
+        cvm::log("  - SMP parallelism: enabled (num. threads = " + to_str(proxy->smp_num_threads()) + ")\n");
+      }
+    } else if (smp == "inner_loop") {
+      if (proxy->set_smp_mode(colvarproxy_smp::smp_mode_t::inner_loop) != COLVARS_OK) {
+        cvm::error("SMP parallelism inside the calculation of Colvars components is not implemented.\n");
+        return COLVARS_INPUT_ERROR;
+      } else {
+        cvm::log("SMP parallelism will be applied inside the Colvars components.\n");
+      cvm::log("  - SMP parallelism: enabled (num. threads = " + to_str(proxy->smp_num_threads()) + ")\n");
+      }
+    } else {
+      proxy->set_smp_mode(colvarproxy_smp::smp_mode_t::none);
       cvm::log("SMP parallelism has been disabled.\n");
     }
   }
@@ -589,6 +622,9 @@ int colvarmodule::parse_biases(std::string const &conf)
   /// initialize reweightaMD instances
   parse_biases_type<colvarbias_reweightaMD>(conf, "reweightaMD");
 
+  /// initialize OPES instances
+  parse_biases_type<colvarbias_opes>(conf, "opes_metad");
+
   if (use_scripted_forces) {
     cvm::log(cvm::line_marker);
     cvm::increase_depth();
@@ -922,7 +958,7 @@ int colvarmodule::calc_colvars()
   }
 
   // if SMP support is available, split up the work
-  if (proxy->check_smp_enabled() == COLVARS_OK) {
+  if (proxy->get_smp_mode() == colvarproxy_smp::smp_mode_t::cvcs) {
 
     // first, calculate how much work (currently, how many active CVCs) each colvar has
 
@@ -948,8 +984,10 @@ int colvarmodule::calc_colvars()
     }
     cvm::decrease_depth();
 
-    // calculate colvar components in parallel
-    error_code |= proxy->smp_colvars_loop();
+    // calculate active colvar components in parallel
+    error_code |= proxy->smp_loop(variables_active_smp()->size(), [](int i) {
+        return cvm::main()->calc_component_smp(i);
+      });
 
     cvm::increase_depth();
     for (cvi = variables_active()->begin(); cvi != variables_active()->end(); cvi++) {
@@ -1013,7 +1051,7 @@ int colvarmodule::calc_biases()
   }
 
   // If SMP support is available, split up the work (unless biases need to use main thread's memory)
-  if (proxy->check_smp_enabled() == COLVARS_OK && !biases_need_main_thread) {
+  if (proxy->get_smp_mode() == colvarproxy::smp_mode_t::cvcs && !biases_need_main_thread) {
 
     if (use_scripted_forces && !scripting_after_biases) {
       // calculate biases and scripted forces in parallel
@@ -1097,7 +1135,7 @@ int colvarmodule::update_colvar_forces()
     cvm::log("Communicating forces from the colvars to the atoms.\n");
   cvm::increase_depth();
   for (cvi = variables_active()->begin(); cvi != variables_active()->end(); cvi++) {
-    if ((*cvi)->is_enabled(colvardeps::f_cv_gradient)) {
+    if ((*cvi)->is_enabled(colvardeps::f_cv_apply_force)) {
       (*cvi)->communicate_forces();
       if (cvm::get_error()) {
         return COLVARS_ERROR;
@@ -1986,7 +2024,7 @@ size_t & colvarmodule::depth()
 {
   // NOTE: do not call log() or error() here, to avoid recursion
   colvarmodule *cv = cvm::main();
-  if (proxy->check_smp_enabled() == COLVARS_OK) {
+  if (proxy->get_smp_mode() == colvarproxy::smp_mode_t::cvcs) {
     int const nt = proxy->smp_num_threads();
     if (int(cv->depth_v.size()) != nt) {
       proxy->smp_lock();
diff --git a/lib/colvars/colvarmodule.h b/lib/colvars/colvarmodule.h
index fa84b1ad75..5f042767dc 100644
--- a/lib/colvars/colvarmodule.h
+++ b/lib/colvars/colvarmodule.h
@@ -18,6 +18,11 @@
 #define COLVARS_DEBUG false
 #endif
 
+#if defined(__FAST_MATH__)
+// NOTE: This is used for fixing https://github.com/Colvars/colvars/issues/767
+#define COLVARS_BOUNDED_INV_TRIGONOMETRIC_FUNC
+#endif
+
 /*! \mainpage Main page
 This is the Developer's documentation for the Collective Variables module (Colvars).
 
@@ -147,17 +152,44 @@ public:
     return ::cos(static_cast<double>(x));
   }
 
-  /// Reimplemented to work around MS compiler issues
-  static inline real asin(real const &x)
-  {
-    return ::asin(static_cast<double>(x));
-  }
+#ifndef PI
+#define PI   3.14159265358979323846
+#endif
+#ifndef PI_2
+#define PI_2 1.57079632679489661923
+#endif
 
-  /// Reimplemented to work around MS compiler issues
-  static inline real acos(real const &x)
-  {
+/// Reimplemented to work around compiler issues; return hard-coded values for boundary conditions
+static inline real asin(real const &x)
+{
+#ifdef COLVARS_BOUNDED_INV_TRIGONOMETRIC_FUNC
+    if (x <= -1.0) {
+        return -PI_2;
+    } else if (x >= 1.0) {
+        return PI_2;
+    } else {
+        return ::asin(static_cast<double>(x));
+    }
+#else
+    return ::asin(static_cast<double>(x));
+#endif
+}
+
+/// Reimplemented to work around compiler issues; return hard-coded values for boundary conditions
+static inline real acos(real const &x)
+{
+#ifdef COLVARS_BOUNDED_INV_TRIGONOMETRIC_FUNC
+    if (x <= -1.0) {
+        return PI;
+    } else if (x >= 1.0) {
+        return 0.0;
+    } else {
+        return ::acos(static_cast<double>(x));
+    }
+#else
     return ::acos(static_cast<double>(x));
-  }
+#endif
+}
 
   /// Reimplemented to work around MS compiler issues
   static inline real atan2(real const &x, real const &y)
@@ -307,6 +339,9 @@ public:
   /// Indexes of the items to calculate for each colvar
   std::vector<int> *variables_active_smp_items();
 
+  /// Calculate the value of the specified component (to be called in an SMP loop)
+  int calc_component_smp(int i);
+
   /// Array of collective variable biases
   std::vector<colvarbias *> biases;
 
diff --git a/lib/colvars/colvarmodule_refs.h b/lib/colvars/colvarmodule_refs.h
index 2e9615e3b4..0317567502 100644
--- a/lib/colvars/colvarmodule_refs.h
+++ b/lib/colvars/colvarmodule_refs.h
@@ -129,6 +129,23 @@
     "  url = {https://doi.org/10.1002/jcc.26075}\n"
     "}\n";
 
+  paper_count_[std::string("Fiorin2024")] = 0;
+  paper_url_[std::string("Fiorin2024")] = "https://doi.org/10.1021/acs.jpcb.4c05604";
+  paper_bibtex_[std::string("Fiorin2024")] =
+    "\n"
+    "@article{Fiorin2024,\n"
+    "  author = {Fiorin, Giacomo and Marinelli, Fabrizio and Forrest, Lucy R. and Chen, Haochuan and Chipot, Christophe and Kohlmeyer, Axel and Santuz, Hubert and H{\\'e}nin, J{\\'e}rôme},\n"
+    "  title = {Expanded Functionality and Portability for the Colvars Library},\n"
+    "  journal = {J. Phys. Chem. {B}},\n"
+    "  volume = {128},\n"
+    "  number = {45},\n"
+    "  pages = {11108--11123},\n"
+    "  year = {2024},\n"
+    "  doi = {10.1021/acs.jpcb.4c05604},\n"
+    "  pmid = 39501453,\n"
+    "  url = { https://doi.org/10.1021/acs.jpcb.4c05604}\n"
+    "}\n";
+
   paper_count_[std::string("Fu2016")] = 0;
   paper_url_[std::string("Fu2016")] = "https://doi.org/10.1021/acs.jctc.6b00447";
   paper_bibtex_[std::string("Fu2016")] =
@@ -227,6 +244,20 @@
     "  url = {https://doi.org/10.1016/0263-7855(96)00018-5}\n"
     "}\n";
 
+  paper_count_[std::string("Lagardere2023")] = 0;
+  paper_url_[std::string("Lagardere2023")] = "https://arxiv.org/abs/2307.08006";
+  paper_bibtex_[std::string("Lagardere2023")] =
+    "\n"
+    "@misc{Lagardere2023,\n"
+    "      title={Lambda-ABF: Simplified, Accurate and Cost-effective Alchemical Free Energy Computations},\n"
+    "      author={Louis Lagard\\`ere and Lise Maurin and Olivier Adjoua and Krystel El Hage and Pierre Monmarch\\'e and Jean-Philip Piquemal and J\\'er\\^ome H\\'enin},\n"
+    "      year={2023},\n"
+    "      eprint={2307.08006},\n"
+    "      archivePrefix={arXiv},\n"
+    "      primaryClass={physics.chem-ph},\n"
+    "      url = {https://arxiv.org/abs/2307.08006}\n"
+    "}\n";
+
   paper_count_[std::string("Lesage2017")] = 0;
   paper_url_[std::string("Lesage2017")] = "https://doi.org/10.1021/acs.jpcb.6b10055";
   paper_bibtex_[std::string("Lesage2017")] =
@@ -344,6 +375,45 @@
     "  url = {https://doi.org/10.1021/ct500320c}\n"
     "}\n";
 
+  paper_count_[std::string("Invernizzi2020")] = 0;
+  paper_url_[std::string("Invernizzi2020")] = "https://pubs.acs.org/doi/10.1021/acs.jpclett.0c00497";
+  paper_bibtex_[std::string("Invernizzi2020")] =
+    "\n"
+    "@article{Invernizzi2020,\n"
+    "	title = {Rethinking {Metadynamics}: {From} {Bias} {Potentials} to {Probability} {Distributions}},\n"
+    "	volume = {11},\n"
+    "	issn = {1948-7185, 1948-7185},\n"
+    "	shorttitle = {Rethinking {Metadynamics}},\n"
+    "	url = {https://pubs.acs.org/doi/10.1021/acs.jpclett.0c00497},\n"
+    "	doi = {10.1021/acs.jpclett.0c00497},\n"
+    "	number = {7},\n"
+    "	urldate = {2020-09-30},\n"
+    "	journal = {J. Phys. Chem. Lett.},\n"
+    "	author = {Invernizzi, Michele and Parrinello, Michele},\n"
+    "	month = apr,\n"
+    "	year = {2020},\n"
+    "	pages = {2731--2736},\n"
+    "}\n";
+
+  paper_count_[std::string("Invernizzi2022")] = 0;
+  paper_url_[std::string("Invernizzi2022")] = "https://doi.org/10.1021/acs.jctc.2c00152";
+  paper_bibtex_[std::string("Invernizzi2022")] =
+    "\n"
+    "@article{Invernizzi2022,\n"
+    "	title = {Exploration vs {Convergence} {Speed} in {Adaptive}-{Bias} {Enhanced} {Sampling}},\n"
+    "	volume = {18},\n"
+    "	issn = {1549-9618},\n"
+    "	url = {https://doi.org/10.1021/acs.jctc.2c00152},\n"
+    "	doi = {10.1021/acs.jctc.2c00152},\n"
+    "	number = {6},\n"
+    "	urldate = {2024-07-02},\n"
+    "	journal = {J. Chem. Theory Comput.},\n"
+    "	author = {Invernizzi, Michele and Parrinello, Michele},\n"
+    "	month = jun,\n"
+    "	year = {2022},\n"
+    "	pages = {3988--3996},\n"
+    "}\n";
+
   paper_count_[std::string("n/a")] = 0;
   paper_url_[std::string("n/a")] = "";
   paper_bibtex_[std::string("n/a")] = "";
@@ -489,6 +559,42 @@
   feature_count_[std::string("Multi-Map collective variables")] = 0;
   feature_paper_map_[std::string("Multi-Map collective variables")] = "Fiorin2020";
 
+  feature_count_[std::string("Colvars-GROMACS interface")] = 0;
+  feature_paper_map_[std::string("Colvars-GROMACS interface")] = "Fiorin2024";
+
+  feature_count_[std::string("gspath colvar component")] = 0;
+  feature_paper_map_[std::string("gspath colvar component")] = "Fiorin2024";
+
+  feature_count_[std::string("gzpath colvar component")] = 0;
+  feature_paper_map_[std::string("gzpath colvar component")] = "Fiorin2024";
+
+  feature_count_[std::string("linearCombination colvar component")] = 0;
+  feature_paper_map_[std::string("linearCombination colvar component")] = "Fiorin2024";
+
+  feature_count_[std::string("gspathCV colvar component")] = 0;
+  feature_paper_map_[std::string("gspathCV colvar component")] = "Fiorin2024";
+
+  feature_count_[std::string("gzpathCV colvar component")] = 0;
+  feature_paper_map_[std::string("gzpathCV colvar component")] = "Fiorin2024";
+
+  feature_count_[std::string("aspathCV colvar component")] = 0;
+  feature_paper_map_[std::string("aspathCV colvar component")] = "Fiorin2024";
+
+  feature_count_[std::string("azpathCV colvar component")] = 0;
+  feature_paper_map_[std::string("azpathCV colvar component")] = "Fiorin2024";
+
+  feature_count_[std::string("Custom functions (Lepton)")] = 0;
+  feature_paper_map_[std::string("Custom functions (Lepton)")] = "Fiorin2024";
+
+  feature_count_[std::string("Scripted functions (Tcl)")] = 0;
+  feature_paper_map_[std::string("Scripted functions (Tcl)")] = "Fiorin2024";
+
+  feature_count_[std::string("ABMD bias")] = 0;
+  feature_paper_map_[std::string("ABMD bias")] = "Fiorin2024";
+
+  feature_count_[std::string("Updated multiple-walker ABF implementation")] = 0;
+  feature_paper_map_[std::string("Updated multiple-walker ABF implementation")] = "Fiorin2024";
+
   feature_count_[std::string("Umbrella-integration eABF estimator")] = 0;
   feature_paper_map_[std::string("Umbrella-integration eABF estimator")] = "Fu2016";
 
@@ -525,6 +631,15 @@
   feature_count_[std::string("VMD engine")] = 0;
   feature_paper_map_[std::string("VMD engine")] = "Humphrey1996";
 
+  feature_count_[std::string("alchLambda colvar component")] = 0;
+  feature_paper_map_[std::string("alchLambda colvar component")] = "Lagardere2023";
+
+  feature_count_[std::string("alchFLambda colvar component")] = 0;
+  feature_paper_map_[std::string("alchFLambda colvar component")] = "Lagardere2023";
+
+  feature_count_[std::string("Tinker-HP interface")] = 0;
+  feature_paper_map_[std::string("Tinker-HP interface")] = "Lagardere2023";
+
   feature_count_[std::string("eABF implementation")] = 0;
   feature_paper_map_[std::string("eABF implementation")] = "Lesage2017";
 
@@ -555,38 +670,14 @@
   feature_count_[std::string("ALB colvar bias implementation")] = 0;
   feature_paper_map_[std::string("ALB colvar bias implementation")] = "White2014";
 
-  feature_count_[std::string("Colvars-GROMACS interface")] = 0;
-  feature_paper_map_[std::string("Colvars-GROMACS interface")] = "n/a";
+  feature_count_[std::string("OPES")] = 0;
+  feature_paper_map_[std::string("OPES")] = "Invernizzi2020";
 
-  feature_count_[std::string("gspath colvar component")] = 0;
-  feature_paper_map_[std::string("gspath colvar component")] = "n/a";
-
-  feature_count_[std::string("gzpath colvar component")] = 0;
-  feature_paper_map_[std::string("gzpath colvar component")] = "n/a";
-
-  feature_count_[std::string("linearCombination colvar component")] = 0;
-  feature_paper_map_[std::string("linearCombination colvar component")] = "n/a";
-
-  feature_count_[std::string("gspathCV colvar component")] = 0;
-  feature_paper_map_[std::string("gspathCV colvar component")] = "n/a";
-
-  feature_count_[std::string("gzpathCV colvar component")] = 0;
-  feature_paper_map_[std::string("gzpathCV colvar component")] = "n/a";
-
-  feature_count_[std::string("aspathCV colvar component")] = 0;
-  feature_paper_map_[std::string("aspathCV colvar component")] = "n/a";
-
-  feature_count_[std::string("azpathCV colvar component")] = 0;
-  feature_paper_map_[std::string("azpathCV colvar component")] = "n/a";
+  feature_count_[std::string("OPES explore or adaptive kernels")] = 0;
+  feature_paper_map_[std::string("OPES explore or adaptive kernels")] = "Invernizzi2022";
 
   feature_count_[std::string("coordNum pairlist")] = 0;
   feature_paper_map_[std::string("coordNum pairlist")] = "n/a";
 
-  feature_count_[std::string("Custom functions (Lepton)")] = 0;
-  feature_paper_map_[std::string("Custom functions (Lepton)")] = "n/a";
-
-  feature_count_[std::string("Scripted functions (Tcl)")] = 0;
-  feature_paper_map_[std::string("Scripted functions (Tcl)")] = "n/a";
-
-  feature_count_[std::string("ABMD bias")] = 0;
-  feature_paper_map_[std::string("ABMD bias")] = "n/a";
+  feature_count_[std::string("torchANN colvar component")] = 0;
+  feature_paper_map_[std::string("torchANN colvar component")] = "n/a";
diff --git a/lib/colvars/colvarparse.cpp b/lib/colvars/colvarparse.cpp
index 76b5c694c1..cf3096ba50 100644
--- a/lib/colvars/colvarparse.cpp
+++ b/lib/colvars/colvarparse.cpp
@@ -592,7 +592,7 @@ int colvarparse::check_keywords(std::string &conf, char const *key)
 {
   if (cvm::debug())
     cvm::log("Configuration string for \""+std::string(key)+
-             "\": \"\n"+conf+"\".\n");
+             "\":\n\""+conf+"\".\n");
 
   strip_values(conf);
   // after stripping, the config string has either empty lines, or
@@ -833,7 +833,8 @@ bool colvarparse::key_lookup(std::string const &conf,
                                        data_end) + 1;
     }
 
-    if (data != NULL) {
+    // data_end < data_begin means that the data or block contains only whitespace
+    if (data != NULL && data_end > data_begin) {
       data->append(line, data_begin, (data_end-data_begin));
 
       if (cvm::debug()) {
diff --git a/lib/colvars/colvarproxy.cpp b/lib/colvars/colvarproxy.cpp
index 588b7c68d8..1ed7a55552 100644
--- a/lib/colvars/colvarproxy.cpp
+++ b/lib/colvars/colvarproxy.cpp
@@ -243,7 +243,7 @@ void colvarproxy_atom_groups::compute_max_atom_groups_applied_force()
 
 colvarproxy_smp::colvarproxy_smp()
 {
-  b_smp_active = true; // May be disabled by user option
+  smp_mode = smp_mode_t::cvcs; // May be disabled by user option
   omp_lock_state = NULL;
 #if defined(_OPENMP)
   if (omp_get_thread_num() == 0) {
@@ -265,41 +265,45 @@ colvarproxy_smp::~colvarproxy_smp()
 #endif
 }
 
-
-int colvarproxy_smp::check_smp_enabled()
-{
+colvarproxy::smp_mode_t colvarproxy_smp::get_smp_mode() const {
 #if defined(_OPENMP)
-  if (b_smp_active) {
-    return COLVARS_OK;
-  }
-  return COLVARS_ERROR;
+  return smp_mode;
 #else
-  return COLVARS_NOT_IMPLEMENTED;
+  return colvarproxy::smp_mode_t::none;
+#endif
+}
+
+int colvarproxy_smp::set_smp_mode(smp_mode_t mode) {
+#if defined(_OPENMP)
+  smp_mode = mode;
+  return COLVARS_OK;
+#else
+  if (mode != colvarproxy::smp_mode_t::none) {
+    return COLVARS_NOT_IMPLEMENTED;
+  } else {
+    smp_mode = colvarproxy::smp_mode_t::none;
+  }
+  return COLVARS_OK;
 #endif
 }
 
 
-int colvarproxy_smp::smp_colvars_loop()
+int colvarproxy_smp::smp_loop(int n_items, std::function<int (int)> const &worker)
 {
+  int error_code = COLVARS_OK;
 #if defined(_OPENMP)
-  colvarmodule *cv = cvm::main();
-  colvarproxy *proxy = cv->proxy;
+  cvm::increase_depth();
 #pragma omp parallel for
-  for (int i = 0; i < static_cast<int>(cv->variables_active_smp()->size()); i++) {
-    colvar *x = (*(cv->variables_active_smp()))[i];
-    int x_item = (*(cv->variables_active_smp_items()))[i];
-    if (cvm::debug()) {
-      cvm::log("["+cvm::to_str(proxy->smp_thread_id())+"/"+
-               cvm::to_str(proxy->smp_num_threads())+
-               "]: calc_colvars_items_smp(), i = "+cvm::to_str(i)+", cv = "+
-               x->name+", cvc = "+cvm::to_str(x_item)+"\n");
-    }
-    x->calc_cvcs(x_item, 1);
+  for (int i = 0; i < n_items; i++) {
+    int const retcode = worker(i);
+#pragma omp atomic
+    error_code |= retcode;
   }
-  return cvm::get_error();
+  cvm::decrease_depth();
 #else
-  return COLVARS_NOT_IMPLEMENTED;
+  error_code |= COLVARS_NOT_IMPLEMENTED;
 #endif
+  return error_code;
 }
 
 
@@ -470,8 +474,8 @@ colvarproxy::~colvarproxy()
 
 bool colvarproxy::io_available()
 {
-  return (check_smp_enabled() == COLVARS_OK && smp_thread_id() == 0) ||
-    (check_smp_enabled() != COLVARS_OK);
+  return ((get_smp_mode() != smp_mode_t::none) && smp_thread_id() == 0) ||
+    (get_smp_mode() == smp_mode_t::none);
 }
 
 
diff --git a/lib/colvars/colvarproxy.h b/lib/colvars/colvarproxy.h
index 91db6011e9..353f354efa 100644
--- a/lib/colvars/colvarproxy.h
+++ b/lib/colvars/colvarproxy.h
@@ -10,9 +10,12 @@
 #ifndef COLVARPROXY_H
 #define COLVARPROXY_H
 
+#include <functional>
+
 #include "colvarmodule.h"
 #include "colvartypes.h"
 #include "colvarproxy_io.h"
+#include "colvarproxy_replicas.h"
 #include "colvarproxy_system.h"
 #include "colvarproxy_tcl.h"
 #include "colvarproxy_volmaps.h"
@@ -447,21 +450,22 @@ class colvarproxy_smp {
 
 public:
 
+  enum class smp_mode_t {cvcs, inner_loop, none};
+
   /// Constructor
   colvarproxy_smp();
 
   /// Destructor
   virtual ~colvarproxy_smp();
 
-  /// Whether threaded parallelization should be used (TODO: make this a
-  /// cvm::deps feature)
-  bool b_smp_active;
+  /// Get the current SMP mode
+  virtual smp_mode_t get_smp_mode() const;
 
-  /// Whether threaded parallelization is available (TODO: make this a cvm::deps feature)
-  virtual int check_smp_enabled();
+  /// Set the current SMP mode
+  virtual int set_smp_mode(smp_mode_t mode);
 
-  /// Distribute calculation of colvars (and their components) across threads
-  virtual int smp_colvars_loop();
+  /// Distribute computation over threads using OpenMP, unless overridden in the backend (e.g. NAMD)
+  virtual int smp_loop(int n_items, std::function<int (int)> const &worker);
 
   /// Distribute calculation of biases across threads
   virtual int smp_biases_loop();
@@ -488,38 +492,10 @@ protected:
 
   /// Lock state for OpenMP
   omp_lock_t *omp_lock_state;
-};
-
-
-/// \brief Methods for multiple-replica communication
-class colvarproxy_replicas {
-
-public:
-
-  /// Constructor
-  colvarproxy_replicas();
-
-  /// Destructor
-  virtual ~colvarproxy_replicas();
-
-  /// \brief Indicate if multi-replica support is available and active
-  virtual int replica_enabled();
-
-  /// \brief Index of this replica
-  virtual int replica_index();
-
-  /// \brief Total number of replicas
-  virtual int num_replicas();
-
-  /// \brief Synchronize replica with others
-  virtual void replica_comm_barrier();
-
-  /// \brief Receive data from other replica
-  virtual int replica_comm_recv(char* msg_data, int buf_len, int src_rep);
-
-  /// \brief Send data to other replica
-  virtual int replica_comm_send(char* msg_data, int msg_len, int dest_rep);
 
+  /// Which form of threaded parallelization is in use, if any (TODO: make
+  /// this a cvm::deps feature)
+  smp_mode_t smp_mode;
 };
 
 
diff --git a/lib/colvars/colvarproxy_io.cpp b/lib/colvars/colvarproxy_io.cpp
index 4cfdfeec26..0327ed36f0 100644
--- a/lib/colvars/colvarproxy_io.cpp
+++ b/lib/colvars/colvarproxy_io.cpp
@@ -7,10 +7,28 @@
 // If you wish to distribute your changes, please submit them to the
 // Colvars repository at GitHub.
 
+
+#if defined(_WIN32) && !defined(__CYGWIN__)
+
 // Using access() to check if a file exists (until we can assume C++14/17)
-#if !defined(_WIN32) || defined(__CYGWIN__)
-#include <unistd.h>
+#include <direct.h>
+
+#if defined(__has_include)
+# if __has_include(<filesystem>)
+#  include <filesystem> // MSVC only defines __cpp_lib_filesystem after include
+# endif
 #endif
+
+#else
+
+#include <unistd.h>
+
+#ifdef __cpp_lib_filesystem
+#include <filesystem>
+#endif
+
+#endif
+
 #if defined(_WIN32)
 #include <io.h>
 #endif
@@ -64,6 +82,53 @@ int colvarproxy_io::set_frame(long int)
 }
 
 
+std::string colvarproxy_io::get_current_work_dir() const
+{
+#ifdef __cpp_lib_filesystem
+
+  return std::filesystem::current_path().string();
+
+#else
+
+  // Legacy code
+  size_t constexpr buf_size = 3001;
+  char buf[buf_size];
+
+#if defined(_WIN32) && !defined(__CYGWIN__)
+  char *getcwd_result = ::_getcwd(buf, buf_size);
+#else
+  char *getcwd_result = ::getcwd(buf, buf_size);
+#endif
+
+  if (getcwd_result == nullptr) {
+    cvm::error("Error: cannot read the current working directory.\n", COLVARS_INPUT_ERROR);
+    return std::string("");
+  }
+
+  return std::string(getcwd_result);
+#endif
+}
+
+
+std::string colvarproxy_io::join_paths(std::string const &path1, std::string const &path2) const
+{
+#ifdef __cpp_lib_filesystem
+
+  return (std::filesystem::path(path1) / std::filesystem::path(path2)).string();
+
+#else
+
+  // Legacy code
+#if defined(_WIN32) && !defined(__CYGWIN__)
+  return (path1 + "\\" + path2);
+#else
+  return (path1 + "/" + path2);
+#endif
+
+#endif
+}
+
+
 int colvarproxy_io::backup_file(char const *filename)
 {
   // Simplified version of NAMD_file_exists()
diff --git a/lib/colvars/colvarproxy_io.h b/lib/colvars/colvarproxy_io.h
index 726f915c97..eaf750366d 100644
--- a/lib/colvars/colvarproxy_io.h
+++ b/lib/colvars/colvarproxy_io.h
@@ -38,6 +38,12 @@ public:
   // Returns error code
   virtual int set_frame(long int);
 
+  /// Get the current working directory of this process
+  std::string get_current_work_dir() const;
+
+  /// Join two paths using the operating system's path separator
+  std::string join_paths(std::string const &path1, std::string const &path2) const;
+
   /// \brief Rename the given file, before overwriting it
   virtual int backup_file(char const *filename);
 
diff --git a/lib/colvars/colvarproxy_replicas.cpp b/lib/colvars/colvarproxy_replicas.cpp
index 1f336d3e44..ec7ffdd8d5 100644
--- a/lib/colvars/colvarproxy_replicas.cpp
+++ b/lib/colvars/colvarproxy_replicas.cpp
@@ -7,50 +7,103 @@
 // If you wish to distribute your changes, please submit them to the
 // Colvars repository at GitHub.
 
+
 #include "colvarmodule.h"
-#include "colvarproxy.h"
+#include "colvarproxy_replicas.h"
 
 
-colvarproxy_replicas::colvarproxy_replicas() {}
+colvarproxy_replicas::colvarproxy_replicas()
+{
+#ifdef COLVARS_MPI
+  replicas_mpi_comm = MPI_COMM_NULL;
+#endif
+}
 
 
 colvarproxy_replicas::~colvarproxy_replicas() {}
 
 
-int colvarproxy_replicas::replica_enabled()
+void colvarproxy_replicas::set_replicas_mpi_communicator(replicas_mpi_comm_t comm)
 {
+  replicas_mpi_comm = comm;
+#ifdef COLVARS_MPI
+  if (comm != MPI_COMM_NULL) {
+    MPI_Comm_rank(comm, &replicas_mpi_rank);
+    MPI_Comm_size(comm, &replicas_mpi_num);
+    cvm::log("Enabling multiple replicas: this is replica number " +
+             cvm::to_str(replica_index() + 1) + " of " + cvm::to_str(num_replicas()) + ".\n");
+  }
+#endif
+}
+
+
+int colvarproxy_replicas::check_replicas_enabled()
+{
+#ifdef COLVARS_MPI
+  if (replicas_mpi_comm != MPI_COMM_NULL) {
+    return num_replicas() > 1 ? COLVARS_OK : COLVARS_ERROR;
+  }
+  return COLVARS_ERROR;
+#else
   return COLVARS_NOT_IMPLEMENTED;
+#endif
 }
 
 
 int colvarproxy_replicas::replica_index()
 {
-  return 0;
+  return replicas_mpi_rank;
 }
 
 
 int colvarproxy_replicas::num_replicas()
 {
-  return 1;
+  return replicas_mpi_num;
 }
 
 
-void colvarproxy_replicas::replica_comm_barrier() {}
-
-
-int colvarproxy_replicas::replica_comm_recv(char* /* msg_data */,
-                                            int /* buf_len */,
-                                            int /* src_rep */)
+void colvarproxy_replicas::replica_comm_barrier()
 {
-  return COLVARS_NOT_IMPLEMENTED;
+#ifdef COLVARS_MPI
+  MPI_Barrier(replicas_mpi_comm);
+#endif
 }
 
 
-int colvarproxy_replicas::replica_comm_send(char* /* msg_data */,
-                                            int /* msg_len */,
-                                            int /* dest_rep */)
+int colvarproxy_replicas::replica_comm_recv(char *buffer, int buffer_length, int source_rank)
 {
+#ifdef COLVARS_MPI
+  MPI_Status status;
+  int retval = MPI_Recv(buffer, buffer_length, MPI_CHAR, source_rank, 0, replicas_mpi_comm, &status);
+  if (retval == MPI_SUCCESS) {
+    MPI_Get_count(&status, MPI_CHAR, &retval);
+  } else {
+    retval = 0;
+  }
+  return retval;
+#else
+  (void)buffer;
+  (void)buffer_length;
+  (void)source_rank;
   return COLVARS_NOT_IMPLEMENTED;
+#endif
 }
 
 
+int colvarproxy_replicas::replica_comm_send(char *buffer, int buffer_length, int destination_rank)
+{
+#ifdef COLVARS_MPI
+  int retval = MPI_Send(buffer, buffer_length, MPI_CHAR, destination_rank, 0, replicas_mpi_comm);
+  if (retval == MPI_SUCCESS) {
+    retval = buffer_length;
+  } else {
+    retval = 0;
+  }
+  return retval;
+#else
+  (void)buffer;
+  (void)buffer_length;
+  (void)destination_rank;
+  return COLVARS_NOT_IMPLEMENTED;
+#endif
+}
diff --git a/lib/colvars/colvarproxy_replicas.h b/lib/colvars/colvarproxy_replicas.h
new file mode 100644
index 0000000000..b58c80bf5e
--- /dev/null
+++ b/lib/colvars/colvarproxy_replicas.h
@@ -0,0 +1,66 @@
+// -*- c++ -*-
+
+// This file is part of the Collective Variables module (Colvars).
+// The original version of Colvars and its updates are located at:
+// https://github.com/Colvars/colvars
+// Please update all Colvars source files before making any changes.
+// If you wish to distribute your changes, please submit them to the
+// Colvars repository at GitHub.
+
+#ifndef COLVARPROXY_REPLICAS_H
+#define COLVARPROXY_REPLICAS_H
+
+
+#ifdef COLVARS_MPI
+#include <mpi.h>
+typedef MPI_Comm replicas_mpi_comm_t;
+#else
+typedef void * replicas_mpi_comm_t;
+#endif
+
+
+/// \brief Methods for multiple-replica communication
+class colvarproxy_replicas {
+
+public:
+
+  /// Constructor
+  colvarproxy_replicas();
+
+  /// Destructor
+  virtual ~colvarproxy_replicas();
+
+  /// Set the multiple replicas communicator
+  virtual void set_replicas_mpi_communicator(replicas_mpi_comm_t comm);
+
+  /// Indicate if multi-replica support is available and active
+  virtual int check_replicas_enabled();
+
+  /// Index of this replica
+  virtual int replica_index();
+
+  /// Total number of replicas
+  virtual int num_replicas();
+
+  /// Synchronize replica with others
+  virtual void replica_comm_barrier();
+
+  /// Receive data from other replica
+  virtual int replica_comm_recv(char* msg_data, int buf_len, int src_rep);
+
+  /// Send data to other replica
+  virtual int replica_comm_send(char* msg_data, int msg_len, int dest_rep);
+
+protected:
+
+  /// MPI communicator containing 1 root proc from each world
+  replicas_mpi_comm_t replicas_mpi_comm;
+
+  /// Index (rank) of this replica in the MPI implementation
+  int replicas_mpi_rank = 0;
+
+  /// Number of replicas in the MPI implementation
+  int replicas_mpi_num = 1;
+};
+
+#endif
diff --git a/lib/colvars/colvarproxy_system.h b/lib/colvars/colvarproxy_system.h
index 67d0938e54..bf2ad2ea8b 100644
--- a/lib/colvars/colvarproxy_system.h
+++ b/lib/colvars/colvarproxy_system.h
@@ -94,6 +94,7 @@ public:
   virtual bool total_forces_enabled() const;
 
   /// Are total forces from the current step available?
+  /// in which case they are really system forces
   virtual bool total_forces_same_step() const;
 
   /// Get the molecule ID when called in VMD; raise error otherwise
@@ -109,6 +110,11 @@ public:
   /// Send cached value of alchemical lambda parameter to back-end (if available)
   virtual int send_alch_lambda();
 
+  /// Request energy computation every freq steps (necessary for NAMD3, not all back-ends)
+  virtual int request_alch_energy_freq(int const /* freq */) {
+    return COLVARS_OK; // default: nothing to request, the frequency is ignored
+  }
+
   /// Get energy derivative with respect to lambda (if available)
   virtual int get_dE_dlambda(cvm::real* dE_dlambda);
 
diff --git a/lib/colvars/colvars_memstream.h b/lib/colvars/colvars_memstream.h
index 0d80d2794d..c9564a3c41 100644
--- a/lib/colvars/colvars_memstream.h
+++ b/lib/colvars/colvars_memstream.h
@@ -108,6 +108,9 @@ public:
   /// Ignore formatting operators
   inline void setf(decltype(std::ios::fmtflags(0)), decltype(std::ios::floatfield)) {}
 
+  /// Ignore formatting operators
+  inline void setf(decltype(std::ios::fmtflags(0))) {}
+
   /// Ignore formatting operators
   inline void flags(decltype(std::ios::fmtflags(0))) {}
 
diff --git a/lib/colvars/colvars_version.h b/lib/colvars/colvars_version.h
index d50a00fff5..02f949b517 100644
--- a/lib/colvars/colvars_version.h
+++ b/lib/colvars/colvars_version.h
@@ -1,3 +1,3 @@
 #ifndef COLVARS_VERSION
-#define COLVARS_VERSION "2024-06-04"
+#define COLVARS_VERSION "2025-04-30"
 #endif
diff --git a/lib/colvars/colvarscript_commands.h b/lib/colvars/colvarscript_commands.h
index bdad74e433..191724dd0f 100644
--- a/lib/colvars/colvarscript_commands.h
+++ b/lib/colvars/colvarscript_commands.h
@@ -541,6 +541,15 @@ CVSCRIPT(cv_printframe,
          return COLVARS_OK;
          )
 
+CVSCRIPT(cv_patchversion,
+         "Get the Colvars patch version number (used for bugfixes only)\n"
+         "version : integer - Colvars patch version number",
+         0, 0,
+         "",
+         script->set_result_int(cvm::main()->patch_version_number());
+         return COLVARS_OK;
+         )
+
 CVSCRIPT(cv_printframelabels,
          "Return the labels that would be written to colvars.traj\n"
          "Labels : string - The labels",
@@ -656,7 +665,7 @@ CVSCRIPT(cv_update,
          )
 
 CVSCRIPT(cv_version,
-         "Get the Colvars Module version string\n"
+         "Get the Colvars version string\n"
          "version : string - Colvars version",
          0, 0,
          "",
@@ -665,7 +674,7 @@ CVSCRIPT(cv_version,
          )
 
 // This guard allows compiling colvar and bias function bodies in their
-// respecitve files instead of colvarscript_commands.o
+// respective files instead of colvarscript_commands.o
 #ifndef COLVARSCRIPT_COMMANDS_GLOBAL
 #include "colvarscript_commands_colvar.h"
 #include "colvarscript_commands_bias.h"
diff --git a/lib/colvars/colvarscript_commands_colvar.h b/lib/colvars/colvarscript_commands_colvar.h
index f6bb6b8c98..c641b321d0 100644
--- a/lib/colvars/colvarscript_commands_colvar.h
+++ b/lib/colvars/colvarscript_commands_colvar.h
@@ -23,6 +23,7 @@ CVSCRIPT(colvar_addforce,
            script->add_error_msg("addforce : error parsing force value");
            return COLVARSCRIPT_ERROR;
          }
+         this_colvar->enable(colvardeps::f_cv_apply_force);
          this_colvar->add_bias_force(force);
          script->set_result_colvarvalue(force);
          return COLVARS_OK;
diff --git a/lib/colvars/colvartypes.cpp b/lib/colvars/colvartypes.cpp
index f51791d015..6c65f1f5a6 100644
--- a/lib/colvars/colvartypes.cpp
+++ b/lib/colvars/colvartypes.cpp
@@ -137,71 +137,6 @@ std::istream & operator >> (std::istream &is, colvarmodule::quaternion &q)
 }
 
 
-cvm::quaternion
-cvm::quaternion::position_derivative_inner(cvm::rvector const &pos,
-                                            cvm::rvector const &vec) const
-{
-  cvm::quaternion result(0.0, 0.0, 0.0, 0.0);
-
-
-  result.q0 =   2.0 * pos.x * q0 * vec.x
-               +2.0 * pos.y * q0 * vec.y
-               +2.0 * pos.z * q0 * vec.z
-
-               -2.0 * pos.y * q3 * vec.x
-               +2.0 * pos.z * q2 * vec.x
-
-               +2.0 * pos.x * q3 * vec.y
-               -2.0 * pos.z * q1 * vec.y
-
-               -2.0 * pos.x * q2 * vec.z
-               +2.0 * pos.y * q1 * vec.z;
-
-
-  result.q1 =  +2.0 * pos.x * q1 * vec.x
-               -2.0 * pos.y * q1 * vec.y
-               -2.0 * pos.z * q1 * vec.z
-
-               +2.0 * pos.y * q2 * vec.x
-               +2.0 * pos.z * q3 * vec.x
-
-               +2.0 * pos.x * q2 * vec.y
-               -2.0 * pos.z * q0 * vec.y
-
-               +2.0 * pos.x * q3 * vec.z
-               +2.0 * pos.y * q0 * vec.z;
-
-
-  result.q2 =  -2.0 * pos.x * q2 * vec.x
-               +2.0 * pos.y * q2 * vec.y
-               -2.0 * pos.z * q2 * vec.z
-
-               +2.0 * pos.y * q1 * vec.x
-               +2.0 * pos.z * q0 * vec.x
-
-               +2.0 * pos.x * q1 * vec.y
-               +2.0 * pos.z * q3 * vec.y
-
-               -2.0 * pos.x * q0 * vec.z
-               +2.0 * pos.y * q3 * vec.z;
-
-
-  result.q3 =  -2.0 * pos.x * q3 * vec.x
-               -2.0 * pos.y * q3 * vec.y
-               +2.0 * pos.z * q3 * vec.z
-
-               -2.0 * pos.y * q0 * vec.x
-               +2.0 * pos.z * q1 * vec.x
-
-               +2.0 * pos.x * q0 * vec.y
-               +2.0 * pos.z * q2 * vec.y
-
-               +2.0 * pos.x * q1 * vec.z
-               +2.0 * pos.y * q2 * vec.z;
-
-  return result;
-}
-
 #ifdef COLVARS_LAMMPS
 namespace {
   inline void *new_Jacobi_solver(int size) {
@@ -336,7 +271,7 @@ void colvarmodule::rotation::compute_overlap_matrix()
 #ifndef COLVARS_LAMMPS
 namespace NR {
 
-void diagonalize_matrix(cvm::real m[4][4],
+int diagonalize_matrix(cvm::real m[4][4],
                         cvm::real eigval[4],
                         cvm::real eigvec[4][4])
 {
@@ -347,9 +282,7 @@ void diagonalize_matrix(cvm::real m[4][4],
   int jac_nrot = 0;
   if (NR_Jacobi::jacobi(m, eigval, eigvec, &jac_nrot) !=
       COLVARS_OK) {
-    cvm::error("Too many iterations in jacobi diagonalization.\n"
-               "This is usually the result of an ill-defined set of atoms for "
-               "rotational alignment (RMSD, rotateReference, etc).\n");
+    return COLVARS_ERROR;
   }
   NR_Jacobi::eigsrt(eigval, eigvec);
   // jacobi saves eigenvectors by columns
@@ -367,6 +300,7 @@ void diagonalize_matrix(cvm::real m[4][4],
       eigvec[ie][i] /= norm;
     }
   }
+  return COLVARS_OK;
 }
 
 }
@@ -429,14 +363,25 @@ void colvarmodule::rotation::calc_optimal_rotation_impl() {
                                        cvm::real[4][4]> *>(jacobi);
 
   int ierror = ecalc->Diagonalize(S, S_eigval, S_eigvec);
+#else
+  int ierror = NR::diagonalize_matrix(S, S_eigval, S_eigvec);
+#endif
   if (ierror) {
+    cvm::log("Failed to diagonalize the following overlapping matrix:\n");
+    // Emit one log call per matrix row: the correlation matrix below is printed the same way
+    for (size_t i = 0; i < 4; ++i) {
+      cvm::log(" " + cvm::to_str(S[i][0]) + " " + cvm::to_str(S[i][1]) +
+               " " + cvm::to_str(S[i][2]) + " " + cvm::to_str(S[i][3]));
+    }
+    cvm::log("\n");
+    cvm::log("The corresponding correlation matrix is:\n");
+    cvm::log(" " + cvm::to_str(C.xx) + " " + cvm::to_str(C.xy) + " " + cvm::to_str(C.xz));
+    cvm::log(" " + cvm::to_str(C.yx) + " " + cvm::to_str(C.yy) + " " + cvm::to_str(C.yz));
+    cvm::log(" " + cvm::to_str(C.zx) + " " + cvm::to_str(C.zy) + " " + cvm::to_str(C.zz) + "\n");
     cvm::error("Too many iterations in jacobi diagonalization.\n"
                "This is usually the result of an ill-defined set of atoms for "
                "rotational alignment (RMSD, rotateReference, etc).\n");
   }
-#else
-  NR::diagonalize_matrix(S, S_eigval, S_eigvec);
-#endif
   q = cvm::quaternion{S_eigvec[0][0], S_eigvec[0][1], S_eigvec[0][2], S_eigvec[0][3]};
 
   if (cvm::rotation::monitor_crossings) {
diff --git a/lib/colvars/colvartypes.h b/lib/colvars/colvartypes.h
index 455e628f1b..db5827990b 100644
--- a/lib/colvars/colvartypes.h
+++ b/lib/colvars/colvartypes.h
@@ -20,10 +20,6 @@
 
 #include "colvarmodule.h"
 
-#ifndef PI
-#define PI 3.14159265358979323846
-#endif
-
 // ----------------------------------------------------------------------
 /// Linear algebra functions and data types used in the collective
 /// variables implemented so far
@@ -1221,8 +1217,57 @@ public:
 
   /// \brief Multiply the given vector by the derivative of the given
   /// (rotated) position with respect to the quaternion
-  cvm::quaternion position_derivative_inner(cvm::rvector const &pos,
-                                            cvm::rvector const &vec) const;
+  /// \param pos The position \f$\mathbf{x}\f$.
+  /// \param vec The vector \f$\mathbf{v}\f$.
+  /// \return A quaternion (see the detailed documentation below).
+  ///
+  /// This function is mainly used for projecting the gradients or forces on
+  /// the rotated atoms to the forces on quaternion. Assume this rotation can
+  /// be represented as \f$R(\mathbf{q})\f$,
+  /// where \f$\mathbf{q} := (q_0, q_1, q_2, q_3)\f$
+  /// is the current quaternion, the function returns the following new
+  /// quaternion:
+  /// \f[
+  /// \left(\mathbf{v}^\mathrm{T}\frac{\partial R(\mathbf{q})}{\partial q_0}\mathbf{x},
+  ///       \mathbf{v}^\mathrm{T}\frac{\partial R(\mathbf{q})}{\partial q_1}\mathbf{x},
+  ///       \mathbf{v}^\mathrm{T}\frac{\partial R(\mathbf{q})}{\partial q_2}\mathbf{x},
+  ///       \mathbf{v}^\mathrm{T}\frac{\partial R(\mathbf{q})}{\partial q_3}\mathbf{x}\right)
+  /// \f]
+  /// where \f$\mathbf{v}\f$ is usually the gradient of \f$\xi\f$ with respect to
+  /// the rotated frame \f$\tilde{\mathbf{X}}\f$,
+  /// \f$\partial \xi / \partial \tilde{\mathbf{X}}\f$, or the force acting on it
+  /// (\f$\mathbf{F}_{\tilde{\mathbf{X}}}\f$).
+  /// By using the following loop in pseudo C++ code,
+  /// either \f$\partial \xi / \partial \tilde{\mathbf{X}}\f$
+  /// or \f$\mathbf{F}_{\tilde{\mathbf{X}}}\f$, can be projected to
+  /// \f$\partial \xi / \partial \mathbf{q}\f$ or \f$\mathbf{F}_q\f$ into `sum_dxdq`:
+  /// @code
+  /// cvm::real sum_dxdq[4] = {0, 0, 0, 0};
+  /// for (size_t i = 0; i < main_group_size(); ++i) {
+  ///   const cvm::rvector v = grad_or_force_on_rotated_main_group(i);
+  ///   const cvm::rvector x = unrotated_main_group_positions(i);
+  ///   cvm::quaternion const dxdq = position_derivative_inner(x, v);
+  ///   sum_dxdq[0] += dxdq[0];
+  ///   sum_dxdq[1] += dxdq[1];
+  ///   sum_dxdq[2] += dxdq[2];
+  ///   sum_dxdq[3] += dxdq[3];
+  /// }
+  /// @endcode
+  inline cvm::quaternion position_derivative_inner(cvm::rvector const &pos,
+                                            cvm::rvector const &vec) const {
+    return cvm::quaternion(2.0 * (vec.x * ( q0 * pos.x - q3 * pos.y + q2 * pos.z) +
+                                  vec.y * ( q3 * pos.x + q0 * pos.y - q1 * pos.z) +
+                                  vec.z * (-q2 * pos.x + q1 * pos.y + q0 * pos.z)),
+                           2.0 * (vec.x * ( q1 * pos.x + q2 * pos.y + q3 * pos.z) +
+                                  vec.y * ( q2 * pos.x - q1 * pos.y - q0 * pos.z) +
+                                  vec.z * ( q3 * pos.x + q0 * pos.y - q1 * pos.z)),
+                           2.0 * (vec.x * (-q2 * pos.x + q1 * pos.y + q0 * pos.z) +
+                                  vec.y * ( q1 * pos.x + q2 * pos.y + q3 * pos.z) +
+                                  vec.z * (-q0 * pos.x + q3 * pos.y - q2 * pos.z)),
+                           2.0 * (vec.x * (-q3 * pos.x - q0 * pos.y + q1 * pos.z) +
+                                  vec.y * ( q0 * pos.x - q3 * pos.y + q2 * pos.z) +
+                                  vec.z * ( q1 * pos.x + q2 * pos.y + q3 * pos.z)));
+  }
 
 
   /// \brief Return the cosine between the orientation frame
@@ -1301,7 +1346,7 @@ public:
 
 #ifndef COLVARS_LAMMPS
 namespace NR {
-void diagonalize_matrix(cvm::real m[4][4],
+int diagonalize_matrix(cvm::real m[4][4],
                         cvm::real eigval[4],
                         cvm::real eigvec[4][4]);
 }
diff --git a/lib/colvars/colvarvalue.cpp b/lib/colvars/colvarvalue.cpp
index 3b8077d2e7..66baf35eeb 100644
--- a/lib/colvars/colvarvalue.cpp
+++ b/lib/colvars/colvarvalue.cpp
@@ -153,29 +153,6 @@ std::string const colvarvalue::type_keyword(Type t)
 }
 
 
-size_t colvarvalue::num_df(Type t)
-{
-  switch (t) {
-  case colvarvalue::type_notset:
-  default:
-    return 0; break;
-  case colvarvalue::type_scalar:
-    return 1; break;
-  case colvarvalue::type_3vector:
-    return 3; break;
-  case colvarvalue::type_unit3vector:
-  case colvarvalue::type_unit3vectorderiv:
-    return 2; break;
-  case colvarvalue::type_quaternion:
-  case colvarvalue::type_quaternionderiv:
-    return 3; break;
-  case colvarvalue::type_vector:
-    // the size of a vector is unknown without its object
-    return 0; break;
-  }
-}
-
-
 size_t colvarvalue::num_dimensions(Type t)
 {
   switch (t) {
@@ -591,34 +568,132 @@ cvm::real operator * (colvarvalue const &x1,
 }
 
 
+cvm::real colvarvalue::norm2() const
+{
+  switch (value_type) {
+  case colvarvalue::type_scalar:
+    return (this->real_value)*(this->real_value);
+  case colvarvalue::type_3vector:
+  case colvarvalue::type_unit3vector:
+  case colvarvalue::type_unit3vectorderiv:
+    return (this->rvector_value).norm2();
+  case colvarvalue::type_quaternion:
+  case colvarvalue::type_quaternionderiv:
+    return (this->quaternion_value).norm2();
+  case colvarvalue::type_vector:
+    if (elem_types.size() > 0) {
+      // if we have information about non-scalar types, use it
+      cvm::real result = 0.0;
+      size_t i;
+      for (i = 0; i < elem_types.size(); i++) {
+        result += (this->get_elem(i)).norm2();
+      }
+      return result;
+    } else {
+      return vector1d_value.norm2();
+    }
+    break;
+  case colvarvalue::type_notset:
+  default:
+    return 0.0;
+  }
+}
+
+
+cvm::real colvarvalue::sum() const
+{
+  switch (value_type) {
+  case colvarvalue::type_scalar:
+    return (this->real_value);
+  case colvarvalue::type_3vector:
+  case colvarvalue::type_unit3vector:
+  case colvarvalue::type_unit3vectorderiv:
+    return (this->rvector_value).x + (this->rvector_value).y +
+      (this->rvector_value).z;
+  case colvarvalue::type_quaternion:
+  case colvarvalue::type_quaternionderiv:
+    return (this->quaternion_value).q0 + (this->quaternion_value).q1 +
+      (this->quaternion_value).q2 + (this->quaternion_value).q3;
+  case colvarvalue::type_vector:
+    return (this->vector1d_value).sum();
+  case colvarvalue::type_notset:
+  default:
+    return 0.0;
+  }
+}
+
+
+cvm::real colvarvalue::dist2(colvarvalue const &x2) const
+{
+  colvarvalue::check_types(*this, x2);
+
+  switch (this->type()) {
+  case colvarvalue::type_scalar:
+    return (this->real_value - x2.real_value) * (this->real_value - x2.real_value);
+  case colvarvalue::type_3vector:
+    return (this->rvector_value - x2.rvector_value).norm2();
+  case colvarvalue::type_unit3vector: {
+    cvm::rvector const &v1 = this->rvector_value;
+    cvm::rvector const &v2 = x2.rvector_value;
+    cvm::real const theta = cvm::acos(v1 * v2);
+    return theta * theta;
+  }
+  case colvarvalue::type_quaternion:
+    // angle between (*this) and x2 is the distance, the quaternion
+    // object has it implemented internally
+    return this->quaternion_value.dist2(x2.quaternion_value);
+  case colvarvalue::type_vector:
+    return (this->vector1d_value - x2.vector1d_value).norm2();
+  case colvarvalue::type_unit3vectorderiv:
+  case colvarvalue::type_quaternionderiv:
+    cvm::error("Error: computing a squared-distance between two variables of type \"" +
+                   type_desc(this->type()) + "\", for which it is not defined.\n",
+               COLVARS_BUG_ERROR);
+  case colvarvalue::type_notset:
+  default:
+    this->undef_op();
+    return 0.0;
+  };
+
+  return 0.0;
+}
+
+
 colvarvalue colvarvalue::dist2_grad(colvarvalue const &x2) const
 {
   colvarvalue::check_types(*this, x2);
 
+  // Compute derivative with respect to (*this)
+
   switch (this->value_type) {
   case colvarvalue::type_scalar:
     return 2.0 * (this->real_value - x2.real_value);
   case colvarvalue::type_3vector:
     return 2.0 * (this->rvector_value - x2.rvector_value);
-  case colvarvalue::type_unit3vector:
-  case colvarvalue::type_unit3vectorderiv:
-    {
-      cvm::rvector const &v1 = this->rvector_value;
-      cvm::rvector const &v2 = x2.rvector_value;
-      cvm::real const cos_t = v1 * v2;
-      return colvarvalue(2.0 * (cos_t * v1 - v2), colvarvalue::type_unit3vectorderiv);
-    }
+  case colvarvalue::type_unit3vector: {
+    cvm::rvector const &v2 = x2.rvector_value;
+    cvm::real const cos_t = this->rvector_value * v2;
+    // theta/sin(theta) -> 1 as theta -> 0: avoid 0/0 = NaN when the two vectors coincide
+    cvm::real const ratio = (cos_t < 1.0) ? cvm::acos(cos_t) / cvm::sqrt(1.0 - cos_t * cos_t) : 1.0;
+    return colvarvalue(-2.0 * ratio * v2, colvarvalue::type_unit3vectorderiv);
+  }
   case colvarvalue::type_quaternion:
-  case colvarvalue::type_quaternionderiv:
     return this->quaternion_value.dist2_grad(x2.quaternion_value);
   case colvarvalue::type_vector:
     return colvarvalue(2.0 * (this->vector1d_value - x2.vector1d_value), colvarvalue::type_vector);
     break;
+  case colvarvalue::type_unit3vectorderiv:
+  case colvarvalue::type_quaternionderiv:
+    cvm::error("Error: computing a squared-distance gradient between two variables of type \"" +
+                   type_desc(this->type()) + "\", for which it is not defined.\n",
+               COLVARS_BUG_ERROR);
   case colvarvalue::type_notset:
   default:
     this->undef_op();
     return colvarvalue(colvarvalue::type_notset);
   };
+
+  return colvarvalue(colvarvalue::type_notset);
 }
 
 
diff --git a/lib/colvars/colvarvalue.h b/lib/colvars/colvarvalue.h
index e8a6a849d3..61f1bf718b 100644
--- a/lib/colvars/colvarvalue.h
+++ b/lib/colvars/colvarvalue.h
@@ -109,9 +109,6 @@ public:
   /// User keywords for specifying value types in the configuration
   static std::string const type_keyword(Type t);
 
-  /// Number of degrees of freedom for each supported type
-  static size_t num_df(Type t);
-
   /// Number of dimensions for each supported type (used to allocate vector1d_value)
   static size_t num_dimensions(Type t);
 
@@ -671,87 +668,4 @@ inline cvm::vector1d<cvm::real> const colvarvalue::as_vector() const
 }
 
 
-inline cvm::real colvarvalue::norm2() const
-{
-  switch (value_type) {
-  case colvarvalue::type_scalar:
-    return (this->real_value)*(this->real_value);
-  case colvarvalue::type_3vector:
-  case colvarvalue::type_unit3vector:
-  case colvarvalue::type_unit3vectorderiv:
-    return (this->rvector_value).norm2();
-  case colvarvalue::type_quaternion:
-  case colvarvalue::type_quaternionderiv:
-    return (this->quaternion_value).norm2();
-  case colvarvalue::type_vector:
-    if (elem_types.size() > 0) {
-      // if we have information about non-scalar types, use it
-      cvm::real result = 0.0;
-      size_t i;
-      for (i = 0; i < elem_types.size(); i++) {
-        result += (this->get_elem(i)).norm2();
-      }
-      return result;
-    } else {
-      return vector1d_value.norm2();
-    }
-    break;
-  case colvarvalue::type_notset:
-  default:
-    return 0.0;
-  }
-}
-
-
-inline cvm::real colvarvalue::sum() const
-{
-  switch (value_type) {
-  case colvarvalue::type_scalar:
-    return (this->real_value);
-  case colvarvalue::type_3vector:
-  case colvarvalue::type_unit3vector:
-  case colvarvalue::type_unit3vectorderiv:
-    return (this->rvector_value).x + (this->rvector_value).y +
-      (this->rvector_value).z;
-  case colvarvalue::type_quaternion:
-  case colvarvalue::type_quaternionderiv:
-    return (this->quaternion_value).q0 + (this->quaternion_value).q1 +
-      (this->quaternion_value).q2 + (this->quaternion_value).q3;
-  case colvarvalue::type_vector:
-    return (this->vector1d_value).sum();
-  case colvarvalue::type_notset:
-  default:
-    return 0.0;
-  }
-}
-
-
-inline cvm::real colvarvalue::dist2(colvarvalue const &x2) const
-{
-  colvarvalue::check_types(*this, x2);
-
-  switch (this->type()) {
-  case colvarvalue::type_scalar:
-    return (this->real_value - x2.real_value)*(this->real_value - x2.real_value);
-  case colvarvalue::type_3vector:
-    return (this->rvector_value - x2.rvector_value).norm2();
-  case colvarvalue::type_unit3vector:
-  case colvarvalue::type_unit3vectorderiv:
-    // angle between (*this) and x2 is the distance
-    return cvm::acos(this->rvector_value * x2.rvector_value) * cvm::acos(this->rvector_value * x2.rvector_value);
-  case colvarvalue::type_quaternion:
-  case colvarvalue::type_quaternionderiv:
-    // angle between (*this) and x2 is the distance, the quaternion
-    // object has it implemented internally
-    return this->quaternion_value.dist2(x2.quaternion_value);
-  case colvarvalue::type_vector:
-    return (this->vector1d_value - x2.vector1d_value).norm2();
-  case colvarvalue::type_notset:
-  default:
-    this->undef_op();
-    return 0.0;
-  };
-}
-
-
 #endif
diff --git a/src/.gitignore b/src/.gitignore
index a25a884a8a..c39dfbdfeb 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -875,6 +875,8 @@
 /fix_freeze.h
 /fix_gcmc.cpp
 /fix_gcmc.h
+/fix_gjf.cpp
+/fix_gjf.h
 /fix_gld.cpp
 /fix_gld.h
 /fix_gle.cpp
@@ -924,6 +926,8 @@
 /fix_msst.h
 /fix_neb.cpp
 /fix_neb.h
+/fix_neighbor_swap.cpp
+/fix_neighbor_swap.h
 /fix_nh_asphere.cpp
 /fix_nh_asphere.h
 /fix_nph_asphere.cpp
diff --git a/src/COLVARS/colvarproxy_lammps.cpp b/src/COLVARS/colvarproxy_lammps.cpp
index 265dc34f43..c92d835cf6 100644
--- a/src/COLVARS/colvarproxy_lammps.cpp
+++ b/src/COLVARS/colvarproxy_lammps.cpp
@@ -33,12 +33,9 @@ colvarproxy_lammps::colvarproxy_lammps(LAMMPS_NS::LAMMPS *lmp)  : _lmp(lmp), _ra
   previous_step = -1;
   do_exit = false;
 
-  inter_me = 0;
-  inter_num = 1;
   bias_energy = 0.0;
 
   engine_ready_ = false;
-  inter_comm = MPI_COMM_NULL;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -83,19 +80,6 @@ void colvarproxy_lammps::set_random_seed(int seed)
   _random = new LAMMPS_NS::RanPark(_lmp, seed);
 }
 
-/* ---------------------------------------------------------------------- */
-
-void colvarproxy_lammps::set_replicas_communicator(MPI_Comm root2root)
-{
-  inter_comm = root2root;
-
-  // initialize multi-replica support, if available
-  if (replica_enabled() == COLVARS_OK) {
-    MPI_Comm_rank(inter_comm, &inter_me);
-    MPI_Comm_size(inter_comm, &inter_num);
-  }
-}
-
 /* ----------------------------------------------------------------------
    re-initialize data where needed
 ------------------------------------------------------------------------- */
@@ -255,63 +239,7 @@ int colvarproxy_lammps::set_unit_system(std::string const &units_in, bool /*chec
   return COLVARS_OK;
 }
 
-/* ----------------------------------------------------------------------
-   multi-replica support
-------------------------------------------------------------------------- */
 
-int colvarproxy_lammps::replica_enabled()
-{
-  return (inter_comm != MPI_COMM_NULL) ? COLVARS_OK : COLVARS_NOT_IMPLEMENTED;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int colvarproxy_lammps::replica_index()
-{
-  return inter_me;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int colvarproxy_lammps::num_replicas()
-{
-  return inter_num;
-}
-
-/* ---------------------------------------------------------------------- */
-
-void colvarproxy_lammps::replica_comm_barrier()
-{
-  MPI_Barrier(inter_comm);
-}
-
-/* ---------------------------------------------------------------------- */
-
-int colvarproxy_lammps::replica_comm_recv(char* msg_data, int buf_len, int src_rep)
-{
-  MPI_Status status;
-  int retval;
-
-  retval = MPI_Recv(msg_data,buf_len,MPI_CHAR,src_rep,0,inter_comm,&status);
-  if (retval == MPI_SUCCESS) {
-    MPI_Get_count(&status, MPI_CHAR, &retval);
-  } else retval = 0;
-  return retval;
-}
-
-/* ---------------------------------------------------------------------- */
-
-int colvarproxy_lammps::replica_comm_send(char* msg_data, int msg_len, int dest_rep)
-{
-  int retval;
-  retval = MPI_Send(msg_data,msg_len,MPI_CHAR,dest_rep,0,inter_comm);
-  if (retval == MPI_SUCCESS) {
-    retval = msg_len;
-  } else retval = 0;
-  return retval;
-}
-
-/* ---------------------------------------------------------------------- */
 
 int colvarproxy_lammps::check_atom_id(int atom_number)
 {
diff --git a/src/COLVARS/colvarproxy_lammps.h b/src/COLVARS/colvarproxy_lammps.h
index d98be37b09..06bda4252e 100644
--- a/src/COLVARS/colvarproxy_lammps.h
+++ b/src/COLVARS/colvarproxy_lammps.h
@@ -45,9 +45,6 @@ class colvarproxy_lammps : public colvarproxy {
 
   std::vector<int> atoms_types;
 
-  MPI_Comm inter_comm;        // MPI comm with 1 root proc from each world
-  int inter_me, inter_num;    // rank for the inter replica comm
-
  public:
   friend class cvm::atom;
 
@@ -59,9 +56,6 @@ class colvarproxy_lammps : public colvarproxy {
   /// Set the internal seed used by \link rand_gaussian() \endlink
   void set_random_seed(int seed);
 
-  /// Set the multiple replicas communicator
-  void set_replicas_communicator(MPI_Comm root2root);
-
   int setup() override;
 
   // disable default and copy constructor
@@ -72,7 +66,8 @@ class colvarproxy_lammps : public colvarproxy {
   // methods for lammps to move data or trigger actions in the proxy
  public:
   bool total_forces_enabled() const override { return total_force_requested; };
-  bool total_forces_same_step() const override { return true; };
+  // Total forces are saved at end of step, only processed at the next step
+  bool total_forces_same_step() const override { return false; };
   bool want_exit() const { return do_exit; };
 
   // perform colvars computation. returns biasing energy
@@ -102,14 +97,6 @@ class colvarproxy_lammps : public colvarproxy {
   int check_atom_id(int atom_number) override;
 
   inline std::vector<int> *modify_atom_types() { return &atoms_types; }
-
-  int replica_enabled() override;
-  int replica_index() override;
-  int num_replicas() override;
-
-  void replica_comm_barrier() override;
-  int replica_comm_recv(char *msg_data, int buf_len, int src_rep) override;
-  int replica_comm_send(char *msg_data, int msg_len, int dest_rep) override;
 };
 
 #endif
diff --git a/src/COLVARS/colvarproxy_lammps_version.h b/src/COLVARS/colvarproxy_lammps_version.h
index 5901044b1e..1c16217679 100644
--- a/src/COLVARS/colvarproxy_lammps_version.h
+++ b/src/COLVARS/colvarproxy_lammps_version.h
@@ -1,3 +1,3 @@
 #ifndef COLVARPROXY_VERSION
-#define COLVARPROXY_VERSION "2024-07-05"
+#define COLVARPROXY_VERSION "2025-03-31"
 #endif
diff --git a/src/COLVARS/fix_colvars.cpp b/src/COLVARS/fix_colvars.cpp
index d5a8806eed..247ad2ef8b 100644
--- a/src/COLVARS/fix_colvars.cpp
+++ b/src/COLVARS/fix_colvars.cpp
@@ -267,6 +267,7 @@ void FixColvars::init()
   if (init_flag) return;
   init_flag = 1;
 
+#if defined(COLVARS_MPI)
   if (universe->nworlds > 1) {
     // create inter root communicator
     int color = 1;
@@ -275,9 +276,10 @@ void FixColvars::init()
     }
     MPI_Comm_split(universe->uworld, color, universe->iworld, &root2root);
     if (me == 0) {
-      proxy->set_replicas_communicator(root2root);
+      proxy->set_replicas_mpi_communicator(root2root);
     }
   }
+#endif
 }
 
 
diff --git a/src/Depend.sh b/src/Depend.sh
index 9ddb29450d..ba55deb62c 100755
--- a/src/Depend.sh
+++ b/src/Depend.sh
@@ -146,6 +146,7 @@ fi
 
 if (test $1 = "MC") then
   depend MISC
+  depend VORONOI
 fi
 
 if (test $1 = "MEAM") then
diff --git a/src/EXTRA-FIX/fix_gjf.cpp b/src/EXTRA-FIX/fix_gjf.cpp
new file mode 100644
index 0000000000..f602e6181d
--- /dev/null
+++ b/src/EXTRA-FIX/fix_gjf.cpp
@@ -0,0 +1,687 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Tim Linke & Niels Gronbech-Jensen (UC Davis)
+------------------------------------------------------------------------- */
+
+#include "fix_gjf.h"
+
+#include "atom.h"
+#include "citeme.h"
+#include "comm.h"
+#include "compute.h"
+#include "error.h"
+#include "force.h"
+#include "group.h"
+#include "input.h"
+#include "memory.h"
+#include "modify.h"
+#include "random_mars.h"
+#include "respa.h"
+#include "update.h"
+#include "variable.h"
+
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+enum { NOBIAS, BIAS };             // values of tbiasflag: set in init() from temperature->tempbias
+enum { CONSTANT, EQUAL, ATOM };    // values of tstyle: numeric Tstart, equal-style or atom-style variable
+
+static const char cite_gjf[] =    // added via lmp->citeme for every fix gjf instance (see constructor)
+  "GJ methods: doi:10.1080/00268976.2019.1662506\n\n"
+  "@Article{gronbech-jensen_complete_2020,\n"
+        "title = {Complete set of stochastic Verlet-type thermostats for correct Langevin simulations},\n"
+        "volume = {118},\n"
+  "number = {8},\n"
+        "url = {https://www.tandfonline.com/doi/full/10.1080/00268976.2019.1662506},\n"
+        "doi = {10.1080/00268976.2019.1662506},\n"
+        "journal = {Molecular Physics},\n"
+        "author = {Grønbech-Jensen, Niels},\n"
+        "year = {2020}\n"
+  "}\n\n";
+
+static const char cite_gjf_7[] =    // added via lmp->citeme only when "method 7" is selected
+  "GJ-VII method: doi:10.1063/5.0066008\n\n"
+  "@Article{finkelstein_2021,\n"
+  "title = {Bringing discrete-time Langevin splitting methods into agreement with thermodynamics},\n"
+  "volume = {155},\n"
+  "number = {18},\n"
+  "url = {https://doi.org/10.1063/5.0066008},\n"
+  "doi = {10.1063/5.0066008},\n"
+  "journal = {J. Chem. Phys.},\n"
+  "author = {Finkelstein, Joshua and Cheng, Chungho and Fiorin, Giacomo and Seibold, Benjamin and Grønbech-Jensen, Niels},\n"
+  "year = {2021},\n"
+  "pages = {184104}\n"
+  "}\n\n";
+
+static const char cite_gjf_8[] =    // added via lmp->citeme only when "method 8" is selected
+  "GJ-VIII method: doi:10.1007/s10955-024-03345-1\n\n"
+  "@Article{gronbech_jensen_2024,\n"
+  "title = {On the Definition of Velocity in Discrete-Time, Stochastic Langevin Simulations},\n"
+  "volume = {191},\n"
+  "number = {10},\n"
+  "url = {https://doi.org/10.1007/s10955-024-03345-1},\n"
+  "doi = {10.1007/s10955-024-03345-1},\n"
+  "journal = {J. Stat. Phys.},\n"
+  "author = {Gronbech-Jensen, Niels},\n"
+  "year = {2024},\n"
+  "pages = {137}\n"
+  "}\n\n";
+
+static const char cite_gjf_vhalf[] =    // added via lmp->citeme when GJmethod == 1 and osflag == 0
+  "GJ-I vhalf method: doi:10.1080/00268976.2019.1570369\n\n"
+  "@Article{jensen_accurate_2019,\n"
+        "title = {Accurate configurational and kinetic statistics in discrete-time Langevin systems},\n"
+        "volume = {117},\n"
+        "url = {https://www.tandfonline.com/doi/full/10.1080/00268976.2019.1570369},\n"
+        "doi = {10.1080/00268976.2019.1570369},\n"
+        "number = {18},\n"
+        "journal = {Molecular Physics},\n"
+        "author = {Jensen, Lucas Frese Grønbech and Grønbech-Jensen, Niels},\n"
+        "year = {2019}\n"
+  "}\n\n";
+
+static const char cite_gjf_vfull[] =    // added via lmp->citeme when GJmethod == 1 and osflag == 1
+  "GJ-I vfull method: doi:10.1080/00268976.2012.760055\n\n"
+  "@Article{gronbech-jensen_simple_2013,\n"
+  "title = {A simple and effective Verlet-type algorithm for simulating Langevin dynamics},\n"
+  "volume = {111},\n"
+  "url = {http://www.tandfonline.com/doi/abs/10.1080/00268976.2012.760055},\n"
+  "doi = {10.1080/00268976.2012.760055},\n"
+  "pages = {983-991},\n"
+  "number = {8},\n"
+  "journal = {Molecular Physics},\n"
+  "author = {Grønbech-Jensen, Niels and Farago, Oded},\n"
+  "year = {2013}\n"
+  "}\n\n";
+
+/* ---------------------------------------------------------------------- */
+
+// Parse the fix gjf arguments: arg[3..6] are Tstart (a number or "v_name"), Tstop, period and
+// seed, optionally followed by "vel vfull|vhalf" and "method N" ("method 7 c2" for GJ-VII).
+// Also creates the random number generator and allocates and zeroes the per-atom array lv.
+FixGJF::FixGJF(LAMMPS *lmp, int narg, char **arg) :
+    Fix(lmp, narg, arg), tstr(nullptr), tforce(nullptr), lv(nullptr), id_temp(nullptr), random(nullptr)
+{
+  if (lmp->citeme) lmp->citeme->add(cite_gjf);
+  if (narg < 7) error->all(FLERR, "Illegal fix gjf command");
+
+  time_integrate = 1;
+  global_freq = 1;
+  nevery = 1;
+
+  if (utils::strmatch(arg[3], "^v_")) {
+    tstr = utils::strdup(arg[3] + 2);
+  } else {
+    t_start = utils::numeric(FLERR, arg[3], false, lmp);
+    t_target = t_start;
+    tstyle = CONSTANT;
+  }
+
+  t_stop = utils::numeric(FLERR, arg[4], false, lmp);
+  t_period = utils::numeric(FLERR, arg[5], false, lmp);
+  seed = utils::inumeric(FLERR, arg[6], false, lmp);
+
+  if (t_period <= 0.0) error->all(FLERR, "Fix gjf period must be > 0.0");
+  if (seed <= 0) error->all(FLERR, "Illegal fix gjf command");
+
+  // initialize Marsaglia RNG with processor-unique seed
+  random = new RanMars(lmp, seed + comm->me);
+
+  int GJmethods = 8; // number of currently implemented GJ methods
+  maxatom = 0;
+
+  // optional args
+  // per default, use half step and GJ-I
+
+  osflag = 0;
+  GJmethod = 1;
+  lv_allocated = 0;
+
+  int iarg = 7;
+  while (iarg < narg) {
+    if (strcmp(arg[iarg], "vel") == 0) {
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix gjf command");
+      if (strcmp(arg[iarg + 1], "vfull") == 0) {
+        osflag = 1;
+      } else if (strcmp(arg[iarg + 1], "vhalf") == 0) {
+        osflag = 0;
+      } else
+        error->all(FLERR, "Illegal fix gjf command");
+      iarg += 2;
+    } else if (strcmp(arg[iarg], "method") == 0) {
+      // the method number must exist before it is read (a trailing "method" is an error)
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix gjf command");
+      GJmethod = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
+      if (GJmethod == 7) {
+        // GJ-VII takes the coefficient c2 as one additional argument
+        if (iarg + 3 > narg) error->all(FLERR, "Illegal fix gjf command for GJ-VII");
+        gjfc2 = utils::numeric(FLERR, arg[iarg + 2], false, lmp);
+        if (gjfc2 < 0 || gjfc2 > 1) error->all(FLERR, "Choice of c2 in GJ-VII must be 0≤c2≤1");
+        iarg += 3;
+        if (lmp->citeme) lmp->citeme->add(cite_gjf_7);
+      } else {
+        if (GJmethod < 0 || GJmethod > GJmethods) error->all(FLERR, "Invalid GJ method choice in gjf command");
+        if (GJmethod == 8) if (lmp->citeme) lmp->citeme->add(cite_gjf_8);
+        iarg += 2;
+      }
+    } else
+      error->all(FLERR, "Illegal fix gjf command");
+  }
+  if (GJmethod == 1 && osflag == 0) if (lmp->citeme) lmp->citeme->add(cite_gjf_vhalf);
+  if (GJmethod == 1 && osflag == 1) if (lmp->citeme) lmp->citeme->add(cite_gjf_vfull);
+
+  // set temperature = nullptr, user can override via fix_modify if wants bias
+  id_temp = nullptr;
+  temperature = nullptr;
+
+  // setup atom-based array for lv (lv and tforce are still nullptr from the initializer list)
+  // register with Atom class
+  // no need to set peratom_flag, b/c data is for internal use only
+
+  FixGJF::grow_arrays(atom->nmax);
+  atom->add_callback(Atom::GROW);
+
+  // initialize lv to zero for all local atoms
+  int nlocal = atom->nlocal;
+  for (int i = 0; i < nlocal; i++) {
+    lv[i][0] = 0.0;
+    lv[i][1] = 0.0;
+    lv[i][2] = 0.0;
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixGJF::~FixGJF()
+{
+  if (copymode) return;
+  // release what the constructor, modify_param() and compute_target() allocated
+  delete random;
+  delete[] tstr;
+  delete[] id_temp;
+  memory->destroy(tforce);
+  // free the per-atom array and drop the Atom::GROW callback registered in the constructor
+  memory->destroy(lv);
+  if (modify->get_fix_by_id(id)) atom->delete_callback(id, Atom::GROW);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixGJF::setmask()
+{
+  // both integration hooks are always requested
+  int mask = INITIAL_INTEGRATE | FINAL_INTEGRATE;
+  // end_of_step() is only requested when osflag is 0 ("vel vhalf", the default)
+  if (osflag == 0) mask |= END_OF_STEP;
+  return mask;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixGJF::init()
+{
+  // validate the temperature compute and variable, reject rRESPA, then derive gjfc1, gjfc2, gjfc3
+  if (id_temp) {
+    temperature = modify->get_compute_by_id(id_temp);
+    if (!temperature) {
+      error->all(FLERR, "Temperature compute ID {} for fix {} does not exist", id_temp, style);
+    } else {
+      if (temperature->tempflag == 0)
+        error->all(FLERR, "Compute ID {} for fix {} does not compute temperature", id_temp, style);
+    }
+  }
+  // check variable: look it up by name and record whether it is equal-style or atom-style
+  if (tstr) {
+    tvar = input->variable->find(tstr);
+    if (tvar < 0) error->all(FLERR, "Variable name {} for fix gjf does not exist", tstr);
+    if (input->variable->equalstyle(tvar))
+      tstyle = EQUAL;
+    else if (input->variable->atomstyle(tvar))
+      tstyle = ATOM;
+    else
+      error->all(FLERR, "Variable {} for fix gjf is invalid style", tstr);
+  }
+
+  if (utils::strmatch(update->integrate_style, "^respa")) {
+    error->all(FLERR, "Fix gjf and run style respa are not compatible");
+  }
+
+  if (temperature && temperature->tempbias)
+    tbiasflag = BIAS;
+  else
+    tbiasflag = NOBIAS;
+
+  // Complete set of thermostats is given in Gronbech-Jensen, Molecular Physics, 118 (2020)
+  switch (GJmethod) {    // every case except 7 sets gjfc2 from update->dt and t_period
+    case 1:
+      gjfc2 = (1.0 - update->dt / 2.0 / t_period) / (1.0 + update->dt / 2.0 / t_period);
+      break;
+    case 2:
+      gjfc2 = exp(-update->dt / t_period);
+      break;
+    case 3:
+      gjfc2 = 1.0 - update->dt / t_period;
+      break;
+    case 4:
+      gjfc2 = ( sqrt(1.0 + 4.0 * (update->dt / t_period) ) - 1.0 ) / ( 2.0 * update->dt / t_period );
+      break;
+    case 5:
+      gjfc2 = 1.0 / (1.0 + update->dt / t_period);
+      break;
+    case 6:
+      gjfc2 = (1.0 / (1.0 + update->dt / 2.0 / t_period)) * (1.0 / (1.0 + update->dt / 2.0 / t_period));
+      break;
+    case 7: // provided in Finkelstein (2021); gjfc2 was read in the constructor, so update->dt is overwritten instead
+      update->dt = (1.0 + gjfc2) / (1.0 - gjfc2) * log(gjfc2) * log(gjfc2) * 0.5 * t_period;
+      break;
+    case 8: // provided in Gronbech-Jensen (2024)
+      gjfc2 = sqrt( (update->dt / t_period) * (update->dt / t_period) + 1.0 ) - update->dt / t_period;
+      break;
+    case 0:
+      gjfc2 = 0.0;
+      break;
+    default:
+      error->all(FLERR, "Fix gjf method not found");
+      break;
+  }
+  gjfc1 = (1.0 + gjfc2) / 2.0;
+  gjfc3 = (1.0 - gjfc2) * t_period / update->dt;    // uses update->dt as possibly changed by case 7
+}
+
+/* ----------------------------------------------------------------------
+  integrate position and velocity according to the GJ methods
+  in Grønbech-Jensen, J Stat Phys 191, 137 (2024). The general workflow is
+    1. GJ Initial Integration
+    2. Force Update
+    3. GJ Final Integration
+    4. Velocity Choice in end_of_step()
+------------------------------------------------------------------------- */
+
+void FixGJF::initial_integrate(int /* vflag */)
+{
+  // This function provides the integration of the GJ formulation 24 a-e
+  double **x = atom->x;
+  double **v = atom->v;
+  double **f = atom->f;
+  double *mass = atom->mass;
+  double *rmass = atom->rmass;
+  int *type = atom->type;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  double fran[3];    // three random-force components, redrawn for every atom
+
+  double boltz = force->boltz;
+  double dt = update->dt;
+  double mvv2e = force->mvv2e;
+  double ftm2v = force->ftm2v;
+
+  double dtf = 0.5 * dt * ftm2v;
+  double dtfm;
+  double c1sqrt = sqrt(gjfc1);
+  double c3sqrt = sqrt(gjfc3);
+  double csq = sqrt(gjfc3 / gjfc1);    // scales both the force kick and the position drift below
+  double m, beta;
+
+  // If user elected vhalf, v needs to be reassigned to onsite velocity for integration
+  if (!osflag && lv_allocated) {    // lv_allocated is only set once final_integrate() has run
+    for (int i = 0; i < nlocal; i++)
+      if (mask[i] & groupbit) {
+        // lv is Eq. 24f from previous time step
+        v[i][0] = lv[i][0];
+        v[i][1] = lv[i][1];
+        v[i][2] = lv[i][2];
+      }
+  }
+
+  compute_target();    // updates t_target and tsqrt, or tforce[] for an atom-style variable
+  if (tbiasflag) temperature->compute_scalar();
+  // the two branches below differ only in the mass used: per-atom rmass[i] or per-type mass[type[i]]
+  if (rmass) {
+    for (int i = 0; i < nlocal; i++) {
+      if (mask[i] & groupbit) {
+        if (tstyle == ATOM) tsqrt = sqrt(tforce[i]);
+        m = rmass[i];
+        beta = tsqrt * sqrt(2.0*dt*m*boltz/t_period/mvv2e) / ftm2v;    // prefactor of the Gaussian random numbers
+
+        fran[0] = beta*random->gaussian();
+        fran[1] = beta*random->gaussian();
+        fran[2] = beta*random->gaussian();
+
+        // First integration delivers Eq. 24a and 24b:
+        dtfm = dtf / m;
+        v[i][0] += csq * dtfm * f[i][0];
+        v[i][1] += csq * dtfm * f[i][1];
+        v[i][2] += csq * dtfm * f[i][2];
+        x[i][0] += 0.5 * csq * dt * v[i][0];
+        x[i][1] += 0.5 * csq * dt * v[i][1];
+        x[i][2] += 0.5 * csq * dt * v[i][2];
+
+        if (tbiasflag) temperature->remove_bias(i, v[i]);
+
+        // Calculate Eq. 24c:
+        lv[i][0] = c1sqrt*v[i][0] + ftm2v * (c3sqrt / (2.0 * m)) * fran[0];
+        lv[i][1] = c1sqrt*v[i][1] + ftm2v * (c3sqrt / (2.0 * m)) * fran[1];
+        lv[i][2] = c1sqrt*v[i][2] + ftm2v * (c3sqrt / (2.0 * m)) * fran[2];
+
+        // Calculate Eq. 24d
+        v[i][0] = (gjfc2 / c1sqrt) * lv[i][0] + ftm2v * csq * (0.5 / m) * fran[0];
+        v[i][1] = (gjfc2 / c1sqrt) * lv[i][1] + ftm2v * csq * (0.5 / m) * fran[1];
+        v[i][2] = (gjfc2 / c1sqrt) * lv[i][2] + ftm2v * csq * (0.5 / m) * fran[2];
+
+        if (tbiasflag) temperature->restore_bias(i, v[i]);
+        if (tbiasflag) temperature->restore_bias(i, lv[i]);
+
+        // Calculate Eq. 24e. Final integrator then calculates Eq. 24f after force update.
+        x[i][0] += 0.5 * csq * dt * v[i][0];
+        x[i][1] += 0.5 * csq * dt * v[i][1];
+        x[i][2] += 0.5 * csq * dt * v[i][2];
+      }
+    }
+  } else {
+    for (int i = 0; i < nlocal; i++) {
+      if (mask[i] & groupbit) {
+        if (tstyle == ATOM) tsqrt = sqrt(tforce[i]);
+        m = mass[type[i]];
+        beta = tsqrt * sqrt(2.0*dt*m*boltz/t_period/mvv2e) / ftm2v;    // prefactor of the Gaussian random numbers
+
+        fran[0] = beta*random->gaussian();
+        fran[1] = beta*random->gaussian();
+        fran[2] = beta*random->gaussian();
+
+        // First integration delivers Eq. 24a and 24b:
+        dtfm = dtf / m;
+        v[i][0] += csq * dtfm * f[i][0];
+        v[i][1] += csq * dtfm * f[i][1];
+        v[i][2] += csq * dtfm * f[i][2];
+        x[i][0] += 0.5 * csq * dt * v[i][0];
+        x[i][1] += 0.5 * csq * dt * v[i][1];
+        x[i][2] += 0.5 * csq * dt * v[i][2];
+
+        if (tbiasflag) temperature->remove_bias(i, v[i]);
+
+        // Calculate Eq. 24c:
+        lv[i][0] = c1sqrt*v[i][0] + ftm2v * (c3sqrt / (2.0 * m)) * fran[0];
+        lv[i][1] = c1sqrt*v[i][1] + ftm2v * (c3sqrt / (2.0 * m)) * fran[1];
+        lv[i][2] = c1sqrt*v[i][2] + ftm2v * (c3sqrt / (2.0 * m)) * fran[2];
+
+        // Calculate Eq. 24d
+        v[i][0] = (gjfc2 / c1sqrt) * lv[i][0] + ftm2v * csq * (0.5 / m) * fran[0];
+        v[i][1] = (gjfc2 / c1sqrt) * lv[i][1] + ftm2v * csq * (0.5 / m) * fran[1];
+        v[i][2] = (gjfc2 / c1sqrt) * lv[i][2] + ftm2v * csq * (0.5 / m) * fran[2];
+
+        if (tbiasflag) temperature->restore_bias(i, v[i]);
+        if (tbiasflag) temperature->restore_bias(i, lv[i]);
+
+        // Calculate Eq. 24e. Final integrator then calculates Eq. 24f after force update.
+        x[i][0] += 0.5 * csq * dt * v[i][0];
+        x[i][1] += 0.5 * csq * dt * v[i][1];
+        x[i][2] += 0.5 * csq * dt * v[i][2];
+      }
+    }
+  }
+}
+
+void FixGJF::final_integrate()
+{
+  // Second velocity update of the time step (Eq. 24f), done after the force update:
+  // v += sqrt(gjfc3 / gjfc1) * (0.5 * dt * ftm2v / m) * f for every atom in the fix group.
+  double **vel = atom->v;
+  double **frc = atom->f;
+  const double *per_atom_mass = atom->rmass;
+  const double *per_type_mass = atom->mass;
+  const int *atype = atom->type;
+  const int *amask = atom->mask;
+
+  // loop over atom->nfirst atoms instead of atom->nlocal when the fix group is atom->firstgroup
+  int nloop = atom->nlocal;
+  if (igroup == atom->firstgroup) nloop = atom->nfirst;
+
+  const double half_dt = 0.5 * update->dt * force->ftm2v;
+  const double kick = sqrt(gjfc3 / gjfc1);
+
+  for (int i = 0; i < nloop; i++) {
+    if (!(amask[i] & groupbit)) continue;
+
+    // per-atom mass if the rmass array exists, otherwise the mass of the atom's type
+    double m;
+    if (per_atom_mass)
+      m = per_atom_mass[i];
+    else
+      m = per_type_mass[atype[i]];
+
+    const double scale = kick * (half_dt / m);
+    vel[i][0] += scale * frc[i][0];
+    vel[i][1] += scale * frc[i][1];
+    vel[i][2] += scale * frc[i][2];
+  }
+
+  // lv was filled during this step, so initial_integrate() may now copy it back into v
+  lv_allocated = 1;
+}
+
+/* ----------------------------------------------------------------------
+   set current t_target and tsqrt (per-atom values go to tforce for atom-style variables)
+------------------------------------------------------------------------- */
+
+void FixGJF::compute_target()
+{
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  // delta: elapsed fraction of the run, (ntimestep - beginstep) / (endstep - beginstep)
+  double delta = update->ntimestep - update->beginstep;
+  if (delta != 0.0) delta /= update->endstep - update->beginstep;
+
+  // if variable temp, evaluate variable, wrap with clear/add
+  // reallocate tforce array if necessary
+  // otherwise interpolate linearly between t_start and t_stop
+  if (tstyle == CONSTANT) {
+    t_target = t_start + delta * (t_stop-t_start);
+    tsqrt = sqrt(t_target);
+  } else {
+    modify->clearstep_compute();
+    if (tstyle == EQUAL) {
+      t_target = input->variable->compute_equal(tvar);
+      if (t_target < 0.0)
+        error->one(FLERR, "Fix gjf variable returned negative temperature");
+      tsqrt = sqrt(t_target);
+    } else {
+      if (atom->nmax > maxatom) {    // tforce is too short for the current atom->nmax
+        maxatom = atom->nmax;
+        memory->destroy(tforce);
+        memory->create(tforce,maxatom,"gjf:tforce");
+      }
+      input->variable->compute_atom(tvar,igroup,tforce,1,0);
+      for (int i = 0; i < nlocal; i++)    // tsqrt is not set here; initial_integrate() takes sqrt(tforce[i])
+        if (mask[i] & groupbit)
+            if (tforce[i] < 0.0)
+              error->one(FLERR, "Fix gjf variable returned negative temperature");
+    }
+    modify->addstep_compute(update->ntimestep + 1);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   select velocity for GJ
+------------------------------------------------------------------------- */
+
+void FixGJF::end_of_step()
+{
+  // Requested by setmask() only when osflag is 0 ("vel vhalf").
+  // After the final integrator delivers Eq. 24f in v, exchange v and lv:
+  // the half-step velocity stored in lv is used in the remaining simulation
+  // tasks, while Eq. 24f is kept in lv so that the next initial_integrate()
+  // can restore it.
+
+  double **vel = atom->v;
+  const int *amask = atom->mask;
+  const int natoms = atom->nlocal;
+
+  // swap v and lv component by component for every atom in the fix group
+  for (int i = 0; i < natoms; i++) {
+    if (!(amask[i] & groupbit)) continue;
+
+    double *onsite = vel[i];
+    double *half = lv[i];
+    for (int k = 0; k < 3; k++) {
+      const double keep = onsite[k];    // v is Eq. 24f
+      onsite[k] = half[k];              // move on with half-step velocity
+      half[k] = keep;                   // store Eq. 24f in lv for next timestep
+    }
+  }
+}
+
+// clang-format on
+/* ---------------------------------------------------------------------- */
+
+void FixGJF::reset_target(double t_new)
+{
+  t_start = t_stop = t_target = t_new;    // start, stop and current target all take the new value
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixGJF::reset_dt()
+{
+  // recompute gjfc1, gjfc2, gjfc3 with the same formulas as init(); see Gronbech-Jensen, Molecular Physics, 118 (2020)
+  switch (GJmethod) {    // every case except 7 sets gjfc2 from update->dt and t_period
+    case 1:
+      gjfc2 = (1.0 - update->dt / 2.0 / t_period) / (1.0 + update->dt / 2.0 / t_period);
+      break;
+    case 2:
+      gjfc2 = exp(-update->dt / t_period);
+      break;
+    case 3:
+      gjfc2 = 1.0 - update->dt / t_period;
+      break;
+    case 4:
+      gjfc2 = ( sqrt(1.0 + 4.0 * (update->dt / t_period) ) - 1.0 ) / ( 2.0 * update->dt / t_period );
+      break;
+    case 5:
+      gjfc2 = 1.0 / (1.0 + update->dt / t_period);
+      break;
+    case 6:
+      gjfc2 = (1.0 / (1.0 + update->dt / 2.0 / t_period)) * (1.0 / (1.0 + update->dt / 2.0 / t_period));
+      break;
+    case 7: // provided in Finkelstein (2021); gjfc2 is kept and update->dt is overwritten from it
+      update->dt = (1.0 + gjfc2) / (1.0 - gjfc2) * log(gjfc2) * log(gjfc2) * 0.5 * t_period;
+      break;
+    case 8: // provided in Gronbech-Jensen (2024)
+      gjfc2 = sqrt( (update->dt / t_period)*(update->dt / t_period) + 1.0 ) - update->dt / t_period;
+      break;
+    case 0:
+      gjfc2 = 0.0;
+      break;
+    default:
+      error->all(FLERR, "Fix gjf method not found");
+      break;
+  }
+  gjfc1 = (1.0 + gjfc2) / 2.0;
+  gjfc3 = (1.0 - gjfc2) * t_period / update->dt;    // uses update->dt as possibly changed by case 7
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixGJF::modify_param(int narg, char **arg)
+{
+  // any keyword other than "temp" is not handled here
+  if (strcmp(arg[0], "temp") != 0) return 0;
+  if (narg < 2) utils::missing_cmd_args(FLERR, "fix_modify", error);
+
+  // store the new compute ID (dropping a previous one) and verify the compute right away
+  delete[] id_temp;
+  id_temp = utils::strdup(arg[1]);
+  temperature = modify->get_compute_by_id(id_temp);
+  if (!temperature)
+    error->all(FLERR, "Could not find fix_modify temperature compute ID: {}", id_temp);
+  if (temperature->tempflag == 0)
+    error->all(FLERR, "Fix_modify temperature compute {} does not compute temperature", id_temp);
+  if (comm->me == 0 && temperature->igroup != igroup)
+    error->warning(FLERR, "Group for fix_modify temp != fix group: {} vs {}",
+                   group->names[igroup], group->names[temperature->igroup]);
+  return 2;
+}
+
+/* ----------------------------------------------------------------------
+   extract thermostat properties
+------------------------------------------------------------------------- */
+
+void *FixGJF::extract(const char *str, int &dim)
+{
+  // "t_target" is the only property exposed; dim is set to 0 for every request
+  dim = 0;
+  return (strcmp(str, "t_target") == 0) ? static_cast<void *>(&t_target) : nullptr;
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays (lv and, if allocated, tforce)
+------------------------------------------------------------------------- */
+
+double FixGJF::memory_usage()
+{
+  const double nmax = (double) atom->nmax;
+  double bytes = nmax * 3 * sizeof(double);      // lv: nmax rows of 3 doubles
+  if (tforce) bytes += nmax * sizeof(double);    // tforce: counted only if it was allocated
+  return bytes;
+}
+
+/* ----------------------------------------------------------------------
+   allocate atom-based array for lv
+------------------------------------------------------------------------- */
+
+void FixGJF::grow_arrays(int nmax)
+{
+  memory->grow(lv, nmax, 3, "fix_gjf:lv");    // resize lv to nmax rows of 3 doubles
+}
+
+/* ----------------------------------------------------------------------
+   copy values within local atom-based array
+------------------------------------------------------------------------- */
+
+void FixGJF::copy_arrays(int i, int j, int /*delflag*/)
+{
+  // duplicate the three lv components of atom i into slot j
+  const double *src = lv[i];
+  for (int k = 0; k < 3; k++) lv[j][k] = src[k];
+}
+
+/* ----------------------------------------------------------------------
+   pack values in local atom-based array for exchange with another proc
+------------------------------------------------------------------------- */
+
+int FixGJF::pack_exchange(int i, double *buf)
+{
+  // write the three lv components of atom i to the start of buf
+  // and return how many values were written
+  int count = 0;
+  for (int k = 0; k < 3; k++) buf[count++] = lv[i][k];
+  return count;
+}
+
+/* ----------------------------------------------------------------------
+   unpack values in local atom-based array from exchange with another proc
+------------------------------------------------------------------------- */
+
+int FixGJF::unpack_exchange(int nlocal, double *buf)
+{
+  // read the three lv components for the atom at index nlocal from the start of buf
+  // and return how many values were consumed
+  int count = 0;
+  for (int k = 0; k < 3; k++) lv[nlocal][k] = buf[count++];
+  return count;
+}
diff --git a/src/EXTRA-FIX/fix_gjf.h b/src/EXTRA-FIX/fix_gjf.h
new file mode 100644
index 0000000000..154f6543da
--- /dev/null
+++ b/src/EXTRA-FIX/fix_gjf.h
@@ -0,0 +1,68 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+// clang-format off
+FixStyle(gjf,FixGJF);
+// clang-format on
+#else
+
+#ifndef LMP_FIX_GJF_H
+#define LMP_FIX_GJF_H
+
+#include "fix.h"
+
+namespace LAMMPS_NS {
+
+class FixGJF : public Fix {    // fix style "gjf": time integration with the GJ thermostat methods
+ public:
+  FixGJF(class LAMMPS *, int, char **);
+  ~FixGJF() override;
+  int setmask() override;
+  void init() override;
+  void initial_integrate(int) override;
+  void final_integrate() override;
+  void end_of_step() override;
+  void reset_target(double) override;
+  void reset_dt() override;
+  int modify_param(int, char **) override;
+  double memory_usage() override;
+  void *extract(const char *, int &) override;
+  void grow_arrays(int) override;
+  void copy_arrays(int, int, int) override;
+  int pack_exchange(int, double *) override;
+  int unpack_exchange(int, double *) override;
+
+ protected:
+  int osflag, tbiasflag, GJmethod, maxatom, lv_allocated;    // osflag: 1 = vfull, 0 = vhalf; maxatom: length of tforce
+  double t_start, t_stop, t_period, t_target, tsqrt;         // temperature ramp ends, period, current target and its sqrt
+  double gjfc1, gjfc2, gjfc3;                                // coefficients set in init() and reset_dt()
+  int tstyle, tvar;                                          // CONSTANT/EQUAL/ATOM and index of the temperature variable
+  char *tstr;                                                // name of the temperature variable, nullptr if numeric
+
+  double *tforce;    // per-atom target temperature, allocated only for atom-style variables
+  double **lv;       // per-atom velocity: Eq. 24c half-step value, swapped with Eq. 24f in end_of_step()
+
+  char *id_temp;                 // ID of the compute set via fix_modify temp, or nullptr
+  class Compute *temperature;    // that compute, or nullptr
+
+  class RanMars *random;    // random number generator created in the constructor
+  int seed;                 // seed from arg[6]; comm->me is added per processor
+
+  void compute_target();    // sets t_target and tsqrt, or fills tforce for atom-style variables
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/GRANULAR/fix_add_heat.cpp b/src/GRANULAR/fix_add_heat.cpp
index 73bf8a9a26..330723700b 100644
--- a/src/GRANULAR/fix_add_heat.cpp
+++ b/src/GRANULAR/fix_add_heat.cpp
@@ -20,11 +20,13 @@
 #include "atom.h"
 #include "error.h"
 #include "input.h"
+#include "math_special.h"
 #include "memory.h"
 #include "variable.h"
 
 using namespace LAMMPS_NS;
 using namespace FixConst;
+using MathSpecial::powint;
 
 enum { CONSTANT, EQUAL, ATOM };
 enum { ADD, LINEAR, QUARTIC };
@@ -152,7 +154,7 @@ void FixAddHeat::post_force(int /*vflag*/)
       } else if (style == LINEAR) {
         heatflow[i] += prefactor * (vtmp - temperature[i]);
       } else if (style == QUARTIC) {
-        heatflow[i] += prefactor * (pow(vtmp, 4.0) - pow(temperature[i], 4.0));
+        heatflow[i] += prefactor * (powint(vtmp, 4) - powint(temperature[i], 4));
       }
     }
   }
diff --git a/src/GRANULAR/fix_granular_mdr.cpp b/src/GRANULAR/fix_granular_mdr.cpp
index 9efabdf465..0416edc432 100644
--- a/src/GRANULAR/fix_granular_mdr.cpp
+++ b/src/GRANULAR/fix_granular_mdr.cpp
@@ -29,6 +29,7 @@
 #include "granular_model.h"
 #include "input.h"
 #include "math_const.h"
+#include "math_special.h"
 #include "memory.h"
 #include "modify.h"
 #include "neigh_list.h"
@@ -43,9 +44,10 @@ using namespace Granular_NS;
 using namespace Granular_MDR_NS;
 using namespace FixConst;
 using MathConst::MY_PI;
+using MathSpecial::cube;
 
 static constexpr double EPSILON = 1e-16;
-static constexpr double OVERLAP_LIMIT = 0.75;
+static constexpr double OVERLAP_LIMIT = 0.95;
 
 enum { COMM_1, COMM_2 };
 
@@ -85,7 +87,7 @@ void FixGranularMDR::post_constructor()
   modify->add_fix(
       fmt::format("{} all property/atom d_Ro d_Vcaps d_Vgeo d_Velas d_eps_bar d_dRnumerator "
                   "d_dRdenominator d_Acon0 d_Acon1 d_Atot d_Atot_sum d_ddelta_bar d_psi "
-                  "d_history_setup_flag d_sigmaxx d_sigmayy d_sigmazz ghost yes",
+                  "d_history_setup_flag d_sigmaxx d_sigmayy d_sigmazz d_dRavg ghost yes",
                   id_fix));
 
   index_Ro = atom->find_custom("Ro", tmp1, tmp2);
@@ -105,6 +107,7 @@ void FixGranularMDR::post_constructor()
   index_sigmaxx = atom->find_custom("sigmaxx", tmp1, tmp2);
   index_sigmayy = atom->find_custom("sigmayy", tmp1, tmp2);
   index_sigmazz = atom->find_custom("sigmazz", tmp1, tmp2);
+  index_dRavg = atom->find_custom("dRavg", tmp1, tmp2);
 }
 
 /* ---------------------------------------------------------------------- */
@@ -153,31 +156,31 @@ void FixGranularMDR::setup_pre_force(int /*vflag*/)
 
     norm_model2 = dynamic_cast<GranSubModNormalMDR *>(fix->model->normal_model);
 
-    if (norm_model && norm_model2 && (norm_model->E != norm_model2->E))
+    if (norm_model && norm_model2 && fabs(norm_model->get_emod() - norm_model2->get_emod()) > EPSILON)
       error->all(
           FLERR, Error::NOLASTLINE,
           "Young's modulus in pair style, {}, does not agree with value {} in fix gran/wall/region",
-          norm_model->E, norm_model2->E);
-    if (norm_model->nu != norm_model2->nu)
+          norm_model->get_emod(), norm_model2->get_emod());
+    if (fabs(norm_model->get_poiss() - norm_model2->get_poiss()) > EPSILON)
       error->all(
           FLERR, Error::NOLASTLINE,
           "Poisson's ratio in pair style, {}, does not agree with value {} in fix gran/wall/region",
-          norm_model->nu, norm_model2->nu);
-    if (norm_model->Y != norm_model2->Y)
+          norm_model->get_poiss(), norm_model2->get_poiss());
+    if (fabs(norm_model->Y - norm_model2->Y) > EPSILON)
       error->all(
           FLERR, Error::NOLASTLINE,
           "Yield stress in pair style, {}, does not agree with value {} in fix gran/wall/region",
           norm_model->Y, norm_model2->Y);
-    if (norm_model->psi_b != norm_model2->psi_b)
+    if (fabs(norm_model->psi_b - norm_model2->psi_b) > EPSILON)
       error->all(FLERR, Error::NOLASTLINE,
                  "Bulk response trigger in pair style, {}, does not agree with value {} in fix "
                  "gran/wall/region",
                  norm_model->psi_b, norm_model2->psi_b);
-    if (norm_model->CoR != norm_model2->CoR)
+    if (fabs(norm_model->get_damp() - norm_model2->get_damp()) > EPSILON)
       error->all(FLERR, Error::NOLASTLINE,
-                 "Coefficient of restitution in pair style, {}, does not agree with value {} in "
+                 "Damping in pair style, {}, does not agree with value {} in "
                  "fix gran/wall/region",
-                 norm_model->CoR, norm_model2->CoR);
+                 norm_model->get_damp(), norm_model2->get_damp());
   }
 
   fix_history = dynamic_cast<FixNeighHistory *>(modify->get_fix_by_id("NEIGH_HISTORY_GRANULAR"));
@@ -208,6 +211,7 @@ void FixGranularMDR::pre_force(int)
   double *sigmayy = atom->dvector[index_sigmayy];
   double *sigmazz = atom->dvector[index_sigmazz];
   double *history_setup_flag = atom->dvector[index_history_setup_flag];
+  double *dRavg = atom->dvector[index_dRavg];
 
   int new_atom;
   int nlocal = atom->nlocal;
@@ -241,17 +245,24 @@ void FixGranularMDR::pre_force(int)
     if (update->setupflag && (!new_atom)) continue;
 
     const double R = radius[i];
-    const double Vo = 4.0 / 3.0 * MY_PI * pow(Ro[i], 3.0);
-    const double Vgeoi = 4.0 / 3.0 * MY_PI * pow(R, 3.0) - Vcaps[i];
+    const double Rsq = R * R;
+    const double Vo = 4.0 / 3.0 * MY_PI * cube(Ro[i]);
+    const double Vgeoi = 4.0 / 3.0 * MY_PI * Rsq * R - Vcaps[i];
 
     Vgeo[i] = MIN(Vgeoi, Vo);
     Velas[i] = Vo * (1.0 + eps_bar[i]);
-    Atot[i] = 4.0 * MY_PI * pow(R, 2.0) + Atot_sum[i];
+    Atot[i] = 4.0 * MY_PI * Rsq + Atot_sum[i];
     psi[i] = (Atot[i] - Acon1[i]) / Atot[i];
 
     if (psi_b_coeff < psi[i]) {
-      const double dR = MAX(dRnumerator[i] / (dRdenominator[i] - 4.0 * MY_PI * pow(R, 2.0)), 0.0);
-      if ((radius[i] + dR) < (1.5 * Ro[i])) radius[i] += dR;
+      double w_confinement;
+      ( psi[i] > 0.1 ) ? w_confinement = 1.0 / (1.0 + exp(-75.0 * (psi[i] - 0.2))) : w_confinement = 0.0;
+      const double dR = MAX(dRnumerator[i] / (dRdenominator[i] - 4.0 * MY_PI * Rsq) * w_confinement, 0.0);
+
+      const double N_window = 10.0;
+      if (dR > 0.0) dRavg[i] += (dR - dRavg[i]) / N_window;
+
+      if (((radius[i] + dR) < (1.5 * Ro[i])) && (dR > 0.0)) radius[i] += dRavg[i];
     }
     Acon0[i] = Acon1[i];
   }
diff --git a/src/GRANULAR/fix_granular_mdr.h b/src/GRANULAR/fix_granular_mdr.h
index f0ba76d155..fce727fe5d 100644
--- a/src/GRANULAR/fix_granular_mdr.h
+++ b/src/GRANULAR/fix_granular_mdr.h
@@ -51,7 +51,8 @@ namespace Granular_MDR_NS {
     PENALTY,           // contact penalty
     DELTA_MAX,
     DELTAP_0,
-    DELTAP_1
+    DELTAP_1,
+    DAMP_SCALE
   };
 
 }    // namespace Granular_MDR_NS
@@ -97,8 +98,7 @@ class FixGranularMDR : public Fix {
   int index_sigmayy;            // yy-component of the stress tensor, not necessary forforce calculation
   int index_sigmazz;            // zz-component of the stress tensor, not necessary forforce calculation
   int index_history_setup_flag; // flag to check if history variables have beeninitialized
-  int index_contacts;           // total contacts on particle
-  int index_adhesive_length;    // total length of adhesive contact on a particle
+  int index_dRavg;              // average radius update increment
 };
 
 }    // namespace LAMMPS_NS
diff --git a/src/GRANULAR/gran_sub_mod_damping.cpp b/src/GRANULAR/gran_sub_mod_damping.cpp
index 2fdd7e1f82..f85b866f8d 100644
--- a/src/GRANULAR/gran_sub_mod_damping.cpp
+++ b/src/GRANULAR/gran_sub_mod_damping.cpp
@@ -13,11 +13,14 @@
 
 #include "gran_sub_mod_damping.h"
 
+#include "error.h"
 #include "gran_sub_mod_normal.h"
+#include "fix_granular_mdr.h"
 #include "granular_model.h"
 #include "math_special.h"
 #include "math_const.h"
 
+#include "style_gran_sub_mod.h"    // IWYU pragma: keep
 #include <cmath>
 
 using namespace LAMMPS_NS;
@@ -41,6 +44,9 @@ GranSubModDamping::GranSubModDamping(GranularModel *gm, LAMMPS *lmp) : GranSubMo
 
 void GranSubModDamping::init()
 {
+  if (gm->normal_model->name == "mdr")
+    error->all(FLERR, "Only damping mdr may be used with the mdr normal model");
+
   damp = gm->normal_model->get_damp();
 }
 
@@ -127,6 +133,9 @@ GranSubModDampingTsuji::GranSubModDampingTsuji(GranularModel *gm, LAMMPS *lmp) :
 
 void GranSubModDampingTsuji::init()
 {
+  if (gm->normal_model->name == "mdr")
+    error->all(FLERR, "Only damping mdr may be used with the mdr normal model");
+
   double tmp = gm->normal_model->get_damp();
   damp = 1.2728 - 4.2783 * tmp + 11.087 * square(tmp);
   damp += -22.348 * cube(tmp) + 27.467 * powint(tmp, 4);
@@ -160,6 +169,9 @@ GranSubModDampingCoeffRestitution::GranSubModDampingCoeffRestitution(GranularMod
 
 void GranSubModDampingCoeffRestitution::init()
 {
+  if (gm->normal_model->name == "mdr")
+    error->all(FLERR, "Only damping mdr may be used with the mdr normal model");
+
   // Calculate prefactor, assume Hertzian as default
   double cor = gm->normal_model->get_damp();
   double logcor = log(cor);
@@ -170,3 +182,48 @@ void GranSubModDampingCoeffRestitution::init()
     damp /= sqrt(MY_PI * MY_PI + logcor * logcor);
   }
 }
+
+/* ----------------------------------------------------------------------
+   MDR damping
+------------------------------------------------------------------------- */
+
+GranSubModDampingMDR::GranSubModDampingMDR(GranularModel *gm, LAMMPS *lmp) :
+    GranSubModDamping(gm, lmp)
+{
+  num_coeffs = 1;    // single coefficient: the damping type read in coeffs_to_local()
+}
+
+void GranSubModDampingMDR::coeffs_to_local()
+{
+  if ((coeffs[0] != 1.0) && (coeffs[0] != 2.0))    // check raw value so e.g. 1.5 is not truncated to 1
+    error->all(FLERR, "Illegal MDR damping model, damping type must be an integer equal to 1 or 2");
+  damp_type = static_cast<int>(coeffs[0]);    // damping type 1 = mdr stiffness or 2 = velocity
+}
+
+/* ---------------------------------------------------------------------- */
+
+void GranSubModDampingMDR::init()
+{
+  if (gm->normal_model->name != "mdr")
+    error->all(FLERR, "Damping mdr can only be used with mdr normal model");
+  // take the damping coefficient from the normal model
+  damp = gm->normal_model->get_damp();
+}
+
+/* ---------------------------------------------------------------------- */
+
+double GranSubModDampingMDR::calculate_forces()
+{
+  using namespace Granular_MDR_NS;
+  const double *mdr_history = &gm->history[gm->normal_model->history_index];
+  const double scale = mdr_history[DAMP_SCALE];    // value stored in the DAMP_SCALE history slot
+  switch (damp_type) {    // any other damp_type leaves damp_prefactor unchanged
+    case 1:    // type 1: coefficient multiplied by the stored scale
+      damp_prefactor = damp * scale;
+      break;
+    case 2:    // type 2: plain coefficient, but zero whenever the stored scale is zero
+      damp_prefactor = (scale == 0.0) ? 0.0 : damp;
+      break;
+  }
+  return -damp_prefactor * gm->vnnr;
+}
diff --git a/src/GRANULAR/gran_sub_mod_damping.h b/src/GRANULAR/gran_sub_mod_damping.h
index 98c31d680a..ac1363f64f 100644
--- a/src/GRANULAR/gran_sub_mod_damping.h
+++ b/src/GRANULAR/gran_sub_mod_damping.h
@@ -19,6 +19,7 @@ GranSubModStyle(mass_velocity,GranSubModDampingMassVelocity,DAMPING);
 GranSubModStyle(viscoelastic,GranSubModDampingViscoelastic,DAMPING);
 GranSubModStyle(tsuji,GranSubModDampingTsuji,DAMPING);
 GranSubModStyle(coeff_restitution,GranSubModDampingCoeffRestitution,DAMPING);
+GranSubModStyle(mdr,GranSubModDampingMDR,DAMPING);
 // clang-format on
 #else
 
@@ -48,7 +49,6 @@ namespace Granular_NS {
   class GranSubModDampingNone : public GranSubModDamping {
    public:
     GranSubModDampingNone(class GranularModel *, class LAMMPS *);
-    void init() override{};
     double calculate_forces() override;
   };
 
@@ -95,6 +95,19 @@ namespace Granular_NS {
 
   /* ---------------------------------------------------------------------- */
 
+  class GranSubModDampingMDR : public GranSubModDamping {
+   public:
+    GranSubModDampingMDR(class GranularModel *, class LAMMPS *);
+    void coeffs_to_local() override;       // reads and validates the damping type
+    void init() override;                  // requires the mdr normal model
+    double calculate_forces() override;    // returns -damp_prefactor * vnnr
+
+   protected:
+    int damp_type;    // 1 = mdr stiffness, 2 = velocity
+  };
+
+  /* ---------------------------------------------------------------------- */
+
 }    // namespace Granular_NS
 }    // namespace LAMMPS_NS
 
diff --git a/src/GRANULAR/gran_sub_mod_normal.cpp b/src/GRANULAR/gran_sub_mod_normal.cpp
index 226d26d86b..a4e2aecdc4 100644
--- a/src/GRANULAR/gran_sub_mod_normal.cpp
+++ b/src/GRANULAR/gran_sub_mod_normal.cpp
@@ -19,16 +19,18 @@
 #include "fix_granular_mdr.h"
 #include "granular_model.h"
 #include "math_const.h"
+#include "math_special.h"
 #include "modify.h"
 #include "update.h"
 
 #include <cmath>
-#include <iomanip>
-#include <sstream>
 
 using namespace LAMMPS_NS;
 using namespace Granular_NS;
 using namespace MathConst;
+using MathSpecial::square;
+using MathSpecial::cube;
+using MathSpecial::powint;
 
 static constexpr double PISQ = 9.8696044010893579923;            // PI^2
 static constexpr double PIINV = 0.318309886183790691216;         // 1/PI
@@ -47,7 +49,7 @@ static constexpr int MDR_MAX_IT = 100;                           // Newton-Raphs
 static constexpr double MDR_EPSILON1 = 1e-10;                    // Newton-Raphson for MDR
 static constexpr double MDR_EPSILON2 = 1e-16;                    // Newton-Raphson for MDR
 static constexpr double MDR_EPSILON3 = 1e-20;                    // For precision checks
-static constexpr double MDR_OVERLAP_LIMIT = 0.75;                // Maximum contact overlap for MDR
+static constexpr double MDR_OVERLAP_LIMIT = 0.95;                // Maximum contact overlap for MDR
 
 static const char cite_mdr[] =
     "MDR contact model command: (i) https://doi.org/10.1016/j.jmps.2023.105492 || (ii) https://doi.org/10.1016/j.jmps.2023.105493 || (iii) https://doi.org/10.31224/4289\n\n"
@@ -73,8 +75,9 @@ static const char cite_mdr[] =
     " author =  {Zunker, William and Dunatunga, Sachith and Thakur, Subhash and Tang, Pingjun and Kamrin, Ken},\n"
     " title =   {Experimentally validated DEM for large deformation powder compaction:\n"
     "            mechanically-derived contact model and screening of non-physical contacts},\n"
+    " journal = {Powder Technology},\n"
     " year =    {2025},\n"
-    " journal = {engrXiv},\n"
+    " pages =   {120972},\n"
     "}\n\n";
 
 /* ----------------------------------------------------------------------
@@ -442,9 +445,10 @@ GranSubModNormalMDR::GranSubModNormalMDR(GranularModel *gm, LAMMPS *lmp) :
 
   num_coeffs = 6;
   contact_radius_flag = 1;
-  size_history = 26;
+  size_history = 27;
   nsvector = 1;
   fix_mdr_flag = 0;
+  material_properties = 1;
   id_fix = nullptr;
 
   nondefault_history_transfer = 1;
@@ -467,29 +471,30 @@ GranSubModNormalMDR::~GranSubModNormalMDR()
 
 void GranSubModNormalMDR::coeffs_to_local()
 {
-  E = coeffs[0];      // Young's modulus
-  nu = coeffs[1];     // Poisson's ratio
-  Y = coeffs[2];      // yield stress
-  gamma = coeffs[3];  // effective surface energy
-  psi_b = coeffs[4];  // bulk response trigger based on ratio of remaining free area: A_{free}/A_{total}
-  CoR = coeffs[5];    // coefficent of restitution
+  Emod = coeffs[0];      // Young's modulus
+  poiss = coeffs[1];     // Poisson's ratio
+  Y = coeffs[2];         // yield stress
+  gamma = coeffs[3];     // effective surface energy
+  psi_b = coeffs[4];     // bulk response trigger based on ratio of remaining free area: A_{free}/A_{total}
+  damp = coeffs[5];      // damping coefficient
 
-  if (E <= 0.0) error->all(FLERR, "Illegal MDR normal model, Young's modulus must be greater than 0");
-  if (nu < 0.0 || nu > 0.5) error->all(FLERR, "Illegal MDR normal model, Poisson's ratio must be between 0 and 0.5");
+  if (Emod <= 0.0) error->all(FLERR, "Illegal MDR normal model, Young's modulus must be greater than 0");
+  if (poiss < 0.0 || poiss > 0.5) error->all(FLERR, "Illegal MDR normal model, Poisson's ratio must be between 0 and 0.5");
   if (Y < 0.0) error->all(FLERR, "Illegal MDR normal model, yield stress must be greater than or equal to 0");
   if (gamma < 0.0) error->all(FLERR, "Illegal MDR normal model, effective surface energy must be greater than or equal to 0");
   if (psi_b < 0.0 || psi_b > 1.0) error->all(FLERR, "Illegal MDR normal model, psi_b must be between 0 and 1.0");
-  if (CoR < 0.0 || CoR > 1.0) error->all(FLERR, "Illegal MDR normal model, coefficent of restitution must be between 0 and 1.0");
+  if (damp < 0.0) error->all(FLERR, "Illegal MDR normal model, damping coefficent must be greater than or equal to 0");
 
-  G = E / (2.0 * (1.0 + nu));            // shear modulus
-  kappa = E / (3.0 * (1.0 - 2.0 * nu));  // bulk modulus
-  Eeff = E / (1.0 - pow(nu, 2.0));       // composite plane strain modulus
+  G = Emod / (2.0 * (1.0 + poiss));            // shear modulus
+  kappa = Emod / (3.0 * (1.0 - 2.0 * poiss));  // bulk modulus
+  Eeff = Emod / (1.0 - square(poiss));       // composite plane strain modulus
 
   // precomputing factors
 
   Eeffinv = 1.0 / Eeff;
   Eeffsq = Eeff * Eeff;
   Eeffsqinv = Eeffinv * Eeffinv;
+  Eeff2particle = 0.5 * Eeff;
 
   gammasq = gamma * gamma;
   gamma3 = gammasq * gamma;
@@ -528,6 +533,7 @@ void GranSubModNormalMDR::init()
   index_sigmaxx = atom->find_custom("sigmaxx", tmp1, tmp2);             // xx-component of the stress tensor, not necessary for force calculation
   index_sigmayy = atom->find_custom("sigmayy", tmp1, tmp2);             // yy-component of the stress tensor, not necessary for force calculation
   index_sigmazz = atom->find_custom("sigmazz", tmp1, tmp2);             // zz-component of the stress tensor, not necessary for force calculation
+  index_dRavg = atom->find_custom("dRavg", tmp1, tmp2);                 // radius update increment
 }
 
 /* ---------------------------------------------------------------------- */
@@ -566,6 +572,7 @@ double GranSubModNormalMDR::calculate_forces()
   double *sigmaxx = atom->dvector[index_sigmaxx];
   double *sigmayy = atom->dvector[index_sigmayy];
   double *sigmazz = atom->dvector[index_sigmazz];
+  double *dRavg = atom->dvector[index_dRavg];
 
   const int itag_true = atom->tag[gm->i]; // true i particle tag
   const int jtag_true = atom->tag[gm->j]; // true j particle tag
@@ -579,6 +586,7 @@ double GranSubModNormalMDR::calculate_forces()
   double F1 = 0.0;                        // force on contact side 1
   double delta = gm->delta;               // apparent overlap
   double Ac_avg = 0.0;                    // average contact area across both sides
+  double a_damp = 0.0;                    // damping contact radius
 
   double *history = & gm->history[history_index]; // load in all history variables
   int history_update = gm->history_update;
@@ -596,7 +604,6 @@ double GranSubModNormalMDR::calculate_forces()
   if (gm->delta >= *deltamax_offset) *deltamax_offset = gm->delta;
   double deltamax = *deltamax_offset;
 
-
   for (int contactSide = 0; contactSide < 2; contactSide++) {
 
     double *delta_offset, *deltao_offset, *delta_MDR_offset, *delta_BULK_offset;
@@ -607,17 +614,17 @@ double GranSubModNormalMDR::calculate_forces()
       // displacement partitioning only necessary for particle-particle contact
 
       // itag and jtag persist after neighbor list builds, use tags to compare to match
-      //   contact history variables consistently across steps for a particle pair.
+      // contact history variables consistently across steps for a particle pair.
       if ((contactSide == 0 && itag_true > jtag_true) || (contactSide != 0 && itag_true < jtag_true)) {
-          gm->i = i_true;
-          gm->j = j_true;
-          gm->radi = radi_true;
-          gm->radj = radj_true;
+        gm->i = i_true;
+        gm->j = j_true;
+        gm->radi = radi_true;
+        gm->radj = radj_true;
       } else {
-          gm->i = j_true;
-          gm->j = i_true;
-          gm->radi = radj_true;
-          gm->radj = radi_true;
+        gm->i = j_true;
+        gm->j = i_true;
+        gm->radi = radj_true;
+        gm->radj = radi_true;
       }
 
       // determine the two maximum experienced geometric overlaps on either side of rigid flat
@@ -715,7 +722,7 @@ double GranSubModNormalMDR::calculate_forces()
       if (history_update && phertz > pY) {
         *Yflag_offset = 1.0;
         *deltaY_offset = delta_MDR;
-        *cA_offset = MY_PI * (pow(*deltaY_offset, 2) - *deltaY_offset * R);
+        *cA_offset = MY_PI * (square(*deltaY_offset) - *deltaY_offset * R);
       }
     }
 
@@ -737,7 +744,7 @@ double GranSubModNormalMDR::calculate_forces()
       amax = sqrt(deltamax_MDR * R);
     } else {
       // plastic contact
-      amax = sqrt(2.0 * deltamax_MDR * R - pow(deltamax_MDR, 2) + cA * PIINV);
+      amax = sqrt(2.0 * deltamax_MDR * R - square(deltamax_MDR) + cA * PIINV);
       amaxsq = amax * amax;
       A = 4.0 * pY * Eeffinv * amax;
       Ainv = 1.0 / A;
@@ -748,13 +755,13 @@ double GranSubModNormalMDR::calculate_forces()
 
       // force caused by full submersion of elliptical indenter to depth of A/2
       double Fmax = Eeff * (A * B * 0.25) * acos(1 - 2 * deltae1Dmax * Ainv);
-      Fmax -= (2 - 4 * deltae1Dmax * Ainv) * sqrt(deltae1Dmax * Ainv - pow(deltae1Dmax * Ainv, 2));
+      Fmax -= (2 - 4 * deltae1Dmax * Ainv) * sqrt(deltae1Dmax * Ainv - square(deltae1Dmax * Ainv));
 
       // depth of particle center
       const double zR = R - (deltamax_MDR - deltae1Dmax);
 
-      deltaR = 2 * amaxsq * (-1 + nu) - (-1 + 2 * nu) * zR * (-zR + sqrt(amaxsq + pow(zR, 2)));
-      deltaR *= Fmax / (MY_2PI * amaxsq * G * sqrt(amaxsq + pow(zR, 2)));
+      deltaR = 2 * amaxsq * (-1 + poiss) - (-1 + 2 * poiss) * zR * (-zR + sqrt(amaxsq + square(zR)));
+      deltaR *= Fmax / (MY_2PI * amaxsq * G * sqrt(amaxsq + square(zR)));
 
       // transformed elastic displacement
       deltae1D = (delta_MDR - deltamax_MDR + deltae1Dmax + deltaR) / (1 + deltaR / deltae1Dmax);
@@ -805,17 +812,17 @@ double GranSubModNormalMDR::calculate_forces()
         }
 
         if (std::isnan(F_MDR))
-          error->one(FLERR, "F_MDR is NaN, case 1: no tensile springs");
+          error->one(FLERR, "F_MDR is NaN, case 1: no tensile springs for atoms {} and {}", itag_true, jtag_true);
 
         if (history_update) *aAdh_offset = a_fac * a_na;
       } else {
         // case 2+3, tensile springs
         const double lmax = sqrt(MY_2PI * aAdh * gamma * Eeffinv);
-        g_aAdh = A * 0.5 - A * Binv * sqrt(Bsq * 0.25 - pow(aAdh, 2));
+        g_aAdh = A * 0.5 - A * Binv * sqrt(Bsq * 0.25 - square(aAdh));
         g_aAdh = round_up_negative_epsilon(g_aAdh);
 
         double tmp = 27 * A4 * B4 * gamma * Eeffinv;
-        tmp -= 2 * pow(B, 6) * gamma3 * PISQ * pow(Eeffinv, 3);
+        tmp -= 2 * powint(B, 6) * gamma3 * PISQ * cube(Eeffinv);
         tmp += sqrt(27) * Asq * B4 * sqrt(27 * A4 * Eeffsq * gammasq - 4 * Bsq * gamma4 * PISQ) * Eeffsqinv;
         tmp = cbrt(tmp);
 
@@ -843,12 +850,12 @@ double GranSubModNormalMDR::calculate_forces()
             double aAdh_tmp = aAdh;
             double fa, fa2, fa_tmp, dfda;
             for (int lv1 = 0; lv1 < MDR_MAX_IT; ++lv1) {
-              fa_tmp = deltae1D - A * 0.5 + A * sqrt(Bsq * 0.25 - pow(aAdh_tmp, 2)) * Binv;
+              fa_tmp = deltae1D - A * 0.5 + A * sqrt(Bsq * 0.25 - square(aAdh_tmp)) * Binv;
               fa = fa_tmp + sqrt(MY_2PI * aAdh_tmp * gamma * Eeffinv);
               if (abs(fa) < MDR_EPSILON1) {
                 break;
               }
-              dfda = -aAdh_tmp * A / (B * sqrt(-pow(aAdh_tmp, 2) + Bsq * 0.25));
+              dfda = -aAdh_tmp * A / (B * sqrt(-square(aAdh_tmp) + Bsq * 0.25));
               dfda += gamma * SQRTHALFPI / sqrt(aAdh_tmp * gamma * Eeff);
               aAdh_tmp = aAdh_tmp - fa / dfda;
               fa2 = fa_tmp + sqrt(MY_2PI * aAdh_tmp * gamma * Eeffinv);
@@ -861,15 +868,20 @@ double GranSubModNormalMDR::calculate_forces()
             }
             aAdh = aAdh_tmp;
 
-            g_aAdh = A * 0.5 - A * Binv * sqrt(Bsq * 0.25 - pow(aAdh, 2));
-            g_aAdh = round_up_negative_epsilon(g_aAdh);
+            if (aAdh < acrit) {
+              aAdh = 0.0;
+              F_MDR = 0.0;
+            } else {
+              g_aAdh = A * 0.5 - A * Binv * sqrt(Bsq * 0.25 - square(aAdh));
+              g_aAdh = round_up_negative_epsilon(g_aAdh);
 
-            const double deltaeAdh = g_aAdh;
-            const double F_na = calculate_nonadhesive_mdr_force(deltaeAdh, Ainv, Eeff, A, B);
-            const double F_Adhes = 2.0 * Eeff * (deltae1D - deltaeAdh) * aAdh;
-            F_MDR = F_na + F_Adhes;
-            if (std::isnan(F_MDR))
-              error->one(FLERR, "F_MDR is NaN, case 3: tensile springs exceed critical length");
+              const double deltaeAdh = g_aAdh;
+              const double F_na = calculate_nonadhesive_mdr_force(deltaeAdh, Ainv, Eeff, A, B);
+              const double F_Adhes = 2.0 * Eeff * (deltae1D - deltaeAdh) * aAdh;
+              F_MDR = F_na + F_Adhes;
+              if (std::isnan(F_MDR))
+                error->one(FLERR, "F_MDR is NaN, case 3: tensile springs exceed critical length");
+            }
           }
           if (history_update) *aAdh_offset = aAdh;
         }
@@ -883,7 +895,7 @@ double GranSubModNormalMDR::calculate_forces()
 
     // area related calculations
     double Ac;
-    (*Yflag_offset == 0.0) ? Ac = MY_PI * delta * R : Ac = MY_PI * (2.0 * delta * R - pow(delta, 2)) + cA;
+    (*Yflag_offset == 0.0) ? Ac = MY_PI * delta * R : Ac = MY_PI * (2.0 * delta * R - square(delta)) + cA;
     if (Ac < 0.0) Ac = 0.0;
     if (history_update) {
       Atot_sum[i] += wij * (Ac - MY_2PI * R * (deltamax_MDR + delta_BULK));
@@ -891,6 +903,9 @@ double GranSubModNormalMDR::calculate_forces()
     }
     Ac_avg += wij * Ac;
 
+    // contact radius for damping
+    (gamma > 0.0) ? a_damp += aAdh : a_damp += a_na;
+
     // bulk force calculation
     double F_BULK;
     (delta_BULK <= 0.0) ? F_BULK = 0.0 : F_BULK = (1.0 / Vgeo[i]) * Acon0[i] * delta_BULK * kappa * Ac;
@@ -903,7 +918,7 @@ double GranSubModNormalMDR::calculate_forces()
       *Ac_offset = wij * Ac;
 
       // radius update scheme quantity calculation
-      Vcaps[i] += MY_PI * THIRD * pow(delta, 2) * (3.0 * R - delta);
+      Vcaps[i] += MY_PI * THIRD * square(delta) * (3.0 * R - delta);
     }
 
     const double Fntmp = wij * (F_MDR + F_BULK);
@@ -917,9 +932,9 @@ double GranSubModNormalMDR::calculate_forces()
     if (history_update) eps_bar[i] += eps_bar_contact;
 
     if (history_update && delta_MDR == deltamax_MDR && *Yflag_offset > 0.0 && F_MDR > 0.0) {
-      const double Vo = FOURTHIRDS * MY_PI * pow(Ro, 3);
+      const double Vo = FOURTHIRDS * MY_PI * cube(Ro);
       dRnumerator[i] -= Vo * (eps_bar_contact - *eps_bar_offset);
-      dRnumerator[i] -= wij * MY_PI * ddeltao * (2 * deltao * Ro - pow(deltao, 2) + pow(R, 2) - pow(Ro, 2));
+      dRnumerator[i] -= wij * MY_PI * ddeltao * (2 * deltao * Ro - square(deltao) + square(R) - square(Ro));
       dRdenominator[i] += wij * 2.0 * MY_PI * R * (deltao + R - Ro);
     }
 
@@ -944,41 +959,32 @@ double GranSubModNormalMDR::calculate_forces()
   const double wij = MAX(1.0 - pij, 0.0);
 
   // assign final force
+  double damp_scale;
   if (gm->contact_type != PAIR) {
-    F = wij * F0;
+    a_damp = a_damp/2.0;
+    damp_scale = sqrt(gm->meff * 2.0 * Eeff2particle * a_damp);
+    double *deltao_offset = &history[DELTAO_0];
+    const double wfm = std::exp(10.7 * (*deltao_offset) / Rinitial[gm->i] - 10.0) + 1.0; // wall force magnifier
+    F = wij * F0 * wfm;
   } else {
+    damp_scale = sqrt(gm->meff * 2.0 * Eeff * a_damp);
     F = wij * (F0 + F1) * 0.5;
   }
 
-  // calculate damping force
-  if (F > 0.0) {
-    double Eeff2;
-    double Reff2;
-    if (gm->contact_type == PAIR) {
-      Eeff2 = E / (2.0 * (1.0 - pow(nu, 2)));
-      Reff2 = 1.0 / ((1.0 / gm->radi + 1.0 / gm->radj));
-    } else {
-      Eeff2 = E / (1.0 - pow(nu, 2));
-      Reff2 = gm->radi;
-    }
-    const double kn = Eeff2 * Reff2;
-    const double beta = -log(CoR) / sqrt(pow(log(CoR), 2) + PISQ);
-    const double damp_prefactor = beta * sqrt(gm->meff * kn);
-    const double F_DAMP = -damp_prefactor * gm->vnnr;
-
-    F += wij * F_DAMP;
+  if (history_update) {
+    double *damp_scale_offset = & history[DAMP_SCALE];
+    (a_damp <= 0.0) ? *damp_scale_offset = 0.0 : *damp_scale_offset = damp_scale;
   }
 
   return F;
 }
 
-
 /* ---------------------------------------------------------------------- */
 
 double GranSubModNormalMDR::calculate_nonadhesive_mdr_force(double delta, double Ainv, double Eeff, double A, double B)
 {
   double F_na = acos(1.0 - 2.0 * delta * Ainv);
-  F_na -= (2 - 4 * delta * Ainv) * sqrt(delta * Ainv - pow(delta * Ainv, 2));
+  F_na -= (2 - 4 * delta * Ainv) * sqrt(delta * Ainv - square(delta * Ainv));
   F_na *= 0.25 * Eeff * A * B;
 
   return F_na;
diff --git a/src/GRANULAR/gran_sub_mod_normal.h b/src/GRANULAR/gran_sub_mod_normal.h
index db96227f13..9805bd81c0 100644
--- a/src/GRANULAR/gran_sub_mod_normal.h
+++ b/src/GRANULAR/gran_sub_mod_normal.h
@@ -143,11 +143,11 @@ namespace Granular_NS {
     void coeffs_to_local() override;
     void init() override;
     double calculate_forces() override;
-    double E, nu, Y, gamma, CoR, psi_b; // specified coeffs
+    double Y, gamma, psi_b; // specified coeffs
 
    protected:
     double G, kappa, Eeff; // derived coeffs
-    double Eeffsq, Eeffinv, Eeffsqinv;
+    double Eeffsq, Eeffinv, Eeffsqinv, Eeff2particle;
     double gammasq, gamma3, gamma4;
 
     int warn_flag;
@@ -155,6 +155,7 @@ namespace Granular_NS {
     int index_Ro, index_Vgeo, index_Velas, index_Vcaps, index_eps_bar, index_dRnumerator;
     int index_dRdenominator, index_Acon0, index_Acon1, index_Atot, index_Atot_sum, index_ddelta_bar;
     int index_psi, index_sigmaxx, index_sigmayy, index_sigmazz, index_contacts, index_adhesive_length;
+    int index_dRavg;
     int fix_mdr_flag;
 
     char *id_fix;
diff --git a/src/GRANULAR/granular_model.cpp b/src/GRANULAR/granular_model.cpp
index e96debd59e..1a3a46b12e 100644
--- a/src/GRANULAR/granular_model.cpp
+++ b/src/GRANULAR/granular_model.cpp
@@ -250,12 +250,7 @@ void GranularModel::init()
 
   // Must have valid normal, damping, and tangential models
   if (normal_model->name == "none") error->all(FLERR, "Must specify normal granular model");
-  if (normal_model->name == "mdr") {
-     if (damping_model->name != "none")
-       error->all(FLERR, "MDR require 'none' damping model. To damp, specify a coefficient of restitution < 1.");
-  } else {
-    if (damping_model->name == "none") error->all(FLERR, "Must specify damping granular model");
-  }
+  if (damping_model->name == "none") error->all(FLERR, "Must specify damping granular model");
   if (tangential_model->name == "none") error->all(FLERR, "Must specify tangential granular model");
 
   // Twisting, rolling, and heat are optional
diff --git a/src/KOKKOS/meam_dens_init_kokkos.h b/src/KOKKOS/meam_dens_init_kokkos.h
index dd63be96bd..23985c7082 100644
--- a/src/KOKKOS/meam_dens_init_kokkos.h
+++ b/src/KOKKOS/meam_dens_init_kokkos.h
@@ -236,7 +236,6 @@ MEAMKokkos<DeviceType>::meam_dens_init(int inum_half, int ntype, typename AT::t_
   this->d_neighbors_half = d_neighbors_half;
   this->d_neighbors_full = d_neighbors_full;
   this->d_offset = d_offset;
-  this->nlocal = nlocal;
 
   if (need_dup) {
     dup_rho0 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_rho0);
diff --git a/src/KOKKOS/meam_force_kokkos.h b/src/KOKKOS/meam_force_kokkos.h
index 1875e22dcf..703bee23d7 100644
--- a/src/KOKKOS/meam_force_kokkos.h
+++ b/src/KOKKOS/meam_force_kokkos.h
@@ -17,7 +17,6 @@ void MEAMKokkos<DeviceType>::meam_force(
 {
   EV_FLOAT ev;
 
-  this->eflag_either = eflag_either;
   this->eflag_global = eflag_global;
   this->eflag_atom = eflag_atom;
   this->vflag_global = vflag_global;
diff --git a/src/KOKKOS/mliap_unified_couple_kokkos.pyx b/src/KOKKOS/mliap_unified_couple_kokkos.pyx
index 1492962013..91dce645ac 100644
--- a/src/KOKKOS/mliap_unified_couple_kokkos.pyx
+++ b/src/KOKKOS/mliap_unified_couple_kokkos.pyx
@@ -498,7 +498,7 @@ cdef public object mliap_unified_connect_kokkos(char *fname, MLIAPDummyModel * m
         unified = LOADED_MODEL
     elif str_fname.endswith(".pt") or str_fname.endswith('.pth'):
         import torch
-        unified = torch.load(str_fname)
+        unified = torch.load(str_fname,weights_only=False)
     else:
         with open(str_fname, 'rb') as pfile:
             unified = pickle.load(pfile)
diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h
index 399142dfaf..63e637c108 100644
--- a/src/KOKKOS/pair_kokkos.h
+++ b/src/KOKKOS/pair_kokkos.h
@@ -961,7 +961,7 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
       lastcall = fpair->lmp->update->ntimestep;
       vectorsize = GetMaxNeighs(list);
       if (vectorsize == 0) vectorsize = 1;
-      vectorsize = MathSpecial::powint(2,(int(log2(vectorsize) + 0.5))); // round to nearest power of 2
+      vectorsize = MathSpecial::powint(2.0,(int(log2(double(vectorsize)) + 0.5))); // round to nearest power of 2
 
   #if defined(KOKKOS_ENABLE_HIP)
       int max_vectorsize = 64;
diff --git a/src/LATBOLTZ/fix_lb_fluid.cpp b/src/LATBOLTZ/fix_lb_fluid.cpp
index 286b56cab5..d5161bba42 100644
--- a/src/LATBOLTZ/fix_lb_fluid.cpp
+++ b/src/LATBOLTZ/fix_lb_fluid.cpp
@@ -2344,7 +2344,7 @@ void FixLbFluid::SetupBuffers()
                   MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &dump_file_handle_raw);
 
     MPI_File_set_size(dump_file_handle_raw, 0);
-    MPI_File_set_view(dump_file_handle_raw, 0, MPI_DOUBLE, dump_file_mpitype, "native",
+    MPI_File_set_view(dump_file_handle_raw, 0, MPI_DOUBLE, dump_file_mpitype, (char *)"native",
                       MPI_INFO_NULL);
   }
 }
diff --git a/src/MC/Install.sh b/src/MC/Install.sh
index ccf6767c4d..efe6b7c07b 100755
--- a/src/MC/Install.sh
+++ b/src/MC/Install.sh
@@ -51,6 +51,8 @@ action fix_charge_regulation.cpp
 action fix_charge_regulation.h
 action fix_gcmc.cpp
 action fix_gcmc.h
+action fix_neighbor_swap.cpp  compute_voronoi_atom.cpp
+action fix_neighbor_swap.h    compute_voronoi_atom.h
 action fix_mol_swap.cpp
 action fix_mol_swap.h
 action fix_sgcmc.cpp   pair_eam.cpp
diff --git a/src/MC/fix_neighbor_swap.cpp b/src/MC/fix_neighbor_swap.cpp
new file mode 100644
index 0000000000..d44181a981
--- /dev/null
+++ b/src/MC/fix_neighbor_swap.cpp
@@ -0,0 +1,901 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Jacob Tavenner
+------------------------------------------------------------------------- */
+
+#include "fix_neighbor_swap.h"
+
+#include "angle.h"
+#include "atom.h"
+#include "bond.h"
+#include "comm.h"
+#include "compute.h"
+#include "compute_voronoi_atom.h"
+#include "dihedral.h"
+#include "domain.h"
+#include "error.h"
+#include "fix.h"
+#include "force.h"
+#include "group.h"
+#include "improper.h"
+#include "kspace.h"
+#include "math_special.h"
+#include "memory.h"
+#include "modify.h"
+#include "neighbor.h"
+#include "pair.h"
+#include "random_park.h"
+#include "region.h"
+#include "update.h"
+
+#include <cctype>
+#include <cfloat>
+#include <cmath>
+#include <cstring>
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+static const char cite_fix_neighbor_swap_c[] =
+    "fix neighbor/swap command: doi:10.1016/j.commatsci.2022.111929\n\n"
+    "@Article{Tavenner2023111929,\n"
+    " author = {Jacob P. Tavenner and Mikhail I. Mendelev and John W. Lawson},\n"
+    " title = {Molecular dynamics based kinetic Monte Carlo simulation for accelerated "
+    "diffusion},\n"
+    " journal = {Computational Materials Science},\n"
+    " year = {2023},\n"
+    " volume = {218},\n"
+    " pages = {111929},\n"
+    " url = {https://www.sciencedirect.com/science/article/pii/S0927025622006401}\n"
+    "}\n\n";
+
+/* ---------------------------------------------------------------------- */
+
+FixNeighborSwap::FixNeighborSwap(LAMMPS *lmp, int narg, char **arg) :
+    Fix(lmp, narg, arg), region(nullptr), idregion(nullptr), type_list(nullptr), qtype(nullptr),
+    c_voro(nullptr), voro_neighbor_list(nullptr), sqrt_mass_ratio(nullptr),
+    local_swap_iatom_list(nullptr), random_equal(nullptr), c_pe(nullptr)
+{
+  if (narg < 10) utils::missing_cmd_args(FLERR, "fix neighbor/swap", error);
+
+  dynamic_group_allow = 1;
+
+  vector_flag = 1;
+  size_vector = 2;
+  global_freq = 1;
+  extvector = 0;
+  restart_global = 1;
+  time_depend = 1;
+
+  // required args: arg[3..7] = nevery, ncycles, seed, temperature, r_0 (distance scale in r / r_0)
+
+  nevery = utils::inumeric(FLERR, arg[3], false, lmp);
+  ncycles = utils::inumeric(FLERR, arg[4], false, lmp);
+  seed = utils::inumeric(FLERR, arg[5], false, lmp);
+  double temperature = utils::numeric(FLERR, arg[6], false, lmp);
+  r_0 = utils::numeric(FLERR, arg[7], false, lmp);
+
+  // arg[8] = ID of the compute providing the neighbor pairs; pass it directly (no leaked copy)
+
+  int icompute = modify->find_compute(arg[8]);
+  if (icompute < 0) error->all(FLERR, "Could not find neighbor compute ID");
+  c_voro = modify->compute[icompute];
+  if (c_voro->local_flag == 0)
+    error->all(FLERR, "Neighbor compute does not compute local info");
+  if (c_voro->size_local_cols != 3)
+    error->all(FLERR, "Neighbor compute does not give i, j, size as expected");
+
+  if (nevery <= 0) error->all(FLERR, "Illegal fix neighbor/swap command nevery value");
+  if (ncycles < 0) error->all(FLERR, "Illegal fix neighbor/swap command ncycles value");
+  if (seed <= 0) error->all(FLERR, "Illegal fix neighbor/swap command seed value");
+  if (temperature <= 0.0) error->all(FLERR, "Illegal fix neighbor/swap command temperature value");
+
+  beta = 1.0 / (force->boltz * temperature);
+
+  memory->create(type_list, atom->ntypes, "neighbor/swap:type_list");
+  memory->create(rate_list, atom->ntypes, "neighbor/swap:rate_list");
+
+  // optional keywords start after the compute ID, i.e. at arg[9]
+
+  options(narg - 9, &arg[9]);
+
+  // random number generator, same for all procs
+
+  random_equal = new RanPark(lmp, seed);
+
+  // set up reneighboring
+
+  force_reneighbor = 1;
+  next_reneighbor = update->ntimestep + 1;
+
+  // zero out counters
+
+  nswap_attempts = 0.0;
+  nswap_successes = 0.0;
+
+  atom_swap_nmax = 0;
+  voro_neighbor_list = nullptr;
+  local_swap_iatom_list = nullptr;
+  local_swap_neighbor_list = nullptr;
+  local_swap_probability = nullptr;
+  local_swap_type_list = nullptr;
+
+  // set comm size needed by this Fix: type only, or type + charge
+
+  if (atom->q_flag)
+    comm_forward = 2;
+  else
+    comm_forward = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixNeighborSwap::~FixNeighborSwap()
+{
+  memory->destroy(type_list);
+  memory->destroy(rate_list);
+  memory->destroy(qtype);
+  memory->destroy(sqrt_mass_ratio);
+  memory->destroy(local_swap_iatom_list);
+  memory->destroy(local_swap_neighbor_list);
+  memory->destroy(local_swap_probability);
+  memory->destroy(local_swap_type_list);
+  delete[] idregion;
+  delete random_equal;
+}
+
+/* ----------------------------------------------------------------------
+   parse optional parameters at end of input line
+------------------------------------------------------------------------- */
+
+void FixNeighborSwap::options(int narg, char **arg)
+{
+  if (narg < 0) error->all(FLERR, "Illegal fix neighbor/swap command\n");
+
+  ke_flag = 1;
+  diff_flag = 0;
+  rates_flag = 0;
+  nswaptypes = 0;
+
+  int iarg = 0;
+  while (iarg < narg) {
+    if (strcmp(arg[iarg], "region") == 0) {
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix neighbor/swap command");
+      region = domain->get_region_by_id(arg[iarg + 1]);
+      if (!region) error->all(FLERR, "Region ID for fix neighbor/swap does not exist");
+      idregion = utils::strdup(arg[iarg + 1]);
+      iarg += 2;
+    } else if (strcmp(arg[iarg], "ke") == 0) {
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix neighbor/swap command");
+      ke_flag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
+      iarg += 2;
+    } else if (strcmp(arg[iarg], "types") == 0) {
+      if (iarg + 3 > narg) error->all(FLERR, "Illegal fix neighbor/swap command");
+      if (diff_flag != 0) error->all(FLERR, "Illegal fix neighbor/swap command");
+      iarg++;
+      nswaptypes = 0;
+      while (iarg < narg) {
+        if (isalpha((unsigned char) arg[iarg][0])) break;
+        if (nswaptypes >= atom->ntypes) error->all(FLERR, "Illegal fix neighbor/swap command");
+        type_list[nswaptypes] = utils::inumeric(FLERR, arg[iarg], false, lmp);
+        nswaptypes++;
+        iarg++;
+      }
+    } else if (strcmp(arg[iarg], "diff") == 0) {
+      if (iarg + 2 > narg) error->all(FLERR, "Illegal fix neighbor/swap command");
+      if (nswaptypes != 0) error->all(FLERR, "Illegal fix neighbor/swap command");
+      type_list[nswaptypes] = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
+      diff_flag = 1;
+      nswaptypes++;
+      iarg += 2;
+    } else if (strcmp(arg[iarg], "rates") == 0) {
+      if (iarg + atom->ntypes >= narg) error->all(FLERR, "Illegal fix neighbor/swap command");
+      iarg++;
+      int i = 0;
+      while (iarg < narg) {
+        if (isalpha((unsigned char) arg[iarg][0])) break;
+        if (i >= atom->ntypes) error->all(FLERR, "Illegal fix neighbor/swap command");
+        rate_list[i] = utils::numeric(FLERR, arg[iarg], false, lmp);
+        i++;
+        iarg++;
+      }
+      rates_flag = 1;
+      if (i != atom->ntypes) error->all(FLERR, "Illegal fix neighbor/swap command");
+    } else
+      error->all(FLERR, "Illegal fix neighbor/swap command");
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixNeighborSwap::setmask()
+{
+  int mask = 0;
+  mask |= PRE_EXCHANGE;
+  return mask;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNeighborSwap::init()
+{
+  c_pe = modify->get_compute_by_id("thermo_pe");
+
+  int *type = atom->type;
+
+  if (nswaptypes < 2 && !diff_flag)
+    error->all(FLERR, "Must specify at least 2 types in fix neighbor/swap command");
+
+  // look the region up again by its stored ID and check that it still exists
+
+  if (idregion) {
+    region = domain->get_region_by_id(idregion);
+    if (!region) error->all(FLERR, "Region {} for fix neighbor/swap does not exist", idregion);
+  }
+
+  for (int iswaptype = 0; iswaptype < nswaptypes; iswaptype++)
+    if (type_list[iswaptype] <= 0 || type_list[iswaptype] > atom->ntypes)
+      error->all(FLERR, "Invalid atom type in fix neighbor/swap command");
+
+  if (atom->q_flag) {
+    double qmax, qmin;
+    int firstall, first;
+    memory->create(qtype, nswaptypes, "neighbor/swap:qtype");
+    for (int iswaptype = 0; iswaptype < nswaptypes; iswaptype++) {
+      first = 1;
+      for (int i = 0; i < atom->nlocal; i++) {
+        if (atom->mask[i] & groupbit) {
+          if (type[i] == type_list[iswaptype]) {
+            if (first) {
+              qtype[iswaptype] = atom->q[i];
+              first = 0;
+            } else if (qtype[iswaptype] != atom->q[i])
+              error->one(FLERR, "All atoms of a swapped type must have the same charge.");
+          }
+        }
+      }
+      MPI_Allreduce(&first, &firstall, 1, MPI_INT, MPI_MIN, world);
+      if (firstall)
+        error->all(FLERR,
+                   "At least one atom of each swapped type must be present to define charges.");
+      if (first) qtype[iswaptype] = -DBL_MAX;
+      MPI_Allreduce(&qtype[iswaptype], &qmax, 1, MPI_DOUBLE, MPI_MAX, world);
+      if (first) qtype[iswaptype] = DBL_MAX;
+      MPI_Allreduce(&qtype[iswaptype], &qmin, 1, MPI_DOUBLE, MPI_MIN, world);
+      if (qmax != qmin) error->all(FLERR, "All atoms of a swapped type must have same charge.");
+    }
+  }
+
+  memory->create(sqrt_mass_ratio, atom->ntypes + 1, atom->ntypes + 1,
+                 "neighbor/swap:sqrt_mass_ratio");
+  for (int itype = 1; itype <= atom->ntypes; itype++)
+    for (int jtype = 1; jtype <= atom->ntypes; jtype++)
+      sqrt_mass_ratio[itype][jtype] = sqrt(atom->mass[itype] / atom->mass[jtype]);
+
+  // check to see if itype and jtype cutoffs are the same
+  // if not, reneighboring will be needed between swaps
+
+  double **cutsq = force->pair->cutsq;
+  unequal_cutoffs = false;
+  for (int iswaptype = 0; iswaptype < nswaptypes; iswaptype++)
+    for (int jswaptype = 0; jswaptype < nswaptypes; jswaptype++)
+      for (int ktype = 1; ktype <= atom->ntypes; ktype++)
+        if (cutsq[type_list[iswaptype]][ktype] != cutsq[type_list[jswaptype]][ktype])
+          unequal_cutoffs = true;
+
+  // check that no swappable atoms are in atom->firstgroup
+  // swapping such an atom might not leave firstgroup atoms first
+
+  if (atom->firstgroup >= 0) {
+    int *mask = atom->mask;
+    int firstgroupbit = group->bitmask[atom->firstgroup];
+
+    int flag = 0;
+    for (int i = 0; i < atom->nlocal; i++)
+      if ((mask[i] & groupbit) && (mask[i] & firstgroupbit)) flag = 1;
+
+    int flagall;
+    MPI_Allreduce(&flag, &flagall, 1, MPI_INT, MPI_SUM, world);
+
+    if (flagall) error->all(FLERR, "Cannot do neighbor/swap on atoms in atom_modify first group");
+  }
+}
+
+/* ----------------------------------------------------------------------
+   attempt Monte Carlo swaps
+------------------------------------------------------------------------- */
+
+void FixNeighborSwap::pre_exchange()
+{
+  // just return if should not be called on this timestep
+
+  if (next_reneighbor != update->ntimestep) return;
+
+  // ensure current system is ready to compute energy
+
+  if (domain->triclinic) domain->x2lamda(atom->nlocal);
+  domain->pbc();
+  comm->exchange();
+  comm->borders();
+  if (domain->triclinic) domain->lamda2x(atom->nlocal + atom->nghost);
+  if (modify->n_pre_neighbor) modify->pre_neighbor();
+  neighbor->build(1);
+
+  // energy_stored = energy of current state
+  // will be updated after accepted swaps
+
+  energy_stored = energy_full();
+
+  // attempt Ncycle atom swaps
+
+  int nsuccess = 0;
+  update_iswap_atoms_list();
+  for (int i = 0; i < ncycles; i++) nsuccess += attempt_swap();
+
+  // update MC stats
+
+  nswap_attempts += ncycles;
+  nswap_successes += nsuccess;
+
+  next_reneighbor = update->ntimestep + nevery;
+}
+
+/* ----------------------------------------------------------------------
+   attempt a swap of a pair of atoms
+   compare before/after energy and accept/reject the swap
+------------------------------------------------------------------------- */
+
+int FixNeighborSwap::attempt_swap()
+{
+  // i and j below are local atom indices, or -1 on procs that do not own the picked atom
+  tagint *id = atom->tag;
+
+  if (niswap == 0) return 0;
+
+  // pre-swap energy
+
+  double energy_before = energy_stored;
+
+  // pick a random atom i
+
+  int i = pick_i_swap_atom();
+
+  // get global id and position of atom i
+  // (pick_i_swap_atom() has stored the global ID of atom i in id_center)
+
+  // build nearest-neighbor list based on atom i
+
+  build_i_neighbor_list(i);
+  if (njswap <= 0) return 0;
+
+  // pick neighbor j; NOTE(review): jtype_selected is a type_list index unless diff_flag - confirm
+  jtype_selected = -1;
+  int j = pick_j_swap_neighbor(i);
+
+  int itype = type_list[0];
+  int jtype = jtype_selected;
+
+  // Accept swap if types are equal, no change to system
+  if (itype == jtype) { return 1; }
+
+  // swap their properties (jtype_selected serves as both atom type and qtype index here)
+  if (i >= 0) {
+    atom->type[i] = jtype;
+    if (atom->q_flag) atom->q[i] = qtype[jtype_selected];
+  }
+  if (j >= 0) {
+    atom->type[j] = itype;
+    if (atom->q_flag) atom->q[j] = qtype[0];
+  }
+
+  // if unequal_cutoffs, call comm->borders() and rebuild neighbor list
+  // else communicate ghost atoms
+  // call to comm->exchange() is a no-op but clears ghost atoms
+
+  if (unequal_cutoffs) {
+    if (domain->triclinic) domain->x2lamda(atom->nlocal);
+    domain->pbc();
+    comm->exchange();
+    comm->borders();
+    if (domain->triclinic) domain->lamda2x(atom->nlocal + atom->nghost);
+    if (modify->n_pre_neighbor) modify->pre_neighbor();
+    neighbor->build(1);
+  } else {
+    comm->forward_comm(this);
+  }
+
+  // post-swap energy
+
+  double energy_after = energy_full();
+
+  // if swap accepted, return 1
+  // if ke_flag, rescale atom velocities
+
+  if (random_equal->uniform() < exp(beta * (energy_before - energy_after))) {
+    update_iswap_atoms_list();
+    if (ke_flag) {
+      if (i >= 0) {
+        atom->v[i][0] *= sqrt_mass_ratio[itype][jtype];
+        atom->v[i][1] *= sqrt_mass_ratio[itype][jtype];
+        atom->v[i][2] *= sqrt_mass_ratio[itype][jtype];
+      }
+      if (j >= 0) {
+        atom->v[j][0] *= sqrt_mass_ratio[jtype][itype];
+        atom->v[j][1] *= sqrt_mass_ratio[jtype][itype];
+        atom->v[j][2] *= sqrt_mass_ratio[jtype][itype];
+      }
+    }
+    energy_stored = energy_after;
+    return 1;
+  }
+
+  // swap not accepted, return 0
+  // restore the swapped itype & jtype atoms
+  // do not need to re-call comm->borders() and rebuild neighbor list
+  //   since will be done on next cycle or in Verlet when this fix finishes
+
+  if (i >= 0) {
+    atom->type[i] = itype;
+    if (atom->q_flag) atom->q[i] = qtype[0];
+  }
+  if (j >= 0) {
+    atom->type[j] = jtype;
+    if (atom->q_flag) atom->q[j] = qtype[jtype_selected];
+  }
+
+  return 0;
+}
+
+/* ----------------------------------------------------------------------
+   compute system potential energy
+------------------------------------------------------------------------- */
+
+double FixNeighborSwap::energy_full()
+{
+  int eflag = 1;
+  int vflag = 0;
+
+  if (modify->n_pre_force) modify->pre_force(vflag);
+
+  if (force->pair) force->pair->compute(eflag, vflag);
+
+  if (atom->molecular != Atom::ATOMIC) {
+    if (force->bond) force->bond->compute(eflag, vflag);
+    if (force->angle) force->angle->compute(eflag, vflag);
+    if (force->dihedral) force->dihedral->compute(eflag, vflag);
+    if (force->improper) force->improper->compute(eflag, vflag);
+  }
+
+  if (force->kspace) force->kspace->compute(eflag, vflag);
+
+  if (modify->n_post_force_any) modify->post_force(vflag);
+
+  update->eflag_global = update->ntimestep;
+  double total_energy = c_pe->compute_scalar();
+
+  return total_energy;
+}
+
+/* ----------------------------------------------------------------------
+------------------------------------------------------------------------- */
+
+int FixNeighborSwap::pick_i_swap_atom()
+{
+  tagint *id = atom->tag;
+  int id_center_local = -1;
+  int i = -1;
+
+  // all procs draw the same index: random_equal is created from the same seed everywhere
+  int iwhichglobal = static_cast<int>(niswap * random_equal->uniform());
+  if ((iwhichglobal >= niswap_before) && (iwhichglobal < niswap_before + niswap_local)) {
+    int iwhichlocal = iwhichglobal - niswap_before;
+    i = local_swap_iatom_list[iwhichlocal];
+    id_center_local = id[i];
+  }
+
+  // non-owners contribute -1 (never index id[] with i == -1); MPI_MAX yields the owner's ID
+  MPI_Allreduce(&id_center_local, &id_center, 1, MPI_INT, MPI_MAX, world);
+  return i;
+}
+
+/* ----------------------------------------------------------------------
+------------------------------------------------------------------------- */
+
+int FixNeighborSwap::pick_j_swap_neighbor(int i)
+{
+  int j = -1;
+  int jtype_selected_local = -1;
+
+  // Generate random double from 0 to maximum global probability
+  double selected_prob = static_cast<double>(global_probability * random_equal->uniform());
+
+  // Only the proc whose cumulative probability interval contains the draw selects j
+  if ((selected_prob >= prev_probability) &&
+      (selected_prob < prev_probability + local_probability)) {
+    double search_prob = selected_prob - prev_probability;
+    for (int n = 0; n < njswap_local; n++) {
+      if (search_prob > local_swap_probability[n]) {
+        search_prob -= local_swap_probability[n];
+      } else {
+        j = local_swap_neighbor_list[n];
+        jtype_selected_local = local_swap_type_list[n];
+        break;
+      }
+    }
+    // reached by a single proc only, so it must be error->one(); error->all() would hang
+    if (j < 0) error->one(FLERR, "Did not select local neighbor swap atom");
+  }
+  // procs that did not select j contribute -1; MPI_MAX hands the selected type to all procs
+  MPI_Allreduce(&jtype_selected_local, &jtype_selected, 1, MPI_INT, MPI_MAX, world);
+  return j;
+}
+
+/* ----------------------------------------------------------------------
+------------------------------------------------------------------------- */
+
+double FixNeighborSwap::get_distance(double *i, double *j)
+{
+  double r = sqrt(MathSpecial::square((i[0] - j[0])) + MathSpecial::square((i[1] - j[1])) +
+                  MathSpecial::square((i[2] - j[2])));
+  return r;
+}
+
+/* ----------------------------------------------------------------------
+------------------------------------------------------------------------- */
+
+void FixNeighborSwap::build_i_neighbor_list(int i_center)
+{
+  int nghost = atom->nghost;
+  int nlocal = atom->nlocal;
+  int *type = atom->type;
+  double **x = atom->x;
+  tagint *id = atom->tag;
+
+  // (Re)allocate the per-candidate lists: local index, selection weight, and type entry
+
+  memory->sfree(local_swap_neighbor_list);
+  atom_swap_nmax = atom->nmax;
+  local_swap_neighbor_list =
+      (int *) memory->smalloc(atom_swap_nmax * sizeof(int), "MCSWAP:local_swap_neighbor_list");
+
+  memory->sfree(local_swap_probability);
+  local_swap_probability = (double *) memory->smalloc(atom_swap_nmax * sizeof(double),
+                                                      "MCSWAP:local_swap_probability_list");
+
+  memory->sfree(local_swap_type_list);
+  local_swap_type_list =
+      (int *) memory->smalloc(atom_swap_nmax * sizeof(int), "MCSWAP:local_swap_type_list");
+
+  // Compute voronoi and access neighbor list
+
+  c_voro->compute_local();
+
+  voro_neighbor_list = c_voro->array_local;
+  njswap_local = 0;
+  local_probability = 0.0;
+
+  for (int n = 0; n < c_voro->size_local_rows; n++) {
+
+    int temp_j_id = -1;
+    int temp_j = -1;
+
+    // Find local voronoi entry with selected central atom
+    if ((int) voro_neighbor_list[n][0] == id_center) {
+      temp_j_id = voro_neighbor_list[n][1];
+      temp_j = -1;
+    } else if (((int) voro_neighbor_list[n][1] == id_center) && (i_center < 0)) {
+      temp_j_id = voro_neighbor_list[n][0];
+      temp_j = -1;
+    } else {
+      continue;
+    }
+
+    // Find which local atom corresponds to neighbor
+    for (int j = 0; j < nlocal; j++) {
+      if (temp_j_id == id[j]) {
+        temp_j = j;
+        break;
+      }
+    }
+
+    // If temp_j not on this processor, skip
+    if (temp_j < 0) continue;
+
+    if (region) {
+      if (region->match(x[temp_j][0], x[temp_j][1], x[temp_j][2]) == 1) {
+        if (atom->mask[temp_j] & groupbit) {
+          if (diff_flag) {
+            // Calculate distance from i to each j, adjust probability of selection
+
+            // Get distance if own center atom (assign the outer r, do not shadow it)
+            double r = INFINITY;
+            if (i_center >= 0) { r = get_distance(x[temp_j], x[i_center]); }
+
+            // Get local id of ghost center atom when ghost
+            for (int i = nlocal; i < nlocal + nghost; i++) {
+              if ((id[i] == id_center) && (get_distance(x[temp_j], x[i]) < r)) {
+                r = get_distance(x[temp_j], x[i]);
+              }
+            }
+
+            if (rates_flag) {
+              local_swap_probability[njswap_local] =
+                  rate_list[type[temp_j] - 1] * exp(-MathSpecial::square(r / r_0));
+            } else {
+              local_swap_probability[njswap_local] = exp(-MathSpecial::square(r / r_0));
+            }
+            local_probability += local_swap_probability[njswap_local];
+            local_swap_type_list[njswap_local] = type[temp_j];
+            local_swap_neighbor_list[njswap_local] = temp_j;
+            njswap_local++;
+          } else {
+            for (int jswaptype = 1; jswaptype < nswaptypes; jswaptype++) {
+              if (type[temp_j] == type_list[jswaptype]) {
+                // Calculate distance from i to each j, adjust probability of selection
+                // Get distance if own center atom (assign the outer r, do not shadow it)
+                double r = INFINITY;
+                if (i_center >= 0) { r = get_distance(x[temp_j], x[i_center]); }
+
+                // Get local id of ghost center atom when ghost
+                for (int i = nlocal; i < nlocal + nghost; i++) {
+                  if ((id[i] == id_center) && (get_distance(x[temp_j], x[i]) < r)) {
+                    r = get_distance(x[temp_j], x[i]);
+                  }
+                }
+
+                if (rates_flag) {
+                  local_swap_probability[njswap_local] =
+                      rate_list[type[temp_j] - 1] * exp(-MathSpecial::square(r / r_0));
+                } else {
+                  local_swap_probability[njswap_local] = exp(-MathSpecial::square(r / r_0));
+                }
+                local_probability += local_swap_probability[njswap_local];
+
+                local_swap_type_list[njswap_local] = jswaptype;
+                local_swap_neighbor_list[njswap_local] = temp_j;
+                njswap_local++;
+              }
+            }
+          }
+        }
+      }
+    } else {
+      if (atom->mask[temp_j] & groupbit) {
+        if (diff_flag) {
+          // Calculate distance from i to each j, adjust probability of selection
+          // Get distance if own center atom
+          double r = INFINITY;
+          if (i_center >= 0) { r = get_distance(x[temp_j], x[i_center]); }
+
+          // Get local id of ghost center atoms
+          for (int i = nlocal; i < nlocal + nghost; i++) {
+            if ((id[i] == id_center) && (get_distance(x[temp_j], x[i]) < r))
+              r = get_distance(x[temp_j], x[i]);
+          }
+
+          if (rates_flag) {
+            local_swap_probability[njswap_local] =
+                rate_list[type[temp_j] - 1] * exp(-MathSpecial::square(r / r_0));
+          } else {
+            local_swap_probability[njswap_local] = exp(-MathSpecial::square(r / r_0));
+          }
+          local_probability += local_swap_probability[njswap_local];
+
+          local_swap_type_list[njswap_local] = type[temp_j];
+          local_swap_neighbor_list[njswap_local] = temp_j;
+          njswap_local++;
+        } else {
+          for (int jswaptype = 1; jswaptype < nswaptypes; jswaptype++) {
+            if (type[temp_j] == type_list[jswaptype]) {
+              // Calculate distance from i to each j, adjust probability of selection
+              // Get distance if own center atom (assign the outer r, do not shadow it)
+              double r = INFINITY;
+              if (i_center >= 0) { r = get_distance(x[temp_j], x[i_center]); }
+
+              // Get local id of ghost center atom when ghost
+              for (int i = nlocal; i < nlocal + nghost; i++) {
+                if ((id[i] == id_center) && (get_distance(x[temp_j], x[i]) < r)) {
+                  r = get_distance(x[temp_j], x[i]);
+                }
+              }
+
+              if (rates_flag) {
+                local_swap_probability[njswap_local] =
+                    rate_list[type[temp_j] - 1] * exp(-MathSpecial::square(r / r_0));
+              } else {
+                local_swap_probability[njswap_local] = exp(-MathSpecial::square(r / r_0));
+              }
+              local_probability += local_swap_probability[njswap_local];
+
+              local_swap_type_list[njswap_local] = jswaptype;
+              local_swap_neighbor_list[njswap_local] = temp_j;
+              njswap_local++;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  MPI_Allreduce(&njswap_local, &njswap, 1, MPI_INT, MPI_SUM, world);
+  MPI_Scan(&njswap_local, &njswap_before, 1, MPI_INT, MPI_SUM, world);
+  njswap_before -= njswap_local;
+
+  MPI_Allreduce(&local_probability, &global_probability, 1, MPI_DOUBLE, MPI_SUM, world);
+  MPI_Scan(&local_probability, &prev_probability, 1, MPI_DOUBLE, MPI_SUM, world);
+  prev_probability -= local_probability;
+}
+
+/* ----------------------------------------------------------------------
+   update the list of swap atoms
+------------------------------------------------------------------------- */
+
+void FixNeighborSwap::update_iswap_atoms_list()
+{
+  int nlocal = atom->nlocal;
+  int *type = atom->type;
+  double **x = atom->x;
+
+  if (atom->nmax > atom_swap_nmax) {
+    memory->sfree(local_swap_iatom_list);
+    atom_swap_nmax = atom->nmax;
+    local_swap_iatom_list =
+        (int *) memory->smalloc(atom_swap_nmax * sizeof(int), "MCSWAP:local_swap_iatom_list");
+  }
+
+  niswap_local = 0;
+
+  if (region) {
+
+    for (int i = 0; i < nlocal; i++) {
+      if (region->match(x[i][0], x[i][1], x[i][2]) == 1) {
+        if (atom->mask[i] & groupbit) {
+          if (type[i] == type_list[0]) {
+            local_swap_iatom_list[niswap_local] = i;
+            niswap_local++;
+          }
+        }
+      }
+    }
+
+  } else {
+    for (int i = 0; i < nlocal; i++) {
+      if (atom->mask[i] & groupbit) {
+        if (type[i] == type_list[0]) {
+          local_swap_iatom_list[niswap_local] = i;
+          niswap_local++;
+        }
+      }
+    }
+  }
+
+  MPI_Allreduce(&niswap_local, &niswap, 1, MPI_INT, MPI_SUM, world);
+  MPI_Scan(&niswap_local, &niswap_before, 1, MPI_INT, MPI_SUM, world);
+  niswap_before -= niswap_local;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixNeighborSwap::pack_forward_comm(int n, int *list, double *buf, int /*pbc_flag*/,
+                                       int * /*pbc*/)
+{
+  int i, j, m;
+
+  int *type = atom->type;
+  double *q = atom->q;
+
+  m = 0;
+
+  if (atom->q_flag) {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = type[j];
+      buf[m++] = q[j];
+    }
+  } else {
+    for (i = 0; i < n; i++) {
+      j = list[i];
+      buf[m++] = type[j];
+    }
+  }
+
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNeighborSwap::unpack_forward_comm(int n, int first, double *buf)
+{
+  int i, m, last;
+
+  int *type = atom->type;
+  double *q = atom->q;
+
+  m = 0;
+  last = first + n;
+
+  if (atom->q_flag) {
+    for (i = first; i < last; i++) {
+      type[i] = static_cast<int>(buf[m++]);
+      q[i] = buf[m++];
+    }
+  } else {
+    for (i = first; i < last; i++) type[i] = static_cast<int>(buf[m++]);
+  }
+}
+
+/* ----------------------------------------------------------------------
+  return acceptance ratio
+------------------------------------------------------------------------- */
+
+double FixNeighborSwap::compute_vector(int n)
+{
+  if (n == 0) return nswap_attempts;
+  if (n == 1) return nswap_successes;
+  return 0.0;
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based arrays
+------------------------------------------------------------------------- */
+
+double FixNeighborSwap::memory_usage()
+{
+  double bytes = (double) atom_swap_nmax * sizeof(int);
+  return bytes;
+}
+
+/* ----------------------------------------------------------------------
+   pack entire state of Fix into one write
+------------------------------------------------------------------------- */
+
+void FixNeighborSwap::write_restart(FILE *fp)
+{
+  int n = 0;
+  double list[6];
+  list[n++] = random_equal->state();
+  list[n++] = ubuf(next_reneighbor).d;
+  list[n++] = nswap_attempts;
+  list[n++] = nswap_successes;
+  list[n++] = ubuf(update->ntimestep).d;
+
+  if (comm->me == 0) {
+    int size = n * sizeof(double);
+    fwrite(&size, sizeof(int), 1, fp);
+    fwrite(list, sizeof(double), n, fp);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   use state info from restart file to restart the Fix
+------------------------------------------------------------------------- */
+
+void FixNeighborSwap::restart(char *buf)
+{
+  int n = 0;
+  double *list = (double *) buf;
+
+  seed = static_cast<int>(list[n++]);
+  random_equal->reset(seed);
+
+  next_reneighbor = (bigint) ubuf(list[n++]).i;
+  // counters were written as doubles: restore them without narrowing to int
+  nswap_attempts = list[n++];
+  nswap_successes = list[n++];
+
+  bigint ntimestep_restart = (bigint) ubuf(list[n++]).i;
+  if (ntimestep_restart != update->ntimestep)
+    error->all(FLERR, "Must not reset timestep when restarting fix neighbor/swap");
+}
diff --git a/src/MC/fix_neighbor_swap.h b/src/MC/fix_neighbor_swap.h
new file mode 100644
index 0000000000..821eda1bdc
--- /dev/null
+++ b/src/MC/fix_neighbor_swap.h
@@ -0,0 +1,100 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+// clang-format off
+FixStyle(neighbor/swap,FixNeighborSwap);
+// clang-format on
+#else
+
+#ifndef LMP_FIX_NEIGH_SWAP_H
+#define LMP_FIX_NEIGH_SWAP_H
+
+#include "fix.h"
+
+namespace LAMMPS_NS {
+
+// Implements fix style neighbor/swap; the public methods below override the matching Fix hooks.
+class FixNeighborSwap : public Fix {
+ public:
+  FixNeighborSwap(class LAMMPS *, int, char **);
+  ~FixNeighborSwap() override;
+  int setmask() override;
+  void init() override;
+  void pre_exchange() override;
+  int pack_forward_comm(int, int *, double *, int, int *) override;
+  void unpack_forward_comm(int, int, double *) override;
+  double compute_vector(int) override;
+  double memory_usage() override;
+  void write_restart(FILE *) override;
+  void restart(char *) override;
+
+ private:
+  int nevery, seed;
+  int ke_flag;       // yes = conserve ke, no = do not conserve ke
+  int diff_flag;     // yes = simulate diffusion of central atom, no = swap only to certain types
+  int rates_flag;    // yes = use modified type rates, no = swap rates are equivalent across types
+  int ncycles;
+  int niswap, njswap;                  // # of i,j swap atoms on all procs
+  int niswap_local, njswap_local;      // # of swap atoms on this proc
+  int niswap_before, njswap_before;    // # of swap atoms on procs < this proc
+  class Region *region;    // swap region
+  char *idregion;          // swap region id
+
+  int nswaptypes;
+  int jtype_selected;
+  int id_center;
+  double x_center;
+  double y_center;
+  double z_center;
+  int *type_list;
+  double *rate_list;
+
+  double nswap_attempts;
+  double nswap_successes;
+
+  bool unequal_cutoffs;
+
+  int atom_swap_nmax;
+  double beta, r_0;
+  double local_probability;     // Total swap probability stored on this proc
+  double global_probability;    // Total swap probability across all procs
+  double prev_probability;      // Swap probability on procs < this proc
+  double *qtype;
+  double energy_stored;
+  double **sqrt_mass_ratio;
+  double **voro_neighbor_list;
+  int *local_swap_iatom_list;
+  int *local_swap_neighbor_list;
+  int *local_swap_type_list;    // Type list index of atoms stored on this proc
+  double *local_swap_probability;
+
+  class RanPark *random_equal;
+
+  class Compute *c_voro;
+  class Compute *c_pe;
+
+  void options(int, char **);
+  int attempt_swap();
+  double energy_full();
+  int pick_i_swap_atom();
+  int pick_j_swap_neighbor(int);
+  double get_distance(double[3], double[3]);
+  void build_i_neighbor_list(int);
+  void update_iswap_atoms_list();
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/ML-IAP/mliap_unified_couple.pyx b/src/ML-IAP/mliap_unified_couple.pyx
index 891ba8e28a..36c3eddda7 100644
--- a/src/ML-IAP/mliap_unified_couple.pyx
+++ b/src/ML-IAP/mliap_unified_couple.pyx
@@ -363,7 +363,9 @@ cdef public object mliap_unified_connect(char *fname, MLIAPDummyModel * model,
         unified = LOADED_MODEL
     elif str_fname.endswith(".pt") or str_fname.endswith('.pth'):
         import torch
-        unified = torch.load(str_fname)
+        # SECURITY: weights_only=False lets torch.load() unpickle arbitrary objects, which
+        # can execute code on load; only use model files that come from a trusted source.
+        unified = torch.load(str_fname, weights_only=False)
     else:
         with open(str_fname, 'rb') as pfile:
             unified = pickle.load(pfile)
diff --git a/src/ML-PACE/pair_pace.cpp b/src/ML-PACE/pair_pace.cpp
index 79d2f92455..c7b5414ef3 100644
--- a/src/ML-PACE/pair_pace.cpp
+++ b/src/ML-PACE/pair_pace.cpp
@@ -145,7 +145,7 @@ void PairPACE::compute(int eflag, int vflag)
   // ilist: list of "i" atoms for which neighbor lists exist
   ilist = list->ilist;
 
-  //numneigh: the length of each these neigbor list
+  // numneigh: the length of each of these neighbor lists
   numneigh = list->numneigh;
 
   // the pointer to the list of neighbors of "i"
diff --git a/src/ML-PACE/pair_pace_extrapolation.cpp b/src/ML-PACE/pair_pace_extrapolation.cpp
index 2946729905..5ac857d748 100644
--- a/src/ML-PACE/pair_pace_extrapolation.cpp
+++ b/src/ML-PACE/pair_pace_extrapolation.cpp
@@ -151,7 +151,7 @@ void PairPACEExtrapolation::compute(int eflag, int vflag)
   // ilist: list of "i" atoms for which neighbor lists exist
   ilist = list->ilist;
 
-  //numneigh: the length of each these neigbor list
+  // numneigh: the length of each of these neighbor lists
   numneigh = list->numneigh;
 
   // the pointer to the list of neighbors of "i"
diff --git a/src/ML-POD/compute_podd_atom.cpp b/src/ML-POD/compute_podd_atom.cpp
index 4ab6e23393..364bfb54ac 100644
--- a/src/ML-POD/compute_podd_atom.cpp
+++ b/src/ML-POD/compute_podd_atom.cpp
@@ -58,8 +58,8 @@ ComputePODDAtom::ComputePODDAtom(LAMMPS *lmp, int narg, char **arg) :
   pod = nullptr;
   elements = nullptr;
 
-  if (((((MAXBIGINT*3.0)*atom->natoms)*podptr->nClusters)*podptr->Mdesc) > (MAXSMALLINT*1.0))
-    error->all(FLERR, "Per-atom data too large");
+  if ((((3.0*atom->natoms)*podptr->nClusters)*podptr->Mdesc) > (MAXSMALLINT*1.0))
+      error->all(FLERR, "Too many atoms ({}) for compute {}", atom->natoms, style);
   size_peratom_cols = 3 * atom->natoms * podptr->Mdesc * podptr->nClusters;
   peratom_flag = 1;
 }
@@ -110,8 +110,8 @@ void ComputePODDAtom::compute_peratom()
   if (atom->natoms > nmax) {
     memory->destroy(pod);
     nmax = atom->natoms;
-    if (((((MAXBIGINT*3.0)*atom->natoms)*podptr->nClusters)*podptr->Mdesc) > (MAXSMALLINT*1.0))
-      error->all(FLERR, "Per-atom data too large");
+    if ((((3.0*atom->natoms)*podptr->nClusters)*podptr->Mdesc) > (MAXSMALLINT*1.0))
+      error->all(FLERR, "Too many atoms ({}) for compute {}", atom->natoms, style);
     int numdesc = 3 * atom->natoms * podptr->Mdesc * podptr->nClusters;
     memory->create(pod, nmax, numdesc,"podd/atom:pod");
     array_atom = pod;
diff --git a/src/PERI/fix_peri_neigh.cpp b/src/PERI/fix_peri_neigh.cpp
index 6adfc96f3f..1eba6006b8 100644
--- a/src/PERI/fix_peri_neigh.cpp
+++ b/src/PERI/fix_peri_neigh.cpp
@@ -279,7 +279,7 @@ void FixPeriNeigh::setup(int /*vflag*/)
     }
   }
 
-  // sanity check: does any atom appear twice in any neigborlist?
+  // sanity check: does any atom appear twice in any neighborlist?
   // should only be possible if using pbc and domain < 2*delta
 
   if (domain->xperiodic || domain->yperiodic || domain->zperiodic) {
diff --git a/src/PHONON/third_order.cpp b/src/PHONON/third_order.cpp
index e1d9161ef1..0facaecc9b 100644
--- a/src/PHONON/third_order.cpp
+++ b/src/PHONON/third_order.cpp
@@ -162,7 +162,7 @@ void ThirdOrder::command(int narg, char **arg)
   conversion = 1;
   folded = 0;
 
-  // set Neigborlist attributes to NULL
+  // set Neighborlist attributes to NULL
   ijnum = nullptr;
   neighbortags = nullptr;
 
diff --git a/src/RHEO/compute_rheo_interface.cpp b/src/RHEO/compute_rheo_interface.cpp
index 0cd6e46417..c2440100ae 100644
--- a/src/RHEO/compute_rheo_interface.cpp
+++ b/src/RHEO/compute_rheo_interface.cpp
@@ -344,31 +344,22 @@ void ComputeRHEOInterface::store_forces()
   // If forces are overwritten by a fix, there are no pressure forces
   // so just normalize
   auto fixlist = modify->get_fix_by_style("setforce");
-  if (fixlist.size() != 0) {
-    for (const auto &fix : fixlist) {
-      for (int i = 0; i < atom->nlocal; i++) {
-        if (rmass)
-          minv = 1.0 / rmass[i];
-        else
-          minv = 1.0 / mass[type[i]];
-        if (mask[i] & fix->groupbit)
-          for (int a = 0; a < 3; a++) fp_store[i][a] = f[i][a] * minv;
-        else
-          for (int a = 0; a < 3; a++) fp_store[i][a] = (f[i][a] - fp_store[i][a]) * minv;
-      }
-    }
-  } else {
-    if (rmass) {
-      for (int i = 0; i < atom->nlocal; i++) {
-        minv = 1.0 / rmass[i];
-        for (int a = 0; a < 3; a++) fp_store[i][a] = (f[i][a] - fp_store[i][a]) * minv;
-      }
-    } else {
-      for (int i = 0; i < atom->nlocal; i++) {
-        minv = 1.0 / mass[type[i]];
-        for (int a = 0; a < 3; a++) fp_store[i][a] = (f[i][a] - fp_store[i][a]) * minv;
-      }
-    }
+  int skip_from_setforce;
+  for (int i = 0; i < atom->nlocal; i++) {
+    if (rmass)
+      minv = 1.0 / rmass[i];
+    else
+      minv = 1.0 / mass[type[i]];
+
+    skip_from_setforce = 0;
+    for (const auto &fix : fixlist)
+      if (mask[i] & fix->groupbit)
+        skip_from_setforce = 1;
+
+    if (skip_from_setforce)
+      for (int a = 0; a < 3; a++) fp_store[i][a] = f[i][a] * minv;
+    else
+      for (int a = 0; a < 3; a++) fp_store[i][a] = (f[i][a] - fp_store[i][a]) * minv;
   }
 
   // Forward comm forces
diff --git a/src/RHEO/fix_rheo_pressure.cpp b/src/RHEO/fix_rheo_pressure.cpp
index 121473f7e3..6088c44348 100644
--- a/src/RHEO/fix_rheo_pressure.cpp
+++ b/src/RHEO/fix_rheo_pressure.cpp
@@ -191,14 +191,13 @@ void FixRHEOPressure::setup_pre_force(int /*vflag*/)
 void FixRHEOPressure::pre_force(int /*vflag*/)
 {
   int *mask = atom->mask;
-  int *type = atom->type;
   double *rho = atom->rho;
   double *pressure = atom->pressure;
 
   int nlocal = atom->nlocal;
 
   for (int i = 0; i < nlocal; i++)
-    if (mask[i] & groupbit) pressure[i] = calc_pressure(rho[i], type[i]);
+    if (mask[i] & groupbit) pressure[i] = calc_pressure(rho[i], i);
 
   if (comm_forward) comm->forward_comm(this);
 }
diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp
index b0c6d46ee4..95c274e8e3 100644
--- a/src/RIGID/fix_rigid.cpp
+++ b/src/RIGID/fix_rigid.cpp
@@ -1516,6 +1516,11 @@ void FixRigid::set_xv()
         MathExtra::omega_to_angmom(omega[ibody],exone,eyone,ezone,
                                    inertiaatom,angmom_one[i]);
       }
+      if (atom->quat_flag) {
+        quatatom = atom->quat[i];
+        MathExtra::quatquat(quat[ibody],orient[i],quatatom);
+        MathExtra::qnormalize(quatatom);
+      }
       if (eflags[i] & DIPOLE) {
         MathExtra::quat_to_mat(quat[ibody],p);
         MathExtra::matvec(p,dorient[i],mu[i]);
@@ -1713,7 +1718,7 @@ void FixRigid::setup_bodies_static()
   }
 
   // grow extended arrays and set extended flags for each particle
-  // orientflag = 4 if any particle stores ellipsoid or tri orientation
+  // orientflag = 4 if any particle stores ellipsoid or tri orientation or quat
   // orientflag = 1 if any particle stores line orientation
   // dorientflag = 1 if any particle stores dipole orientation
 
@@ -1721,6 +1726,7 @@ void FixRigid::setup_bodies_static()
     if (atom->ellipsoid_flag) orientflag = 4;
     if (atom->line_flag) orientflag = 1;
     if (atom->tri_flag) orientflag = 4;
+    if (atom->quat_flag) orientflag = 4;
     if (atom->mu_flag) dorientflag = 1;
     grow_arrays(atom->nmax);
 
@@ -2061,7 +2067,12 @@ void FixRigid::setup_bodies_static()
                                 ez_space[ibody],delta,displace[i]);
 
     if (extended) {
-      if (eflags[i] & ELLIPSOID) {
+      if (atom->quat_flag) {
+        quatatom = atom->quat[i];
+        MathExtra::qconjugate(quat[ibody],qc);
+        MathExtra::quatquat(qc,quatatom,orient[i]);
+        MathExtra::qnormalize(orient[i]);
+      } else if (eflags[i] & ELLIPSOID) {
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::qconjugate(quat[ibody],qc);
         MathExtra::quatquat(qc,quatatom,orient[i]);
diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp
index 6ba2e5eb1c..d55b1fadd9 100644
--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@@ -1409,6 +1409,11 @@ void FixRigidSmall::set_xv()
         MathExtra::omega_to_angmom(b->omega,exone,eyone,ezone,
                                    inertiaatom,angmom[i]);
       }
+      if (atom->quat_flag) {
+        quatatom = atom->quat[i];
+        MathExtra::quatquat(b->quat,orient[i],quatatom);
+        MathExtra::qnormalize(quatatom);
+      }
       if (eflags[i] & DIPOLE) {
         MathExtra::quat_to_mat(b->quat,p);
         MathExtra::matvec(p,dorient[i],mu[i]);
@@ -1849,7 +1854,7 @@ void FixRigidSmall::setup_bodies_static()
   }
 
   // grow extended arrays and set extended flags for each particle
-  // orientflag = 4 if any particle stores ellipsoid or tri orientation
+  // orientflag = 4 if any particle stores ellipsoid or tri orientation or quat
   // orientflag = 1 if any particle stores line orientation
   // dorientflag = 1 if any particle stores dipole orientation
 
@@ -1857,6 +1862,7 @@ void FixRigidSmall::setup_bodies_static()
     if (atom->ellipsoid_flag) orientflag = 4;
     if (atom->line_flag) orientflag = 1;
     if (atom->tri_flag) orientflag = 4;
+    if (atom->quat_flag) orientflag = 4;
     if (atom->mu_flag) dorientflag = 1;
     grow_arrays(atom->nmax);
 
@@ -2209,7 +2215,12 @@ void FixRigidSmall::setup_bodies_static()
                                 delta,displace[i]);
 
     if (extended) {
-      if (eflags[i] & ELLIPSOID) {
+      if (atom->quat_flag) {
+        quatatom = atom->quat[i];
+        MathExtra::qconjugate(b->quat,qc);
+        MathExtra::quatquat(qc,quatatom,orient[i]);
+        MathExtra::qnormalize(orient[i]);
+      } else if (eflags[i] & ELLIPSOID) {
         quatatom = ebonus[ellipsoid[i]].quat;
         MathExtra::qconjugate(b->quat,qc);
         MathExtra::quatquat(qc,quatatom,orient[i]);
diff --git a/src/atom.cpp b/src/atom.cpp
index 0e0b1350ac..ac9ebb1634 100644
--- a/src/atom.cpp
+++ b/src/atom.cpp
@@ -2874,6 +2874,8 @@ void Atom::remove_custom(int index, int flag, int cols)
   }
 }
 
+// TODO: complete list of exported properties.
+
 /** Provide access to internal data of the Atom class by keyword
  *
 \verbatim embed:rst
@@ -3035,7 +3037,6 @@ void *Atom::extract(const char *name)
   if (strcmp(name,"x") == 0) return (void *) x;
   if (strcmp(name,"v") == 0) return (void *) v;
   if (strcmp(name,"f") == 0) return (void *) f;
-  if (strcmp(name,"molecule") == 0) return (void *) molecule;
   if (strcmp(name,"q") == 0) return (void *) q;
   if (strcmp(name,"mu") == 0) return (void *) mu;
   if (strcmp(name,"omega") == 0) return (void *) omega;
@@ -3051,6 +3052,34 @@ void *Atom::extract(const char *name)
   if (strcmp(name,"temperature") == 0) return (void *) temperature;
   if (strcmp(name,"heatflow") == 0) return (void *) heatflow;
 
+  // MOLECULE PACKAGE
+
+  if (strcmp(name,"molecule") == 0) return (void *) molecule;
+  if (strcmp(name,"molindex") == 0) return (void *) molindex;
+  if (strcmp(name,"molatom") == 0) return (void *) molatom;
+  if (strcmp(name,"nspecial") == 0) return (void *) nspecial;
+  if (strcmp(name,"special") == 0) return (void *) special;
+  if (strcmp(name,"num_bond") == 0) return (void *) num_bond;
+  if (strcmp(name,"bond_type") == 0) return (void *) bond_type;
+  if (strcmp(name,"bond_atom") == 0) return (void *) bond_atom;
+  if (strcmp(name,"num_angle") == 0) return (void *) num_angle;
+  if (strcmp(name,"angle_type") == 0) return (void *) angle_type;
+  if (strcmp(name,"angle_atom1") == 0) return (void *) angle_atom1;
+  if (strcmp(name,"angle_atom2") == 0) return (void *) angle_atom2;
+  if (strcmp(name,"angle_atom3") == 0) return (void *) angle_atom3;
+  if (strcmp(name,"num_dihedral") == 0) return (void *) num_dihedral;
+  if (strcmp(name,"dihedral_type") == 0) return (void *) dihedral_type;
+  if (strcmp(name,"dihedral_atom1") == 0) return (void *) dihedral_atom1;
+  if (strcmp(name,"dihedral_atom2") == 0) return (void *) dihedral_atom2;
+  if (strcmp(name,"dihedral_atom3") == 0) return (void *) dihedral_atom3;
+  if (strcmp(name,"dihedral_atom4") == 0) return (void *) dihedral_atom4;
+  if (strcmp(name,"num_improper") == 0) return (void *) num_improper;
+  if (strcmp(name,"improper_type") == 0) return (void *) improper_type;
+  if (strcmp(name,"improper_atom1") == 0) return (void *) improper_atom1;
+  if (strcmp(name,"improper_atom2") == 0) return (void *) improper_atom2;
+  if (strcmp(name,"improper_atom3") == 0) return (void *) improper_atom3;
+  if (strcmp(name,"improper_atom4") == 0) return (void *) improper_atom4;
+
   // PERI PACKAGE
 
   if (strcmp(name,"vfrac") == 0) return (void *) vfrac;
@@ -3172,7 +3201,6 @@ int Atom::extract_datatype(const char *name)
   if (strcmp(name,"x") == 0) return LAMMPS_DOUBLE_2D;
   if (strcmp(name,"v") == 0) return LAMMPS_DOUBLE_2D;
   if (strcmp(name,"f") == 0) return LAMMPS_DOUBLE_2D;
-  if (strcmp(name,"molecule") == 0) return LAMMPS_TAGINT;
   if (strcmp(name,"q") == 0) return LAMMPS_DOUBLE;
   if (strcmp(name,"mu") == 0) return LAMMPS_DOUBLE_2D;
   if (strcmp(name,"omega") == 0) return LAMMPS_DOUBLE_2D;
@@ -3188,6 +3216,34 @@ int Atom::extract_datatype(const char *name)
   if (strcmp(name,"temperature") == 0) return LAMMPS_DOUBLE;
   if (strcmp(name,"heatflow") == 0) return LAMMPS_DOUBLE;
 
+  // MOLECULE package
+
+  if (strcmp(name,"molecule") == 0) return LAMMPS_TAGINT;
+  if (strcmp(name,"molindex") == 0) return LAMMPS_INT;
+  if (strcmp(name,"molatom") == 0) return LAMMPS_INT;
+  if (strcmp(name,"nspecial") == 0) return LAMMPS_INT_2D;
+  if (strcmp(name,"special") == 0) return LAMMPS_TAGINT_2D;
+  if (strcmp(name,"num_bond") == 0) return LAMMPS_INT;
+  if (strcmp(name,"bond_type") == 0) return LAMMPS_INT_2D;
+  if (strcmp(name,"bond_atom") == 0) return LAMMPS_TAGINT_2D;
+  if (strcmp(name,"num_angle") == 0) return LAMMPS_INT;
+  if (strcmp(name,"angle_type") == 0) return LAMMPS_INT_2D;
+  if (strcmp(name,"angle_atom1") == 0) return LAMMPS_TAGINT_2D;
+  if (strcmp(name,"angle_atom2") == 0) return LAMMPS_TAGINT_2D;
+  if (strcmp(name,"angle_atom3") == 0) return LAMMPS_TAGINT_2D;
+  if (strcmp(name,"num_dihedral") == 0) return LAMMPS_INT;
+  if (strcmp(name,"dihedral_type") == 0) return LAMMPS_INT_2D;
+  if (strcmp(name,"dihedral_atom1") == 0) return LAMMPS_TAGINT_2D;
+  if (strcmp(name,"dihedral_atom2") == 0) return LAMMPS_TAGINT_2D;
+  if (strcmp(name,"dihedral_atom3") == 0) return LAMMPS_TAGINT_2D;
+  if (strcmp(name,"dihedral_atom4") == 0) return LAMMPS_TAGINT_2D;
+  if (strcmp(name,"num_improper") == 0) return LAMMPS_INT;
+  if (strcmp(name,"improper_type") == 0) return LAMMPS_INT_2D;
+  if (strcmp(name,"improper_atom1") == 0) return LAMMPS_TAGINT_2D;
+  if (strcmp(name,"improper_atom2") == 0) return LAMMPS_TAGINT_2D;
+  if (strcmp(name,"improper_atom3") == 0) return LAMMPS_TAGINT_2D;
+  if (strcmp(name,"improper_atom4") == 0) return LAMMPS_TAGINT_2D;
+
   // PERI package (and in part MACHDYN)
 
   if (strcmp(name,"vfrac") == 0) return LAMMPS_DOUBLE;
diff --git a/src/compute_bond_local.cpp b/src/compute_bond_local.cpp
index e9632d254f..6354c67638 100644
--- a/src/compute_bond_local.cpp
+++ b/src/compute_bond_local.cpp
@@ -428,22 +428,19 @@ int ComputeBondLocal::compute_bonds(int flag)
           if (dstr) input->variable->internal_set(dvar, sqrt(rsq));
         }
 
-        // to make sure dx, dy and dz are always from the lower to the higher id
-        double directionCorrection = tag[atom1] > tag[atom2] ? -1.0 : 1.0;
-
         for (int n = 0; n < nvalues; n++) {
           switch (bstyle[n]) {
             case DIST:
               ptr[n] = sqrt(rsq);
               break;
             case DX:
-              ptr[n] = dx * directionCorrection;
+              ptr[n] = dx;
               break;
             case DY:
-              ptr[n] = dy * directionCorrection;
+              ptr[n] = dy;
               break;
             case DZ:
-              ptr[n] = dz * directionCorrection;
+              ptr[n] = dz;
               break;
             case ENGPOT:
               ptr[n] = engpot;
diff --git a/src/compute_pair_local.cpp b/src/compute_pair_local.cpp
index 57f15264f0..fa5d164844 100644
--- a/src/compute_pair_local.cpp
+++ b/src/compute_pair_local.cpp
@@ -277,22 +277,19 @@ int ComputePairLocal::compute_pairs(int flag)
         else
           ptr = alocal[m];
 
-        // to make sure dx, dy and dz are always from the lower to the higher id
-        double directionCorrection = itag > jtag ? -1.0 : 1.0;
-
         for (n = 0; n < nvalues; n++) {
           switch (pstyle[n]) {
             case DIST:
               ptr[n] = sqrt(rsq);
               break;
             case DX:
-              ptr[n] = delx * directionCorrection;
+              ptr[n] = delx;
               break;
             case DY:
-              ptr[n] = dely * directionCorrection;
+              ptr[n] = dely;
               break;
             case DZ:
-              ptr[n] = delz * directionCorrection;
+              ptr[n] = delz;
               break;
             case ENG:
               ptr[n] = eng;
diff --git a/src/fix_langevin.cpp b/src/fix_langevin.cpp
index 60a55bbbb4..5f754b6d2b 100644
--- a/src/fix_langevin.cpp
+++ b/src/fix_langevin.cpp
@@ -170,6 +170,9 @@ FixLangevin::FixLangevin(LAMMPS *lmp, int narg, char **arg) :
   // no need to set peratom_flag, b/c data is for internal use only
 
   if (gjfflag) {
+    if (comm->me == 0)
+      error->warning(FLERR, "The GJF formulation in fix {} is deprecated and will be removed soon. "
+                     "\nPlease use fix gjf instead: https://docs.lammps.org/fix_gjf.html", style);
     FixLangevin::grow_arrays(atom->nmax);
     atom->add_callback(Atom::GROW);
 
diff --git a/src/library.cpp b/src/library.cpp
index 1e4243c59d..a8acbade52 100644
--- a/src/library.cpp
+++ b/src/library.cpp
@@ -1255,6 +1255,7 @@ be called without a valid LAMMPS object handle (it is ignored).
 * :ref:`Image masks <extract_image_masks>`
 * :ref:`System status <extract_system_status>`
 * :ref:`System sizes <extract_system_sizes>`
+* :ref:`Neighbor list settings <extract_neighbor_settings>`
 * :ref:`Atom style flags <extract_atom_flags>`
 
 .. _extract_integer_sizes:
@@ -1386,6 +1387,16 @@ internally by the :doc:`Fortran interface <Fortran>` and are not likely to be us
      - number of dihedral types
    * - nimpropertypes
      - number of improper types
+   * - bond_per_atom
+     - size of per-atom bond data arrays
+   * - angle_per_atom
+     - size of per-atom angle data arrays
+   * - dihedral_per_atom
+     - size of per-atom dihedral data arrays
+   * - improper_per_atom
+     - size of per-atom improper data arrays
+   * - maxspecial
+     - size of per-atom special data array
    * - nellipsoids
      - number of atoms that have ellipsoid data
    * - nlines
@@ -1395,6 +1406,31 @@ internally by the :doc:`Fortran interface <Fortran>` and are not likely to be us
    * - nbodies
      - number of atoms that have body data (see :doc:`the Body particle HowTo <Howto_body>`)
 
+.. _extract_neighbor_settings:
+
+**Neighbor list settings**
+
+.. list-table::
+   :header-rows: 1
+   :widths: 17 83
+
+   * - neigh_every
+     - neighbor lists are rebuilt every this many steps
+   * - neigh_delay
+     - neighbor list rebuilds are delayed until this many steps after the last rebuild
+   * - neigh_dist_check
+     - 0 if always rebuilt, 1 if rebuilt only after an atom has moved more than half the skin distance
+   * - neigh_ago
+     - neighbor lists were rebuilt this many steps ago
+   * - nbondlist
+     - number of entries in bondlist (get list with :ref:`lammps_extract_global() <extract_neighbor_lists>`)
+   * - nanglelist
+     - number of entries in anglelist (get list with :ref:`lammps_extract_global() <extract_neighbor_lists>`)
+   * - ndihedrallist
+     - number of entries in dihedrallist (get list with :ref:`lammps_extract_global() <extract_neighbor_lists>`)
+   * - nimproperlist
+     - number of entries in improperlist (get list with :ref:`lammps_extract_global() <extract_neighbor_lists>`)
+
 .. _extract_atom_flags:
 
 **Atom style flags**
@@ -1479,11 +1515,25 @@ int lammps_extract_setting(void *handle, const char *keyword)
   if (strcmp(keyword,"nangletypes") == 0) return lmp->atom->nangletypes;
   if (strcmp(keyword,"ndihedraltypes") == 0) return lmp->atom->ndihedraltypes;
   if (strcmp(keyword,"nimpropertypes") == 0) return lmp->atom->nimpropertypes;
+  if (strcmp(keyword,"bond_per_atom") == 0) return lmp->atom->bond_per_atom;
+  if (strcmp(keyword,"angle_per_atom") == 0) return lmp->atom->angle_per_atom;
+  if (strcmp(keyword,"dihedral_per_atom") == 0) return lmp->atom->dihedral_per_atom;
+  if (strcmp(keyword,"improper_per_atom") == 0) return lmp->atom->improper_per_atom;
+  if (strcmp(keyword,"maxspecial") == 0) return lmp->atom->maxspecial;
   if (strcmp(keyword,"nellipsoids") == 0) return lmp->atom->nellipsoids;
   if (strcmp(keyword,"nlines") == 0) return lmp->atom->nlines;
   if (strcmp(keyword,"ntris") == 0) return lmp->atom->ntris;
   if (strcmp(keyword,"nbodies") == 0) return lmp->atom->nbodies;
 
+  if (strcmp(keyword,"neigh_every") == 0) return lmp->neighbor->every;
+  if (strcmp(keyword,"neigh_delay") == 0) return lmp->neighbor->delay;
+  if (strcmp(keyword,"neigh_dist_check") == 0) return lmp->neighbor->dist_check;
+  if (strcmp(keyword,"neigh_ago") == 0) return lmp->neighbor->ago;
+  if (strcmp(keyword,"nbondlist") == 0) return lmp->neighbor->nbondlist;
+  if (strcmp(keyword,"nanglelist") == 0) return lmp->neighbor->nanglelist;
+  if (strcmp(keyword,"ndihedrallist") == 0) return lmp->neighbor->ndihedrallist;
+  if (strcmp(keyword,"nimproperlist") == 0) return lmp->neighbor->nimproperlist;
+
   if (strcmp(keyword,"molecule_flag") == 0) return lmp->atom->molecule_flag;
   if (strcmp(keyword,"q_flag") == 0) return lmp->atom->q_flag;
   if (strcmp(keyword,"mu_flag") == 0) return lmp->atom->mu_flag;
@@ -1567,6 +1617,16 @@ int lammps_extract_global_datatype(void * /*handle*/, const char *name)
   if (strcmp(name,"special_lj") == 0) return LAMMPS_DOUBLE;
   if (strcmp(name,"special_coul") == 0) return LAMMPS_DOUBLE;
 
+  if (strcmp(name,"neigh_bondlist") == 0) return LAMMPS_INT_2D;
+  if (strcmp(name,"neigh_anglelist") == 0) return LAMMPS_INT_2D;
+  if (strcmp(name,"neigh_dihedrallist") == 0) return LAMMPS_INT_2D;
+  if (strcmp(name,"neigh_improperlist") == 0) return LAMMPS_INT_2D;
+
+  if (strcmp(name,"eflag_global") == 0) return LAMMPS_BIGINT;
+  if (strcmp(name,"eflag_atom") == 0) return LAMMPS_BIGINT;
+  if (strcmp(name,"vflag_global") == 0) return LAMMPS_BIGINT;
+  if (strcmp(name,"vflag_atom") == 0) return LAMMPS_BIGINT;
+
   if (strcmp(name,"map_style") == 0) return LAMMPS_INT;
 #if defined(LAMMPS_BIGBIG)
   if (strcmp(name,"map_tag_max") == 0) return LAMMPS_BIGINT;
@@ -1651,6 +1711,8 @@ report the "native" data type.  The following tables are provided:
 * :ref:`Timestep settings <extract_timestep_settings>`
 * :ref:`Simulation box settings <extract_box_settings>`
 * :ref:`System property settings <extract_system_settings>`
+* :ref:`Neighbor topology data <extract_neighbor_lists>`
+* :ref:`Energy and virial tally settings <extract_tally_settings>`
 * :ref:`Git revision and version settings <extract_git_settings>`
 * :ref:`Unit settings <extract_unit_settings>`
 
@@ -1800,10 +1862,10 @@ report the "native" data type.  The following tables are provided:
      - Type
      - Length
      - Description
-   * - ntypes
-     - int
+   * - natoms
+     - bigint
      - 1
-     - number of atom types
+     - total number of atoms in the simulation.
    * - nbonds
      - bigint
      - 1
@@ -1820,10 +1882,6 @@ report the "native" data type.  The following tables are provided:
      - bigint
      - 1
      - total number of impropers in the simulation.
-   * - natoms
-     - bigint
-     - 1
-     - total number of atoms in the simulation.
    * - nlocal
      - int
      - 1
@@ -1836,6 +1894,10 @@ report the "native" data type.  The following tables are provided:
      - int
      - 1
      - maximum of nlocal+nghost across all MPI ranks (for per-atom data array size).
+   * - ntypes
+     - int
+     - 1
+     - number of atom types
    * - special_lj
      - double
      - 4
@@ -1897,6 +1959,66 @@ report the "native" data type.  The following tables are provided:
      - 1
      - string with the current KSpace style.
 
+.. _extract_neighbor_lists:
+
+**Neighbor topology data**
+
+Get length of lists with :ref:`lammps_extract_setting() <extract_neighbor_settings>`.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 20 12 16 52
+
+   * - Name
+     - Type
+     - Length
+     - Description
+   * - neigh_bondlist
+     - 2d int
+     - nbondlist
+     - list of bonds (atom1, atom2, type)
+   * - neigh_anglelist
+     - 2d int
+     - nanglelist
+     - list of angles (atom1, atom2, atom3, type)
+   * - neigh_dihedrallist
+     - 2d int
+     - ndihedrallist
+     - list of dihedrals (atom1, atom2, atom3, atom4, type)
+   * - neigh_improperlist
+     - 2d int
+     - nimproperlist
+     - list of impropers (atom1, atom2, atom3, atom4, type)
+
+.. _extract_tally_settings:
+
+**Energy and virial tally settings**
+
+.. list-table::
+   :header-rows: 1
+   :widths: 20 12 16 52
+
+   * - Name
+     - Type
+     - Length
+     - Description
+   * - eflag_global
+     - bigint
+     - 1
+     - timestep global energy is tallied on
+   * - eflag_atom
+     - bigint
+     - 1
+     - timestep per-atom energy is tallied on
+   * - vflag_global
+     - bigint
+     - 1
+     - timestep global virial is tallied on
+   * - vflag_atom
+     - bigint
+     - 1
+     - timestep per-atom virial is tallied on
+
 .. _extract_git_settings:
 
 **Git revision and version settings**
@@ -2094,7 +2216,6 @@ void *lammps_extract_global(void *handle, const char *name)
     return (void *) &lmp->comm->procgrid;
 
   if (strcmp(name,"natoms") == 0) return (void *) &lmp->atom->natoms;
-  if (strcmp(name,"ntypes") == 0) return (void *) &lmp->atom->ntypes;
   if (strcmp(name,"nbonds") == 0) return (void *) &lmp->atom->nbonds;
   if (strcmp(name,"nangles") == 0) return (void *) &lmp->atom->nangles;
   if (strcmp(name,"ndihedrals") == 0) return (void *) &lmp->atom->ndihedrals;
@@ -2102,11 +2223,22 @@ void *lammps_extract_global(void *handle, const char *name)
   if (strcmp(name,"nlocal") == 0) return (void *) &lmp->atom->nlocal;
   if (strcmp(name,"nghost") == 0) return (void *) &lmp->atom->nghost;
   if (strcmp(name,"nmax") == 0) return (void *) &lmp->atom->nmax;
+  if (strcmp(name,"ntypes") == 0) return (void *) &lmp->atom->ntypes;
   if (strcmp(name,"special_lj") == 0) return (void *) lmp->force->special_lj;
   if (strcmp(name,"special_coul") == 0) return (void *) lmp->force->special_coul;
 
   if (strcmp(name,"q_flag") == 0) return (void *) &lmp->atom->q_flag;
 
+  if (strcmp(name,"neigh_bondlist") == 0) return (void *) lmp->neighbor->bondlist;
+  if (strcmp(name,"neigh_anglelist") == 0) return (void *) lmp->neighbor->anglelist;
+  if (strcmp(name,"neigh_dihedrallist") == 0) return (void *) lmp->neighbor->dihedrallist;
+  if (strcmp(name,"neigh_improperlist") == 0) return (void *) lmp->neighbor->improperlist;
+
+  if (strcmp(name,"eflag_global") == 0) return (void *) &lmp->update->eflag_global;
+  if (strcmp(name,"eflag_atom") == 0) return (void *) &lmp->update->eflag_atom;
+  if (strcmp(name,"vflag_global") == 0) return (void *) &lmp->update->vflag_global;
+  if (strcmp(name,"vflag_atom") == 0) return (void *) &lmp->update->vflag_atom;
+
   if (strcmp(name,"map_style") == 0) return (void *) &lmp->atom->map_style;
   if (strcmp(name,"map_tag_max") == 0) return (void *) &lmp->atom->map_tag_max;
   if (strcmp(name,"sametag") == 0) return (void *) lmp->atom->sametag;
diff --git a/src/neighbor.cpp b/src/neighbor.cpp
index a2aa3f411b..fcedfd7613 100644
--- a/src/neighbor.cpp
+++ b/src/neighbor.cpp
@@ -188,8 +188,8 @@ pairclass(nullptr), pairnames(nullptr), pairmasks(nullptr)
 
   // topology lists
 
+  nbondlist = nanglelist = ndihedrallist = nimproperlist = 0;
   bondwhich = anglewhich = dihedralwhich = improperwhich = NONE;
-
   neigh_bond = nullptr;
   neigh_angle = nullptr;
   neigh_dihedral = nullptr;
diff --git a/src/potential_file_reader.cpp b/src/potential_file_reader.cpp
index 2a93a4a524..7bac388ba0 100644
--- a/src/potential_file_reader.cpp
+++ b/src/potential_file_reader.cpp
@@ -86,11 +86,21 @@ PotentialFileReader::~PotentialFileReader()
 /** Set comment (= text after '#') handling preference for the file to be read
  *
  * \param   value   Comment text is ignored if true, or not if false */
+
 void PotentialFileReader::ignore_comments(bool value)
 {
   reader->ignore_comments = value;
 }
 
+/** Set line buffer size of the internal TextFileReader class instance.
+ *
+ * \param   bufsize   New size of the line buffer */
+
+void PotentialFileReader::set_bufsize(int bufsize)
+{
+  reader->set_bufsize(bufsize);
+}
+
 /** Reset file to the beginning */
 
 void PotentialFileReader::rewind()
diff --git a/src/potential_file_reader.h b/src/potential_file_reader.h
index c07b4b83f6..534457a2f8 100644
--- a/src/potential_file_reader.h
+++ b/src/potential_file_reader.h
@@ -41,6 +41,7 @@ class PotentialFileReader : protected Pointers {
                       const int auto_convert = 0);
   ~PotentialFileReader() override;
 
+  void set_bufsize(int bufsize);
   void ignore_comments(bool value);
 
   void rewind();
diff --git a/src/read_data.cpp b/src/read_data.cpp
index 79d88148c5..cf7b224db2 100644
--- a/src/read_data.cpp
+++ b/src/read_data.cpp
@@ -174,13 +174,13 @@ void ReadData::command(int narg, char **arg)
         addflag = VALUE;
         bigint offset = utils::bnumeric(FLERR, arg[iarg + 1], false, lmp);
         if (offset > MAXTAGINT)
-          error->all(FLERR, "Read data add atomID offset {} is too big", offset);
+          error->all(FLERR, "Read data add IDoffset {} is too big", offset);
         id_offset = offset;
 
         if (atom->molecule_flag) {
           offset = utils::bnumeric(FLERR, arg[iarg + 2], false, lmp);
           if (offset > MAXTAGINT)
-            error->all(FLERR, "Read data add molID offset {} is too big", offset);
+            error->all(FLERR, "Read data add MOLoffset {} is too big", offset);
           mol_offset = offset;
           iarg++;
         }
diff --git a/src/reset_atoms_mol.cpp b/src/reset_atoms_mol.cpp
index 54d3bbcc76..363e2f08eb 100644
--- a/src/reset_atoms_mol.cpp
+++ b/src/reset_atoms_mol.cpp
@@ -211,7 +211,7 @@ void ResetAtomsMol::reset()
 
     // if offset < 0 (default), reset it
     // if group = all, offset = 0
-    // else offset = largest molID of non-group atoms
+    // else offset = largest molecule ID of non-group atoms
 
     if (offset < 0) {
       if (groupbit != 1) {
diff --git a/src/text_file_reader.cpp b/src/text_file_reader.cpp
index 715e82ab32..d598c274f0 100644
--- a/src/text_file_reader.cpp
+++ b/src/text_file_reader.cpp
@@ -93,7 +93,9 @@ TextFileReader::~TextFileReader()
   delete[] line;
 }
 
-/** adjust line buffer size */
+/** Adjust line buffer size
+ *
+ * \param  newsize  New size of the internal line buffer */
 
 void TextFileReader::set_bufsize(int newsize)
 {
diff --git a/unittest/c-library/test_library_properties.cpp b/unittest/c-library/test_library_properties.cpp
index 3878b13774..39b31e8217 100644
--- a/unittest/c-library/test_library_properties.cpp
+++ b/unittest/c-library/test_library_properties.cpp
@@ -303,6 +303,15 @@ TEST_F(LibraryProperties, setting)
     EXPECT_EQ(lammps_extract_setting(lmp, "ndihedraltypes"), 0);
     EXPECT_EQ(lammps_extract_setting(lmp, "nimpropertypes"), 0);
 
+    EXPECT_EQ(lammps_extract_setting(lmp, "neigh_every"), 1);
+    EXPECT_EQ(lammps_extract_setting(lmp, "neigh_delay"), 0);
+    EXPECT_EQ(lammps_extract_setting(lmp, "neigh_dist_check"), 1);
+    EXPECT_EQ(lammps_extract_setting(lmp, "neigh_ago"), -1);
+    EXPECT_EQ(lammps_extract_setting(lmp, "nbondlist"), 0);
+    EXPECT_EQ(lammps_extract_setting(lmp, "nanglelist"), 0);
+    EXPECT_EQ(lammps_extract_setting(lmp, "ndihedrallist"), 0);
+    EXPECT_EQ(lammps_extract_setting(lmp, "nimproperlist"), 0);
+
     EXPECT_EQ(lammps_extract_setting(lmp, "molecule_flag"), 0);
     EXPECT_EQ(lammps_extract_setting(lmp, "q_flag"), 0);
     EXPECT_EQ(lammps_extract_setting(lmp, "mu_flag"), 0);
@@ -313,7 +322,7 @@ TEST_F(LibraryProperties, setting)
         std::string input = path_join(INPUT_DIR, "in.fourmol");
         if (!verbose) ::testing::internal::CaptureStdout();
         lammps_file(lmp, input.c_str());
-        lammps_command(lmp, "run 2 post no");
+        lammps_command(lmp, "run 3 post no");
         if (!verbose) ::testing::internal::GetCapturedStdout();
         EXPECT_EQ(lammps_extract_setting(lmp, "triclinic"), 0);
         EXPECT_EQ(lammps_extract_setting(lmp, "box_exist"), 1);
@@ -328,6 +337,15 @@ TEST_F(LibraryProperties, setting)
         EXPECT_EQ(lammps_extract_setting(lmp, "ndihedraltypes"), 5);
         EXPECT_EQ(lammps_extract_setting(lmp, "nimpropertypes"), 2);
 
+        EXPECT_EQ(lammps_extract_setting(lmp, "neigh_every"), 2);
+        EXPECT_EQ(lammps_extract_setting(lmp, "neigh_delay"), 2);
+        EXPECT_EQ(lammps_extract_setting(lmp, "neigh_dist_check"), 0);
+        EXPECT_EQ(lammps_extract_setting(lmp, "neigh_ago"), 1);
+        EXPECT_EQ(lammps_extract_setting(lmp, "nbondlist"), 24);
+        EXPECT_EQ(lammps_extract_setting(lmp, "nanglelist"), 30);
+        EXPECT_EQ(lammps_extract_setting(lmp, "ndihedrallist"), 31);
+        EXPECT_EQ(lammps_extract_setting(lmp, "nimproperlist"), 2);
+
         EXPECT_EQ(lammps_extract_setting(lmp, "molecule_flag"), 1);
         EXPECT_EQ(lammps_extract_setting(lmp, "q_flag"), 1);
         EXPECT_EQ(lammps_extract_setting(lmp, "mu_flag"), 0);
@@ -365,10 +383,48 @@ TEST_F(LibraryProperties, global)
     char *c_ptr = (char *)lammps_extract_global(lmp, "units");
     EXPECT_THAT(c_ptr, StrEq("real"));
 
-    EXPECT_EQ(lammps_extract_global_datatype(lmp, "ntimestep"), LAMMPS_INT64);
-    auto *b_ptr = (int64_t *)lammps_extract_global(lmp, "ntimestep");
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "ntimestep"), LAMMPS_BIGINT);
+    auto *b_ptr = (bigint *)lammps_extract_global(lmp, "ntimestep");
     EXPECT_EQ((*b_ptr), 2);
 
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "natoms"), LAMMPS_BIGINT);
+    b_ptr = (bigint *)lammps_extract_global(lmp, "natoms");
+    EXPECT_EQ((*b_ptr), 29);
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "nbonds"), LAMMPS_BIGINT);
+    b_ptr = (bigint *)lammps_extract_global(lmp, "nbonds");
+    EXPECT_EQ((*b_ptr), 24);
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "nangles"), LAMMPS_BIGINT);
+    b_ptr = (bigint *)lammps_extract_global(lmp, "nangles");
+    EXPECT_EQ((*b_ptr), 30);
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "ndihedrals"), LAMMPS_BIGINT);
+    b_ptr = (bigint *)lammps_extract_global(lmp, "ndihedrals");
+    EXPECT_EQ((*b_ptr), 31);
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "nimpropers"), LAMMPS_BIGINT);
+    b_ptr = (bigint *)lammps_extract_global(lmp, "nimpropers");
+    EXPECT_EQ((*b_ptr), 2);
+
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "neigh_bondlist"), LAMMPS_INT_2D);
+    EXPECT_NE(lammps_extract_global(lmp, "neigh_bondlist"), nullptr);
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "neigh_anglelist"), LAMMPS_INT_2D);
+    EXPECT_NE(lammps_extract_global(lmp, "neigh_anglelist"), nullptr);
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "neigh_dihedrallist"), LAMMPS_INT_2D);
+    EXPECT_NE(lammps_extract_global(lmp, "neigh_dihedrallist"), nullptr);
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "neigh_improperlist"), LAMMPS_INT_2D);
+    EXPECT_NE(lammps_extract_global(lmp, "neigh_improperlist"), nullptr);
+
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "eflag_global"), LAMMPS_BIGINT);
+    b_ptr = (bigint *)lammps_extract_global(lmp, "eflag_global");
+    EXPECT_EQ((*b_ptr), 2);
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "eflag_atom"), LAMMPS_BIGINT);
+    b_ptr = (bigint *)lammps_extract_global(lmp, "eflag_atom");
+    EXPECT_EQ((*b_ptr), 0);
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "vflag_global"), LAMMPS_BIGINT);
+    b_ptr = (bigint *)lammps_extract_global(lmp, "vflag_global");
+    EXPECT_EQ((*b_ptr), 2);
+    EXPECT_EQ(lammps_extract_global_datatype(lmp, "vflag_atom"), LAMMPS_BIGINT);
+    b_ptr = (bigint *)lammps_extract_global(lmp, "vflag_atom");
+    EXPECT_EQ((*b_ptr), 0);
+
     EXPECT_EQ(lammps_extract_global_datatype(lmp, "dt"), LAMMPS_DOUBLE);
     auto *d_ptr = (double *)lammps_extract_global(lmp, "dt");
     EXPECT_DOUBLE_EQ((*d_ptr), 0.1);
@@ -584,17 +640,36 @@ TEST_F(LibraryProperties, neighlist)
     EXPECT_DOUBLE_EQ(minval, 1.0);
     EXPECT_DOUBLE_EQ(maxval, 2.1);
 
+    char errbuf[128];
+    lammps_set_show_error(lmp, 0);
     const int nlocal = lammps_extract_setting(lmp, "nlocal");
     EXPECT_EQ(nlocal, numatoms);
     EXPECT_NE(lammps_find_pair_neighlist(lmp, "sw", 1, 0, 0), -1);
+    EXPECT_EQ(lammps_has_error(lmp), 0);
     EXPECT_NE(lammps_find_pair_neighlist(lmp, "morse", 1, 0, 0), -1);
+    EXPECT_EQ(lammps_has_error(lmp), 0);
     EXPECT_NE(lammps_find_pair_neighlist(lmp, "lj/cut", 1, 1, 0), -1);
+    EXPECT_EQ(lammps_has_error(lmp), 0);
     EXPECT_NE(lammps_find_pair_neighlist(lmp, "lj/cut", 1, 2, 0), -1);
+    EXPECT_EQ(lammps_has_error(lmp), 0);
     EXPECT_EQ(lammps_find_pair_neighlist(lmp, "lj/cut", 1, 0, 0), -1);
+    EXPECT_EQ(lammps_has_error(lmp), 1);
+    EXPECT_EQ(lammps_get_last_error_message(lmp, errbuf, 128), 1);
+    errbuf[69] = '\0';
+    EXPECT_THAT(std::string(errbuf),
+                StrEq("ERROR: lammps_find_pair_neighlist(): Pair style lj/cut does not exist"));
     EXPECT_EQ(lammps_find_pair_neighlist(lmp, "hybrid/overlay", 1, 0, 0), -1);
+    EXPECT_EQ(lammps_has_error(lmp), 0);
     EXPECT_NE(lammps_find_compute_neighlist(lmp, "dist", 0), -1);
+    EXPECT_EQ(lammps_has_error(lmp), 0);
     EXPECT_EQ(lammps_find_fix_neighlist(lmp, "dist", 0), -1);
+    EXPECT_EQ(lammps_has_error(lmp), 0);
     EXPECT_EQ(lammps_find_compute_neighlist(lmp, "xxx", 0), -1);
+    EXPECT_EQ(lammps_has_error(lmp), 1);
+    EXPECT_EQ(lammps_get_last_error_message(lmp, errbuf, 128), 1);
+    errbuf[66] = '\0';
+    EXPECT_THAT(std::string(errbuf),
+                StrEq("ERROR: lammps_find_compute_neighlist(): Compute xxx does not exist"));
 
     // full neighbor list for 4 type 1 atoms
     // all have 3 type 1 atom neighbors
diff --git a/unittest/python/python-numpy.py b/unittest/python/python-numpy.py
index 4930527a61..be9109b9a3 100644
--- a/unittest/python/python-numpy.py
+++ b/unittest/python/python-numpy.py
@@ -153,7 +153,7 @@ class PythonNumpy(unittest.TestCase):
         self.assertEqual(values[0,0], 0.5)
         self.assertEqual(values[0,3], -0.5)
         self.assertEqual(values[1,0], 1.5)
-        self.assertEqual(values[1,3], 1.5)
+        self.assertEqual(values[1,3], -1.5)
 
     def testExtractAtom(self):
         self.lmp.command("units lj")