Merge branch 'develop' of https://github.com/lammps/lammps into kk_update_3.7

2022-10-10 13:44:02 -07:00
parent e51be5d6e0 1fb07387b9
commit c113253e2d
133 changed files with 6313 additions and 1175 deletions
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -105,7 +105,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
    if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4)
      set(CMAKE_TUNE_DEFAULT "-xCOMMON-AVX512")
    else()
-      set(CMAKE_TUNE_DEFAULT "-xHost")
+      set(CMAKE_TUNE_DEFAULT "-xHost -fp-model fast=2 -no-prec-div -qoverride-limits -diag-disable=10441 -diag-disable=2196")
    endif()
  endif()
 endif()
@ -854,8 +854,11 @@ if(BUILD_SHARED_LIBS OR PKG_PYTHON)
    find_package(Python COMPONENTS Interpreter)
  endif()
  if(Python_EXECUTABLE)
-    file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python)
+    file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python/lib)
-    install(CODE "execute_process(COMMAND ${Python_EXECUTABLE} setup.py build -b ${CMAKE_BINARY_DIR}/python install --prefix=${CMAKE_INSTALL_PREFIX} --root=\$ENV{DESTDIR}/ WORKING_DIRECTORY ${LAMMPS_PYTHON_DIR})")
+    file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python/src)
    file(COPY ${LAMMPS_SOURCE_DIR}/version.h  DESTINATION ${CMAKE_BINARY_DIR}/python/src)
    file(COPY ${LAMMPS_PYTHON_DIR}/README ${LAMMPS_PYTHON_DIR}/pyproject.toml ${LAMMPS_PYTHON_DIR}/setup.py ${LAMMPS_PYTHON_DIR}/lammps  DESTINATION ${CMAKE_BINARY_DIR}/python/lib)
    install(CODE "if(\"\$ENV{DESTDIR}\" STREQUAL \"\")\n execute_process(COMMAND ${Python_EXECUTABLE} -m pip install -v ${CMAKE_BINARY_DIR}/python/lib --prefix=${CMAKE_INSTALL_PREFIX})\n else()\n execute_process(COMMAND ${Python_EXECUTABLE} -m pip install -v ${CMAKE_BINARY_DIR}/python/lib --prefix=${CMAKE_INSTALL_PREFIX} --root=\$ENV{DESTDIR})\n endif()")
  endif()
 endif()
--- a/cmake/Modules/Packages/ML-PACE.cmake
+++ b/cmake/Modules/Packages/ML-PACE.cmake
@ -1,6 +1,6 @@
-set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2021.10.25.fix2.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
+set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2022.09.27.fix10Oct.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
-set(PACELIB_MD5 "32394d799bc282bb57696c78c456e64f" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
+set(PACELIB_MD5 "766cebcc0e5c4b8430c2f3cd202d9905" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
 mark_as_advanced(PACELIB_URL)
 mark_as_advanced(PACELIB_MD5)
@ -15,23 +15,9 @@ execute_process(
 )
 get_newest_file(${CMAKE_BINARY_DIR}/lammps-user-pace-* lib-pace)
-# enforce building libyaml-cpp as static library and turn off optional features
+add_subdirectory(${lib-pace} build-pace)
 set(YAML_BUILD_SHARED_LIBS OFF)
 set(YAML_CPP_BUILD_CONTRIB OFF)
 set(YAML_CPP_BUILD_TOOLS OFF)
 add_subdirectory(${lib-pace}/yaml-cpp build-yaml-cpp)
 set(YAML_CPP_INCLUDE_DIR ${lib-pace}/yaml-cpp/include)
 file(GLOB PACE_EVALUATOR_INCLUDE_DIR ${lib-pace}/ML-PACE)
 file(GLOB PACE_EVALUATOR_SOURCES ${lib-pace}/ML-PACE/*.cpp)
 list(FILTER PACE_EVALUATOR_SOURCES EXCLUDE REGEX pair_pace.cpp)
 add_library(pace STATIC ${PACE_EVALUATOR_SOURCES})
 set_target_properties(pace PROPERTIES CXX_EXTENSIONS ON OUTPUT_NAME lammps_pace${LAMMPS_MACHINE})
 target_include_directories(pace PUBLIC ${PACE_EVALUATOR_INCLUDE_DIR} ${YAML_CPP_INCLUDE_DIR})
 target_link_libraries(pace PRIVATE yaml-cpp-pace)
 if(CMAKE_PROJECT_NAME STREQUAL "lammps")
  target_link_libraries(lammps PRIVATE pace)
 endif()
--- a/cmake/presets/intel.cmake
+++ b/cmake/presets/intel.cmake
@ -1,4 +1,4 @@
-# preset that will enable Intel compilers with support for MPI and OpenMP (on Linux boxes)
+# preset that will enable the classic Intel compilers with support for MPI and OpenMP (on Linux boxes)
 set(CMAKE_CXX_COMPILER "icpc" CACHE STRING "" FORCE)
 set(CMAKE_C_COMPILER "icc" CACHE STRING "" FORCE)
@ -18,11 +18,11 @@ set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
 unset(HAVE_OMP_H_INCLUDE CACHE)
 set(OpenMP_C "icc" CACHE STRING "" FORCE)
-set(OpenMP_C_FLAGS "-qopenmp" CACHE STRING "" FORCE)
+set(OpenMP_C_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
 set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
 set(OpenMP_CXX "icpc" CACHE STRING "" FORCE)
-set(OpenMP_CXX_FLAGS "-qopenmp" CACHE STRING "" FORCE)
+set(OpenMP_CXX_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
 set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
-set(OpenMP_Fortran_FLAGS "-qopenmp" CACHE STRING "" FORCE)
+set(OpenMP_Fortran_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
 set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE)
--- a/cmake/presets/oneapi.cmake
+++ b/cmake/presets/oneapi.cmake
@ -18,11 +18,11 @@ set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
 unset(HAVE_OMP_H_INCLUDE CACHE)
 set(OpenMP_C "icx" CACHE STRING "" FORCE)
-set(OpenMP_C_FLAGS "-qopenmp" CACHE STRING "" FORCE)
+set(OpenMP_C_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
 set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
 set(OpenMP_CXX "icpx" CACHE STRING "" FORCE)
-set(OpenMP_CXX_FLAGS "-qopenmp" CACHE STRING "" FORCE)
+set(OpenMP_CXX_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
 set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
-set(OpenMP_Fortran_FLAGS "-qopenmp" CACHE STRING "" FORCE)
+set(OpenMP_Fortran_FLAGS "-qopenmp -qopenmp-simd" CACHE STRING "" FORCE)
 set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE)
--- a/doc/src/Build_manual.rst
+++ b/doc/src/Build_manual.rst
@ -216,7 +216,7 @@ be multiple tests run automatically:
 - A test that only standard, printable ASCII text characters are used.
  This runs the command ``env LC_ALL=C grep -n '[^ -~]' src/*.rst`` and
  thus prints all offending lines with filename and line number
-  prepended to the screen.  Special characters like greek letters
+  prepended to the screen.  Special characters like Greek letters
  (:math:`\alpha~~\sigma~~\epsilon`), super- or subscripts
  (:math:`x^2~~\mathrm{U}_{LJ}`), mathematical expressions
  (:math:`\frac{1}{2}\mathrm{N}~~x\to\infty`), or the Angstrom symbol
--- a/doc/src/Commands_pair.rst
+++ b/doc/src/Commands_pair.rst
@ -236,6 +236,7 @@ OPT.
   * :doc:`oxrna2/xstk <pair_oxrna2>`
   * :doc:`oxrna2/coaxstk <pair_oxrna2>`
   * :doc:`pace (k) <pair_pace>`
   * :doc:`pace/extrapolation <pair_pace>`
   * :doc:`peri/eps <pair_peri>`
   * :doc:`peri/lps (o) <pair_peri>`
   * :doc:`peri/pmb (o) <pair_peri>`
--- a/doc/src/Fortran.rst
+++ b/doc/src/Fortran.rst
--- a/doc/src/Library_objects.rst
+++ b/doc/src/Library_objects.rst
@ -6,6 +6,7 @@ fixes, or variables in LAMMPS using the following functions:
 - :cpp:func:`lammps_extract_compute`
 - :cpp:func:`lammps_extract_fix`
 - :cpp:func:`lammps_extract_variable_datatype`
 - :cpp:func:`lammps_extract_variable`
 - :cpp:func:`lammps_set_variable`
@ -21,6 +22,11 @@ fixes, or variables in LAMMPS using the following functions:
 -----------------------
 .. doxygenfunction:: lammps_extract_variable_datatype
   :project: progguide
 -----------------------
 .. doxygenfunction:: lammps_extract_variable
   :project: progguide
@ -36,3 +42,5 @@ fixes, or variables in LAMMPS using the following functions:
 .. doxygenenum:: _LMP_STYLE_CONST
 .. doxygenenum:: _LMP_TYPE_CONST
 .. doxygenenum:: _LMP_VAR_CONST
--- a/doc/src/Library_properties.rst
+++ b/doc/src/Library_properties.rst
@ -16,8 +16,8 @@ This section documents the following functions:
 --------------------
 The library interface allows the extraction of different kinds of
-information about the active simulation instance and also - in some
+information about the active simulation instance and also---in some
-cases - to apply modifications to it.  This enables combining of a
+cases---to apply modifications to it.  This enables combining of a
 LAMMPS simulation with other processing and simulation methods computed
 by the calling code, or by another code that is coupled to LAMMPS via
 the library interface.  In some cases the data returned is direct
@ -25,9 +25,9 @@ reference to the original data inside LAMMPS, cast to a void pointer.
 In that case the data needs to be cast to a suitable pointer for the
 calling program to access it, and you may need to know the correct
 dimensions and lengths.  This also means you can directly change those
-value(s) from the calling program, e.g. to modify atom positions.  Of
+value(s) from the calling program (e.g., to modify atom positions).  Of
-course, this should be done with care.  When accessing per-atom data,
+course, changing values should be done with care.  When accessing per-atom
-please note that this data is the per-processor **local** data and is
+data, please note that these data are the per-processor **local** data and are
 indexed accordingly. Per-atom data can change sizes and ordering at
 every neighbor list rebuild or atom sort event as atoms migrate between
 sub-domains and processors.
--- a/doc/src/Run_basics.rst
+++ b/doc/src/Run_basics.rst
@ -30,12 +30,13 @@ executable itself can be placed elsewhere.
 .. note::
-   The redirection operator "<" will not always work when running
+   The redirection operator "<" will not always work when running in
-   in parallel with mpirun or mpiexec; for those systems the -in form is required.
+   parallel with ``mpirun`` or ``mpiexec``; for those systems the -in
   form is required.
 As LAMMPS runs it prints info to the screen and a logfile named
-*log.lammps*\ .  More info about output is given on the
+*log.lammps*\ .  More info about output is given on the :doc:`screen and
-:doc:`screen and logfile output <Run_output>` page.
+logfile output <Run_output>` page.
 If LAMMPS encounters errors in the input script or while running a
 simulation it will print an ERROR message and stop or a WARNING
--- a/doc/src/Run_options.rst
+++ b/doc/src/Run_options.rst
@ -93,13 +93,13 @@ switch is not set (the default), LAMMPS will operate as if the KOKKOS
 package were not installed; i.e. you can run standard LAMMPS or with
 the GPU or OPENMP packages, for testing or benchmarking purposes.
-Additional optional keyword/value pairs can be specified which
+Additional optional keyword/value pairs can be specified which determine
-determine how Kokkos will use the underlying hardware on your
+how Kokkos will use the underlying hardware on your platform.  These
-platform.  These settings apply to each MPI task you launch via the
+settings apply to each MPI task you launch via the ``mpirun`` or
-"mpirun" or "mpiexec" command.  You may choose to run one or more MPI
+``mpiexec`` command.  You may choose to run one or more MPI tasks per
-tasks per physical node.  Note that if you are running on a desktop
+physical node.  Note that if you are running on a desktop machine, you
-machine, you typically have one physical node.  On a cluster or
+typically have one physical node.  On a cluster or supercomputer there
-supercomputer there may be dozens or 1000s of physical nodes.
+may be dozens or 1000s of physical nodes.
 Either the full word or an abbreviation can be used for the keywords.
 Note that the keywords do not use a leading minus sign.  I.e. the
@ -148,9 +148,9 @@ one of these 4 environment variables
   MV2_COMM_WORLD_LOCAL_RANK (Mvapich)
   OMPI_COMM_WORLD_LOCAL_RANK (OpenMPI)
-which are initialized by the "srun", "mpirun" or "mpiexec" commands.
+which are initialized by the ``srun``, ``mpirun``, or ``mpiexec``
-The environment variable setting for each MPI rank is used to assign a
+commands.  The environment variable setting for each MPI rank is used to
-unique GPU ID to the MPI task.
+assign a unique GPU ID to the MPI task.
 .. parsed-literal::
--- a/doc/src/Speed_gpu.rst
+++ b/doc/src/Speed_gpu.rst
@ -76,10 +76,11 @@ instructions.
 **Run with the GPU package from the command line:**
-The mpirun or mpiexec command sets the total number of MPI tasks used
+The ``mpirun`` or ``mpiexec`` command sets the total number of MPI tasks
-by LAMMPS (one or multiple per compute node) and the number of MPI
+used by LAMMPS (one or multiple per compute node) and the number of MPI
-tasks used per node.  E.g. the mpirun command in MPICH does this via
+tasks used per node.  E.g. the ``mpirun`` command in MPICH does this via
-its -np and -ppn switches.  Ditto for OpenMPI via -np and -npernode.
+its ``-np`` and ``-ppn`` switches.  Ditto for OpenMPI via ``-np`` and
 ``-npernode``.
 When using the GPU package, you cannot assign more than one GPU to a
 single MPI task.  However multiple MPI tasks can share the same GPU,
@ -129,8 +130,8 @@ GPU package pair styles.
 **Or run with the GPU package by editing an input script:**
-The discussion above for the mpirun/mpiexec command, MPI tasks/node,
+The discussion above for the ``mpirun`` or ``mpiexec`` command, MPI
-and use of multiple MPI tasks/GPU is the same.
+tasks/node, and use of multiple MPI tasks/GPU is the same.
 Use the :doc:`suffix gpu <suffix>` command, or you can explicitly add a
 "gpu" suffix to individual styles in your input script, e.g.
--- a/doc/src/Speed_kokkos.rst
+++ b/doc/src/Speed_kokkos.rst
@ -72,12 +72,12 @@ See the :ref:`Build extras <kokkos>` page for instructions.
 Running LAMMPS with the KOKKOS package
 """"""""""""""""""""""""""""""""""""""
-All Kokkos operations occur within the context of an individual MPI
+All Kokkos operations occur within the context of an individual MPI task
-task running on a single node of the machine. The total number of MPI
+running on a single node of the machine. The total number of MPI tasks
-tasks used by LAMMPS (one or multiple per compute node) is set in the
+used by LAMMPS (one or multiple per compute node) is set in the usual
-usual manner via the mpirun or mpiexec commands, and is independent of
+manner via the ``mpirun`` or ``mpiexec`` commands, and is independent of
-Kokkos. E.g. the mpirun command in OpenMPI does this via its -np and
+Kokkos. E.g. the ``mpirun`` command in OpenMPI does this via its ``-np`` and
-npernode switches. Ditto for MPICH via -np and -ppn.
+``-npernode`` switches. Ditto for MPICH via ``-np`` and ``-ppn``.
 Running on a multi-core CPU
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -310,7 +310,8 @@ Alternatively the effect of the "-sf" or "-pk" switches can be
 duplicated by adding the :doc:`package kokkos <package>` or :doc:`suffix kk <suffix>` commands to your input script.
 The discussion above for building LAMMPS with the KOKKOS package, the
-mpirun/mpiexec command, and setting appropriate thread are the same.
+``mpirun`` or ``mpiexec`` command, and setting appropriate thread
 properties are the same.
 You must still use the "-k on" :doc:`command-line switch <Run_options>`
 to enable the KOKKOS package, and specify its additional arguments for
--- a/doc/src/Speed_omp.rst
+++ b/doc/src/Speed_omp.rst
@ -33,8 +33,8 @@ These examples assume one or more 16-core nodes.
   mpirun -np 4 lmp_omp -sf omp -pk omp 4 -in in.script           # 4 MPI tasks, 4 threads/task
   mpirun -np 32 -ppn 4 lmp_omp -sf omp -pk omp 4 -in in.script   # 8 nodes, 4 MPI tasks/node, 4 threads/task
-The mpirun or mpiexec command sets the total number of MPI tasks used
+The ``mpirun`` or ``mpiexec`` command sets the total number of MPI tasks
-by LAMMPS (one or multiple per compute node) and the number of MPI
+used by LAMMPS (one or multiple per compute node) and the number of MPI
 tasks used per node.  E.g. the mpirun command in MPICH does this via
 its -np and -ppn switches.  Ditto for OpenMPI via -np and -npernode.
@ -58,8 +58,8 @@ OMP_NUM_THREADS environment variable.
 Or run with the OPENMP package by editing an input script
 """""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-The discussion above for the mpirun/mpiexec command, MPI tasks/node,
+The discussion above for the ``mpirun`` or ``mpiexec`` command, MPI
-and threads/MPI task is the same.
+tasks/node, and threads/MPI task is the same.
 Use the :doc:`suffix omp <suffix>` command, or you can explicitly add an
 "omp" suffix to individual styles in your input script, e.g.
--- a/doc/src/pair_pace.rst
+++ b/doc/src/pair_pace.rst
@ -1,11 +1,15 @@
 .. index:: pair_style pace
 .. index:: pair_style pace/kk
 .. index:: pair_style pace/extrapolation
 pair_style pace command
 =======================
 Accelerator Variants: *pace/kk*
 pair_style pace/extrapolation command
 =====================================
 Syntax
 """"""
@ -22,6 +26,10 @@ Syntax
       *recursive* = use recursive algorithm for basis functions
       *chunksize* value = number of atoms in each pass
 .. code-block:: LAMMPS
   pair_style pace/extrapolation
 Examples
 """"""""
@ -31,50 +39,96 @@ Examples
   pair_style pace product chunksize 2048
   pair_coeff * * Cu-PBE-core-rep.ace Cu
   pair_style pace/extrapolation
   pair_coeff * * Cu.yaml Cu.asi Cu
 Description
 """""""""""
 Pair style *pace* computes interactions using the Atomic Cluster
 Expansion (ACE), which is a general expansion of the atomic energy in
-multi-body basis functions. :ref:`(Drautz) <Drautz20191>`.
+multi-body basis functions. :ref:`(Drautz) <Drautz20191>`.  The *pace*
-The *pace* pair style
+pair style provides an efficient implementation that is described in
-provides an efficient implementation that
+this paper :ref:`(Lysogorskiy) <Lysogorskiy20211>`.
 is described in this paper :ref:`(Lysogorskiy) <Lysogorskiy20211>`.
-In ACE, the total energy is decomposed into a sum over
+In ACE, the total energy is decomposed into a sum over atomic
-atomic energies. The energy of atom *i* is expressed as a
+energies. The energy of atom *i* is expressed as a linear or non-linear
-linear or non-linear function of one or more density functions.
+function of one or more density functions.  By projecting the density
-By projecting the
+onto a local atomic base, the lowest order contributions to the energy
-density onto a local atomic base, the lowest order contributions
+can be expressed as a set of scalar polynomials in basis function
-to the energy can be expressed as a set of scalar polynomials in
+contributions summed over neighbor atoms.
 basis function contributions summed over neighbor atoms.
 Only a single pair_coeff command is used with the *pace* style which
 specifies an ACE coefficient file followed by N additional arguments
-specifying the mapping of ACE elements to LAMMPS atom types,
+specifying the mapping of ACE elements to LAMMPS atom types, where N is
-where N is the number of LAMMPS atom types:
+the number of LAMMPS atom types:
 * ACE coefficient file
 * N element names = mapping of ACE elements to atom types
 Only a single pair_coeff command is used with the *pace* style which
-specifies an ACE file that fully defines the potential.
+specifies an ACE file that fully defines the potential.  Note that
-Note that unlike for other potentials, cutoffs are
+unlike for other potentials, cutoffs are not set in the pair_style or
-not set in the pair_style or pair_coeff command; they are specified in
+pair_coeff command; they are specified in the ACE file.
 the ACE file.
 The pair_style *pace* command may be followed by the optional keyword
-*product* or *recursive*, which determines which of two algorithms
+*product* or *recursive*, which determines which of two algorithms is
-is used for the calculation of basis functions and derivatives.
+used for the calculation of basis functions and derivatives.  The
-The default is *recursive*.
+default is *recursive*.
-The keyword *chunksize* is only applicable when
+The keyword *chunksize* is only applicable when using the pair style
-using the pair style *pace* with the KOKKOS package on GPUs and is
+*pace* with the KOKKOS package on GPUs and is ignored otherwise.  This
-ignored otherwise.  This keyword controls the number of atoms
+keyword controls the number of atoms in each pass used to compute the
-in each pass used to compute the atomic cluster expansion and is used to
+atomic cluster expansion and is used to avoid running out of memory.
-avoid running out of memory.  For example if there are 8192 atoms in the
+For example if there are 8192 atoms in the simulation and the
-simulation and the *chunksize* is set to 4096, the ACE
+*chunksize* is set to 4096, the ACE calculation will be broken up into
-calculation will be broken up into two passes (running on a single GPU).
+two passes (running on a single GPU).
 Extrapolation grade
 """""""""""""""""""
 Calculation of the extrapolation grade in PACE is implemented in `pair_style
 pace/extrapolation`.  It is based on the MaxVol algorithm, similar to the
 Moment Tensor Potential (MTP) by Shapeev et al., and is described in
 :ref:`(Lysogorskiy2) <Lysogorskiy2022>`.  In order to compute the
 extrapolation grade one needs to provide:
 #. ACE potential in B-basis form (`.yaml` format) and
 #. Active Set Inverted (ASI) file for corresponding potential (`.asi` format)
 Calculation of extrapolation grades requires matrix-vector
 multiplication for each atom and is slower than the usual `pair_style
 pace recursive`, therefore it is *not* computed by default.
 Extrapolation grade calculation is invoked by `fix pair`, which
 requests to compute `gamma`, as shown in the example below:
 .. code-block:: LAMMPS
    pair_style 	pace/extrapolation
    pair_coeff  * * Cu.yaml Cu.asi Cu
    fix pace_gamma all pair 10 pace/extrapolation gamma 1
    compute max_pace_gamma all reduce max f_pace_gamma
    variable dump_skip equal "c_max_pace_gamma < 5"
    dump pace_dump all custom 20 extrapolative_structures.dump id x y z f_pace_gamma
    dump_modify pace_dump skip v_dump_skip
    variable max_pace_gamma equal c_max_pace_gamma
    fix extreme_extrapolation all halt 10 v_max_pace_gamma > 25
 Here the extrapolation grade gamma is computed every 10 steps and is stored
 in the `f_pace_gamma` per-atom variable.  The largest value of the extrapolation
 grade among all atoms in a structure is reduced to the `c_max_pace_gamma`
 variable.  Only if this value exceeds the extrapolation threshold of 5 will
 the structure be dumped into the `extrapolative_structures.dump` file,
 but not more often than every 20 steps.
 On all other steps `pair_style pace recursive` will be used.
 ----------
 See the :doc:`pair_coeff <pair_coeff>` page for alternate ways
 to specify the path for the ACE coefficient file.
@ -90,9 +144,10 @@ specify a pair_coeff command with I != J arguments for this style.
 This pair style does not support the :doc:`pair_modify <pair_modify>`
 shift, table, and tail options.
-This pair style does not write its information to :doc:`binary restart files <restart>`, since it is stored in potential files.  Thus, you
+This pair style does not write its information to :doc:`binary restart
-need to re-specify the pair_style and pair_coeff commands in an input
+files <restart>`, since it is stored in potential files.  Thus, you need
-script that reads a restart file.
+to re-specify the pair_style and pair_coeff commands in an input script
 that reads a restart file.
 This pair style can only be used via the *pair* keyword of the
 :doc:`run_style respa <run_style>` command.  It does not support the
@ -107,19 +162,20 @@ This pair style can only be used via the *pair* keyword of the
 Restrictions
 """"""""""""
-This pair style is part of the ML-PACE package.  It is only enabled if LAMMPS
+This pair style is part of the ML-PACE package.  It is only enabled if
-was built with that package.
+LAMMPS was built with that package.  See the :doc:`Build package
-See the :doc:`Build package <Build_package>` page for more info.
+<Build_package>` page for more info.
 Related commands
 """"""""""""""""
-:doc:`pair_style snap  <pair_snap>`
+:doc:`pair_style snap  <pair_snap>`,
 :doc:`fix pair  <fix_pair>`
 Default
 """""""
-recursive, chunksize = 4096
+recursive, chunksize = 4096,
 .. _Drautz20191:
@ -127,4 +183,8 @@ recursive, chunksize = 4096
 .. _Lysogorskiy20211:
-**(Lysogorskiy)** Lysogorskiy, van der Oord, Bochkarev, Menon, Rinaldi, Hammerschmidt, Mrovec, Thompson, Csanyi, Ortner, Drautz, TBD (2021).
+**(Lysogorskiy)** Lysogorskiy, van der Oord, Bochkarev, Menon, Rinaldi, Hammerschmidt, Mrovec, Thompson, Csanyi, Ortner, Drautz, npj Comp Mat, 7, 97 (2021).
 .. _Lysogorskiy2022:
 **(Lysogorskiy2)** Lysogorskiy, Bochkarev, Mrovec, Drautz, TBS (2022).
--- a/doc/src/pair_style.rst
+++ b/doc/src/pair_style.rst
@ -313,6 +313,7 @@ accelerated styles exist.
 * :doc:`oxrna2/stk <pair_oxrna2>` -
 * :doc:`oxrna2/xstk <pair_oxrna2>` -
 * :doc:`pace <pair_pace>` - Atomic Cluster Expansion (ACE) machine-learning potential
 * :doc:`pace/extrapolation <pair_pace>` - Atomic Cluster Expansion (ACE) machine-learning potential with extrapolation grades
 * :doc:`peri/eps <pair_peri>` - peridynamic EPS potential
 * :doc:`peri/lps <pair_peri>` - peridynamic LPS potential
 * :doc:`peri/pmb <pair_peri>` - peridynamic PMB potential
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@ -78,6 +78,7 @@ Alexey
 ali
 aliceblue
 Allinger
 allocatable
 allocator
 allocators
 allosws
@ -657,6 +658,7 @@ Dcut
 de
 dE
 De
 deallocate
 deallocated
 debye
 Debye
@ -691,6 +693,7 @@ dequidt
 Dequidt
 der
 dereference
 dereferenced
 derekt
 Deresiewicz
 Derjagin
@ -1486,6 +1489,7 @@ interfacial
 interial
 interlayer
 intermolecular
 interoperable
 Interparticle
 interstitials
 intertube
@ -2387,6 +2391,7 @@ Nmols
 nn
 nnodes
 npits
 npj
 nO
 Nocedal
 nocite
@ -3163,6 +3168,7 @@ sfree
 Sg
 Shan
 Shanno
 Shapeev
 shapex
 shapey
 shapez
@ -3619,6 +3625,7 @@ Universite
 unix
 unmaintained
 unoptimized
 unordered
 unpadded
 unphysical
 unphysically
--- a/fortran/README
+++ b/fortran/README
@ -1,9 +1,9 @@
-This directory contains Fortran code which interface LAMMPS as a library
+This directory contains Fortran code that acts as an interface to LAMMPS as a
-and allows the LAMMPS library interface to be invoked from Fortran codes.
+library and allows the LAMMPS library interface to be invoked from Fortran
-It requires a Fortran compiler that supports the Fortran 2003 standard.
+code.  It requires a Fortran compiler that supports the Fortran 2003 standard.
 This interface is based on and supersedes the previous Fortran interfaces
-in the examples/COUPLE/fortran* folders, but is fully supported by the
+in the examples/COUPLE/fortran* folders, but it is fully supported by the
 LAMMPS developers and included in the documentation and unit testing.
 Details on this Fortran interface and how to build programs using it
--- a/fortran/lammps.f90
+++ b/fortran/lammps.f90
--- a/lib/gpu/Makefile.oneapi
+++ b/lib/gpu/Makefile.oneapi
@ -1,5 +1,5 @@
 # /* ----------------------------------------------------------------------   
-#  Generic Linux Makefile for OpenCL
+#  Linux Makefile for Intel oneAPI - Mixed precision
 # ------------------------------------------------------------------------- */
 # which file will be copied to Makefile.lammps
@ -11,11 +11,14 @@ EXTRAMAKE = Makefile.lammps.opencl
 LMP_INC = -DLAMMPS_SMALLBIG
-OCL_INC =
+OCL_INC = -I$(ONEAPI_ROOT)/compiler/latest/linux/include/sycl/
-OCL_CPP = mpiicpc -std=c++11 -xHost -O2 -qopenmp -qopenmp-simd  -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC)
+CPP_OPT = -xHost -O2 -qopenmp -qopenmp-simd -fp-model fast=2 -no-prec-div \
-OCL_LINK = -lOpenCL
+          -qoverride-limits
 OCL_CPP = mpiicpc -std=c++11 -diag-disable=10441 -DMPICH_IGNORE_CXX_SEEK \
          $(LMP_INC) $(OCL_INC) $(CPP_OPT)
 OCL_LINK = -L$(ONEAPI_ROOT)/compiler/latest/linux/lib -lOpenCL
 OCL_PREC = -D_SINGLE_DOUBLE
-OCL_TUNE = -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT -fp-model fast=2 -no-prec-div
+OCL_TUNE = -DMPI_GERYON -DCUDA_PROXY -DGERYON_NUMA_FISSION -DUCL_NO_EXIT
 BIN_DIR = ./
 OBJ_DIR = ./
--- a/lib/gpu/README
+++ b/lib/gpu/README
@ -264,6 +264,20 @@ GERYON_KERNEL_DUMP      Dump all compiled OpenCL programs with compiler
                        flags and build logs
 GPU_CAST                Casting performed on GPU, untested recently
 THREE_CONCURRENT        Concurrent 3-body calcs in separate queues, untested
 LAL_SERIALIZE_INIT      Force serialization of initialization and compilation
                        for multiple MPI tasks sharing the same accelerator.
                        Some accelerator API implementations have had issues
                        with temporary file conflicts in the past.
 GERYON_FORCE_SHARED_MAIN_MEM_ON      Should only be used for builds where the
                                     accelerator is guaranteed to share physical
                                     main memory with the host (e.g. integrated
                                     GPU or CPU device). Default behavior is to
                                     auto-detect. Impacts OpenCL only.
 GERYON_FORCE_SHARED_MAIN_MEM_OFF     Should only be used for builds where the
                                     accelerator is guaranteed to have discrete
                                     physical main memory vs the host (discrete
                                     GPU card). Default behavior is to
                                     auto-detect. Impacts OpenCL only.
 ------------------------------------------------------------------------------
--- a/lib/gpu/geryon/ocl_device.h
+++ b/lib/gpu/geryon/ocl_device.h
@ -126,10 +126,13 @@ class UCL_Device {
  /// Return the number of devices that support OpenCL
  inline int num_devices() { return _num_devices; }
-  /// Specify whether profiling (device timers) will be used for the device (yes=true)
+  /// Specify whether profiling (device timers) will be used (yes=true)
  /** No-op for CUDA and HIP **/
-  inline void configure_profiling(const bool profiling_on)
+  inline void configure_profiling(const bool profiling_on) {
-    { _cq_profiling = profiling_on; }
+    #ifndef GERYON_NO_OCL_MARKERS
    _cq_profiling = profiling_on;
    #endif
  }
  /// Set the OpenCL device to the specified device number
  /** A context and default command queue will be created for the device *
@ -176,8 +179,8 @@ class UCL_Device {
 #ifdef CL_VERSION_2_0
    if (_cq_profiling) {
-      cl_queue_properties props[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE,
+      cl_queue_properties props[] = {CL_QUEUE_PROPERTIES,
-                                     0};
+                                     CL_QUEUE_PROFILING_ENABLE, 0};
      _cq.back()=clCreateCommandQueueWithProperties(_context, _cl_device, props,
                                                    &errorv);
    } else {
@ -187,8 +190,8 @@ class UCL_Device {
    }
 #else
    if (_cq_profiling)
-      _cq.back()=clCreateCommandQueue(_context, _cl_device, CL_QUEUE_PROFILING_ENABLE,
+      _cq.back()=clCreateCommandQueue(_context, _cl_device,
-                                      &errorv);
+                                      CL_QUEUE_PROFILING_ENABLE, &errorv);
    else
      _cq.back()=clCreateCommandQueue(_context, _cl_device, 0, &errorv);
 #endif
@ -403,7 +406,11 @@ class UCL_Device {
 // Grabs the properties for all devices
 UCL_Device::UCL_Device() {
  _device=-1;
  #ifndef GERYON_NO_OCL_MARKERS
  _cq_profiling=true;
  #else
  _cq_profiling=false;
  #endif
  // --- Get Number of Platforms
  cl_uint nplatforms;
@ -482,6 +489,7 @@ int UCL_Device::set_platform(int pid) {
  _num_devices = 0;
  for (int i=0; i<num_unpart; i++) {
    cl_uint num_subdevices = 1;
    cl_device_id *subdevice_list = device_list + i;
    #ifdef CL_VERSION_1_2
    cl_device_affinity_domain adomain;
@ -494,25 +502,29 @@ int UCL_Device::set_platform(int pid) {
    props[0]=CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
    props[1]=CL_DEVICE_AFFINITY_DOMAIN_NUMA;
    props[2]=0;
    cl_int err = CL_SUCCESS;
    if (adomain & CL_DEVICE_AFFINITY_DOMAIN_NUMA)
-      CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, 0, NULL,
+      err = clCreateSubDevices(device_list[i], props, 0, NULL,
-                                      &num_subdevices));
+                               &num_subdevices);
-    if (num_subdevices > 1) {
+    if (err == CL_SUCCESS && num_subdevices > 1) {
-      cl_device_id *subdevice_list = new cl_device_id[num_subdevices];
+      subdevice_list = new cl_device_id[num_subdevices];
-      CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, num_subdevices,
+      err = clCreateSubDevices(device_list[i], props, num_subdevices,
-                                      subdevice_list, &num_subdevices));
+                               subdevice_list, &num_subdevices);
-      for (cl_uint j=0; j<num_subdevices; j++) {
+      if (err != CL_SUCCESS) {
        _cl_devices.push_back(device_list[i]);
        add_properties(device_list[i]);
        _num_devices++;
      }
        delete[] subdevice_list;
-    } else {
+        num_subdevices = 1;
-      _cl_devices.push_back(device_list[i]);
+        subdevice_list = device_list + i;
-      add_properties(device_list[i]);
+      }
      _num_devices++;
    }
    #endif
    for (cl_uint j=0; j<num_subdevices; j++) {
      _num_devices++;
      _cl_devices.push_back(subdevice_list[j]);
      add_properties(subdevice_list[j]);
    }
    if (num_subdevices > 1) delete[] subdevice_list;
  } // for i
  #endif
@ -686,10 +698,10 @@ void UCL_Device::add_properties(cl_device_id device_list) {
    double arch = static_cast<double>(minor)/10+major;
    if (arch >= 3.0)
      op.has_shuffle_support=true;
    op.shared_main_memory=_shared_mem_device(device_list);
  }
  delete[] buffer2;
  #endif
  op.shared_main_memory=_shared_mem_device(device_list);
  _properties.push_back(op);
 }
--- a/lib/gpu/geryon/ocl_timer.h
+++ b/lib/gpu/geryon/ocl_timer.h
@ -27,11 +27,15 @@
 #include "ocl_macros.h"
 #include "ocl_device.h"
 // UCL_OCL_MARKER(cq,event) selects how a timing marker is enqueued:
 //  - markers enabled and CL_VERSION_1_2 defined: clEnqueueMarkerWithWaitList
 //    called with an empty wait list (0, nullptr)
 //  - markers enabled without CL_VERSION_1_2: alias for clEnqueueMarker
 //  - GERYON_NO_OCL_MARKERS defined: expands to nothing, so no marker is
 //    enqueued and the event argument is never written
 #ifndef GERYON_NO_OCL_MARKERS
 #ifdef CL_VERSION_1_2
 #define UCL_OCL_MARKER(cq,event) clEnqueueMarkerWithWaitList(cq,0,nullptr,event)
 #else
 #define UCL_OCL_MARKER clEnqueueMarker
 #endif
 #else
 #define UCL_OCL_MARKER(cq,event)
 #endif
 namespace ucl_opencl {
@ -51,8 +55,10 @@ class UCL_Timer {
  inline void clear() {
    if (_initialized) {
      if (has_measured_time) {
        #ifndef GERYON_NO_OCL_MARKERS
        clReleaseEvent(start_event);
        clReleaseEvent(stop_event);
        #endif
        has_measured_time = false;
      }
      CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq));
@ -76,8 +82,10 @@ class UCL_Timer {
  /// Start timing on default command queue
  inline void start() {
    if (has_measured_time) {
      #ifndef GERYON_NO_OCL_MARKERS
      clReleaseEvent(start_event);
      clReleaseEvent(stop_event);
      #endif
      has_measured_time = false;
    }
    UCL_OCL_MARKER(_cq,&start_event);
@ -91,17 +99,26 @@ class UCL_Timer {
  /// Block until the start event has been reached on device
  /** In both build configurations has_measured_time is false on return,
    * so any previously completed measurement is discarded. **/
  inline void sync_start() {
    #ifndef GERYON_NO_OCL_MARKERS
    CL_SAFE_CALL(clWaitForEvents(1,&start_event));
    if (has_measured_time) {
      clReleaseEvent(start_event);
      clReleaseEvent(stop_event);
      has_measured_time = false;
    }
-    CL_SAFE_CALL(clWaitForEvents(1,&start_event));
+    #else
    // Markers compiled out: wait on the whole command queue instead of
    // on start_event.
    CL_SAFE_CALL(clFinish(_cq));
    has_measured_time = false;
    #endif
  }
  /// Block until the stop event has been reached on device
  /** With markers enabled this waits on stop_event; when
    * GERYON_NO_OCL_MARKERS is defined it drains the command queue with
    * clFinish() instead. Either way has_measured_time is set to true. **/
  inline void sync_stop() {
    #ifndef GERYON_NO_OCL_MARKERS
    CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
    #else
    CL_SAFE_CALL(clFinish(_cq));
    #endif
    has_measured_time = true;
  }
@ -126,6 +143,7 @@ class UCL_Timer {
  /// Return the time (ms) of last start to stop - Forces synchronization
  inline double time() {
    if(!has_measured_time) return 0.0;
    #ifndef GERYON_NO_OCL_MARKERS
    cl_ulong tstart,tend;
    CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
    CL_SAFE_CALL(clGetEventProfilingInfo(stop_event,
@ -138,6 +156,11 @@ class UCL_Timer {
    clReleaseEvent(stop_event);
    has_measured_time = false;
    return (tend-tstart)*1e-6;
    #else
    CL_SAFE_CALL(clFinish(_cq));
    has_measured_time = false;
    return 0.0;
    #endif
  }
  /// Return the time (s) of last start to stop - Forces synchronization
--- a/lib/gpu/lal_beck_ext.cpp
+++ b/lib/gpu/lal_beck_ext.cpp
@ -76,7 +76,7 @@ int beck_gpu_init(const int ntypes, double **cutsq, double **aa,
                        special_lj, inum, nall, max_nbors, maxspecial,
                        cell_size, gpu_split, screen);
-    BLMF.device->gpu_barrier();
+    BLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_born_coul_long_cs_ext.cpp
+++ b/lib/gpu/lal_born_coul_long_cs_ext.cpp
@ -84,7 +84,7 @@ int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                            gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
                            host_special_coul, qqrd2e, g_ewald);
-    BCLCSMF.device->gpu_barrier();
+    BCLCSMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_born_coul_long_ext.cpp
+++ b/lib/gpu/lal_born_coul_long_ext.cpp
@ -84,7 +84,7 @@ int borncl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                            gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
                            host_special_coul, qqrd2e, g_ewald);
-    BORNCLMF.device->gpu_barrier();
+    BORNCLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_born_coul_wolf_cs_ext.cpp
+++ b/lib/gpu/lal_born_coul_wolf_cs_ext.cpp
@ -86,7 +86,7 @@ int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                            host_cut_coulsq, host_special_coul, qqrd2e,
                            alf, e_shift, f_shift);
-    BornCWCST.device->gpu_barrier();
+    BornCWCST.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_born_coul_wolf_ext.cpp
+++ b/lib/gpu/lal_born_coul_wolf_ext.cpp
@ -86,7 +86,7 @@ int borncw_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                            host_cut_coulsq, host_special_coul, qqrd2e,
                            alf, e_shift, f_shift);
-    BORNCWMF.device->gpu_barrier();
+    BORNCWMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_born_ext.cpp
+++ b/lib/gpu/lal_born_ext.cpp
@ -80,7 +80,7 @@ int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                          offset, special_lj, inum, nall, max_nbors,
                          maxspecial, cell_size, gpu_split, screen);
-    BORNMF.device->gpu_barrier();
+    BORNMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -114,7 +114,7 @@ void born_gpu_reinit(const int ntypes, double **host_rhoinv,
      BORNMF.reinit(ntypes, host_rhoinv, host_born1, host_born2,
                    host_born3, host_a, host_c, host_d, offset);
-    BORNMF.device->gpu_barrier();
+    BORNMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_buck_coul_ext.cpp
+++ b/lib/gpu/lal_buck_coul_ext.cpp
@ -83,7 +83,7 @@ int buckc_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                       host_cut_ljsq, host_cut_coulsq,
                       host_special_coul, qqrd2e);
-    BUCKCMF.device->gpu_barrier();
+    BUCKCMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_buck_coul_long_ext.cpp
+++ b/lib/gpu/lal_buck_coul_long_ext.cpp
@ -82,7 +82,7 @@ int buckcl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                        maxspecial, cell_size, gpu_split, screen, host_cut_ljsq,
                        host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
-    BUCKCLMF.device->gpu_barrier();
+    BUCKCLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_buck_ext.cpp
+++ b/lib/gpu/lal_buck_ext.cpp
@ -77,7 +77,7 @@ int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                       host_a, host_c, offset, special_lj, inum, nall, max_nbors,
                       maxspecial, cell_size, gpu_split, screen);
-    BUCKMF.device->gpu_barrier();
+    BUCKMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -110,7 +110,7 @@ void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv,
      BUCKMF.reinit(ntypes, cutsq, host_rhoinv, host_buck1, host_buck2,
                    host_a, host_c, offset);
-    BUCKMF.device->gpu_barrier();
+    BUCKMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_charmm_ext.cpp
+++ b/lib/gpu/lal_charmm_ext.cpp
@ -88,7 +88,7 @@ int crm_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
                          qqrd2e, cut_lj_innersq, cut_coul_innersq, denom_lj,
                          denom_coul, epsilon, sigma, mix_arithmetic);
-    CRMMF.device->gpu_barrier();
+    CRMMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_charmm_long_ext.cpp
+++ b/lib/gpu/lal_charmm_long_ext.cpp
@ -86,7 +86,7 @@ int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
                          qqrd2e, g_ewald,  cut_lj_innersq, denom_lj, epsilon,
                          sigma, mix_arithmetic);
-    CRMLMF.device->gpu_barrier();
+    CRMLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_colloid_ext.cpp
+++ b/lib/gpu/lal_colloid_ext.cpp
@ -83,7 +83,7 @@ int colloid_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                          inum, nall, max_nbors, maxspecial,
                          cell_size, gpu_split, screen);
-    COLLMF.device->gpu_barrier();
+    COLLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_coul_debye_ext.cpp
+++ b/lib/gpu/lal_coul_debye_ext.cpp
@ -74,7 +74,7 @@ int cdebye_gpu_init(const int ntypes, double **host_scale, double **cutsq,
      init_ok=CDEMF.init(ntypes, host_scale, cutsq, host_special_coul, inum, nall, max_nbors,
                         maxspecial, cell_size, gpu_split, screen, qqrd2e, kappa);
-    CDEMF.device->gpu_barrier();
+    CDEMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -103,7 +103,7 @@ void cdebye_gpu_reinit(const int ntypes, double **host_scale) {
    if (gpu_rank==i && world_me!=0)
      CDEMF.reinit(ntypes, host_scale);
-    CDEMF.device->gpu_barrier();
+    CDEMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_coul_dsf_ext.cpp
+++ b/lib/gpu/lal_coul_dsf_ext.cpp
@ -77,7 +77,7 @@ int cdsf_gpu_init(const int ntypes, const int inum, const int nall,
                        gpu_split, screen, host_cut_coulsq, host_special_coul,
                        qqrd2e, e_shift, f_shift, alpha);
-    CDMF.device->gpu_barrier();
+    CDMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_coul_ext.cpp
+++ b/lib/gpu/lal_coul_ext.cpp
@ -74,7 +74,7 @@ int coul_gpu_init(const int ntypes, double **host_scale,
      init_ok=COULMF.init(ntypes, host_scale, cutsq, special_coul, inum, nall, max_nbors,
                          maxspecial, cell_size, gpu_split, screen, qqrd2e);
-    COULMF.device->gpu_barrier();
+    COULMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -103,7 +103,7 @@ void coul_gpu_reinit(const int ntypes, double **host_scale) {
    if (gpu_rank==i && world_me!=0)
      COULMF.reinit(ntypes, host_scale);
-    COULMF.device->gpu_barrier();
+    COULMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_coul_long_cs_ext.cpp
+++ b/lib/gpu/lal_coul_long_cs_ext.cpp
@ -76,7 +76,7 @@ int clcs_gpu_init(const int ntypes, double **host_scale,
                        cell_size, gpu_split, screen, host_cut_coulsq,
                        host_special_coul, qqrd2e, g_ewald);
-    CLCSMF.device->gpu_barrier();
+    CLCSMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -105,7 +105,7 @@ void clcs_gpu_reinit(const int ntypes, double **host_scale) {
    if (gpu_rank==i && world_me!=0)
      CLCSMF.reinit(ntypes, host_scale);
-    CLCSMF.device->gpu_barrier();
+    CLCSMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_coul_long_ext.cpp
+++ b/lib/gpu/lal_coul_long_ext.cpp
@ -76,7 +76,7 @@ int cl_gpu_init(const int ntypes, double **host_scale,
                        cell_size, gpu_split, screen, host_cut_coulsq,
                        host_special_coul, qqrd2e, g_ewald);
-    CLMF.device->gpu_barrier();
+    CLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -105,7 +105,7 @@ void cl_gpu_reinit(const int ntypes, double **host_scale) {
    if (gpu_rank==i && world_me!=0)
      CLMF.reinit(ntypes, host_scale);
-    CLMF.device->gpu_barrier();
+    CLMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_device.cpp
+++ b/lib/gpu/lal_device.cpp
@ -328,7 +328,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
  for (int i=0; i<_procs_per_gpu; i++) {
    if (_gpu_rank==i)
      flag=compile_kernels();
-    gpu_barrier();
+    serialize_init();
  }
  // check if double precision support is available
@ -609,6 +609,10 @@ void DeviceT::init_message(FILE *screen, const char *name,
    int last=last_gpu+1;
    if (last>gpu->num_devices())
      last=gpu->num_devices();
    if (gpu->num_platforms()>1) {
      std::string pname=gpu->platform_name();
      fprintf(screen,"Platform: %s\n",pname.c_str());
    }
    for (int i=first_gpu; i<last; i++) {
      std::string sname;
      if (i==first_gpu)
--- a/lib/gpu/lal_device.h
+++ b/lib/gpu/lal_device.h
@ -217,6 +217,12 @@ class Device {
  inline int gpu_rank() const { return _gpu_rank; }
  /// MPI Barrier for gpu
  inline void gpu_barrier() { MPI_Barrier(_comm_gpu); }
  /// Serialize GPU initialization and JIT for unsafe platforms
  /** Calls gpu_barrier() only when LAL_SERIALIZE_INIT is defined at build
    * time; otherwise the body is empty and the call does nothing. **/
  inline void serialize_init() {
    #ifdef LAL_SERIALIZE_INIT
    // Opt-in barrier; without the define no synchronization happens here
    gpu_barrier();
    #endif
  }
  /// Return the 'mode' for acceleration: GPU_FORCE, GPU_NEIGH or GPU_HYB_NEIGH
  inline int gpu_mode() const { return _gpu_mode; }
  /// Index of first device used by a node
--- a/lib/gpu/lal_dipole_lj_ext.cpp
+++ b/lib/gpu/lal_dipole_lj_ext.cpp
@ -80,7 +80,7 @@ int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                         cell_size, gpu_split, screen, host_cut_ljsq,
                         host_cut_coulsq, host_special_coul, qqrd2e);
-    DPLMF.device->gpu_barrier();
+    DPLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_dipole_lj_sf_ext.cpp
+++ b/lib/gpu/lal_dipole_lj_sf_ext.cpp
@ -80,7 +80,7 @@ int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                           cell_size, gpu_split, screen, host_cut_ljsq,
                           host_cut_coulsq, host_special_coul, qqrd2e);
-    DPLSFMF.device->gpu_barrier();
+    DPLSFMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_dipole_long_lj_ext.cpp
+++ b/lib/gpu/lal_dipole_long_lj_ext.cpp
@ -81,7 +81,7 @@ int dplj_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                         cell_size, gpu_split, screen, host_cut_ljsq,
                         host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
-    DPLJMF.device->gpu_barrier();
+    DPLJMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_dpd_ext.cpp
+++ b/lib/gpu/lal_dpd_ext.cpp
@ -76,7 +76,7 @@ int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
                         host_cut, special_lj, false, inum, nall, max_nbors,
                         maxspecial, cell_size, gpu_split, screen);
-    DPDMF.device->gpu_barrier();
+    DPDMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_dpd_tstat_ext.cpp
+++ b/lib/gpu/lal_dpd_tstat_ext.cpp
@ -76,7 +76,7 @@ int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0,
                         host_cut, special_lj, true, inum, nall, 300,
                         maxspecial, cell_size, gpu_split, screen);
-    DPDTMF.device->gpu_barrier();
+    DPDTMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_eam_alloy_ext.cpp
+++ b/lib/gpu/lal_eam_alloy_ext.cpp
@ -90,7 +90,7 @@ int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq,
                         nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
                         cell_size, gpu_split, screen);
-    EAMALMF.device->gpu_barrier();
+    EAMALMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_eam_ext.cpp
+++ b/lib/gpu/lal_eam_ext.cpp
@ -90,7 +90,7 @@ int eam_gpu_init(const int ntypes, double host_cutforcesq,
                         nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
                         cell_size, gpu_split, screen);
-    EAMMF.device->gpu_barrier();
+    EAMMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_eam_fs_ext.cpp
+++ b/lib/gpu/lal_eam_fs_ext.cpp
@ -90,7 +90,7 @@ int eam_fs_gpu_init(const int ntypes, double host_cutforcesq,
                         nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
                         cell_size, gpu_split, screen);
-    EAMFSMF.device->gpu_barrier();
+    EAMFSMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_gauss_ext.cpp
+++ b/lib/gpu/lal_gauss_ext.cpp
@ -76,7 +76,7 @@ int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a,
                        offset, special_lj, inum, nall, max_nbors, maxspecial,
                        cell_size, gpu_split, screen);
-    GLMF.device->gpu_barrier();
+    GLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -106,7 +106,7 @@ void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a,
    if (gpu_rank==i && world_me!=0)
      GLMF.reinit(ntypes, cutsq, host_a, host_b, offset);
-    GLMF.device->gpu_barrier();
+    GLMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_gayberne_ext.cpp
+++ b/lib/gpu/lal_gayberne_ext.cpp
@ -83,7 +83,7 @@ int gb_gpu_init(const int ntypes, const double gamma,
                        host_lj3, host_lj4, offset, special_lj,  inum, nall,
                        max_nbors, maxspecial, cell_size, gpu_split,  screen);
-    GBMF.device->gpu_barrier();
+    GBMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_lj96_ext.cpp
+++ b/lib/gpu/lal_lj96_ext.cpp
@ -76,7 +76,7 @@ int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                          offset, special_lj, inum,  nall, max_nbors, maxspecial,
                          cell_size, gpu_split, screen);
-    LJ96MF.device->gpu_barrier();
+    LJ96MF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_lj_class2_long_ext.cpp
+++ b/lib/gpu/lal_lj_class2_long_ext.cpp
@ -81,7 +81,7 @@ int c2cl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                          cell_size, gpu_split, screen, host_cut_ljsq,
                          host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
-    C2CLMF.device->gpu_barrier();
+    C2CLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_lj_coul_debye_ext.cpp
+++ b/lib/gpu/lal_lj_coul_debye_ext.cpp
@ -81,7 +81,7 @@ int ljcd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                          cell_size, gpu_split, screen, host_cut_ljsq,
                          host_cut_coulsq, host_special_coul, qqrd2e, kappa);
-    LJCDMF.device->gpu_barrier();
+    LJCDMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_lj_coul_ext.cpp
+++ b/lib/gpu/lal_lj_coul_ext.cpp
@ -80,7 +80,7 @@ int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                         cell_size, gpu_split, screen, host_cut_ljsq,
                         host_cut_coulsq, host_special_coul, qqrd2e);
-    LJCMF.device->gpu_barrier();
+    LJCMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_lj_coul_long_ext.cpp
+++ b/lib/gpu/lal_lj_coul_long_ext.cpp
@ -81,7 +81,7 @@ int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                          cell_size, gpu_split, screen, host_cut_ljsq,
                          host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
-    LJCLMF.device->gpu_barrier();
+    LJCLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -112,7 +112,7 @@ void ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
    if (gpu_rank==i && world_me!=0)
      LJCLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
                    offset, host_cut_ljsq);
-    LJCLMF.device->gpu_barrier();
+    LJCLMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_lj_coul_msm_ext.cpp
+++ b/lib/gpu/lal_lj_coul_msm_ext.cpp
@ -83,7 +83,7 @@ int ljcm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                          cell_size, gpu_split, screen, host_cut_ljsq,
                          host_cut_coulsq, host_special_coul, order, qqrd2e);
-    LJCMLMF.device->gpu_barrier();
+    LJCMLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_lj_cubic_ext.cpp
+++ b/lib/gpu/lal_lj_cubic_ext.cpp
@ -80,7 +80,7 @@ int ljcb_gpu_init(const int ntypes, double **cutsq, double **cut_inner_sq,
                              special_lj, inum, nall, max_nbors, maxspecial,
                              cell_size, gpu_split, screen);
-    LJCubicLMF.device->gpu_barrier();
+    LJCubicLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_lj_dsf_ext.cpp
+++ b/lib/gpu/lal_lj_dsf_ext.cpp
@ -84,7 +84,7 @@ int ljd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                         host_cut_coulsq, host_special_coul, qqrd2e, e_shift,
                         f_shift, alpha);
-    LJDMF.device->gpu_barrier();
+    LJDMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_lj_expand_coul_long_ext.cpp
+++ b/lib/gpu/lal_lj_expand_coul_long_ext.cpp
@ -81,7 +81,7 @@ int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                          cell_size, gpu_split, screen, host_cut_ljsq,
                          host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
-    LJECLMF.device->gpu_barrier();
+    LJECLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -112,7 +112,7 @@ void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
    if (gpu_rank==i && world_me!=0)
      LJECLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
                    offset, shift, host_cut_ljsq);
-    LJECLMF.device->gpu_barrier();
+    LJECLMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_lj_expand_ext.cpp
+++ b/lib/gpu/lal_lj_expand_ext.cpp
@ -108,7 +108,7 @@ void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
    if (gpu_rank==i && world_me!=0)
      LJEMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
                   offset, shift);
-    LJEMF.device->gpu_barrier();
+    LJEMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_lj_ext.cpp
+++ b/lib/gpu/lal_lj_ext.cpp
@ -76,7 +76,7 @@ int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                         offset, special_lj, inum, nall, max_nbors, maxspecial,
                         cell_size, gpu_split, screen);
-    LJLMF.device->gpu_barrier();
+    LJLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -105,7 +105,7 @@ void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
  for (int i=0; i<procs_per_gpu; i++) {
    if (gpu_rank==i && world_me!=0)
      LJLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset);
-    LJLMF.device->gpu_barrier();
+    LJLMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_lj_gromacs_ext.cpp
+++ b/lib/gpu/lal_lj_gromacs_ext.cpp
@ -81,7 +81,7 @@ int ljgrm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                           gpu_split, screen, host_ljsw1, host_ljsw2, host_ljsw3,
                           host_ljsw4, host_ljsw5, cut_inner, cut_inner_sq);
-    LJGRMMF.device->gpu_barrier();
+    LJGRMMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_lj_smooth_ext.cpp
+++ b/lib/gpu/lal_lj_smooth_ext.cpp
@ -80,7 +80,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                         cell_size, gpu_split, screen, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3,
                         host_ljsw4, cut_inner, cut_inner_sq);
-    LJSMTMF.device->gpu_barrier();
+    LJSMTMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -110,7 +110,7 @@ void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
  for (int i=0; i<procs_per_gpu; i++) {
    if (gpu_rank==i && world_me!=0)
      LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
-    LJSMTMF.device->gpu_barrier();
+    LJSMTMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_lj_spica_ext.cpp
+++ b/lib/gpu/lal_lj_spica_ext.cpp
@ -77,7 +77,7 @@ int spica_gpu_init(const int ntypes, double **cutsq, int **cg_types,
                         host_lj4, offset, special_lj, inum, nall, max_nbors,
                         maxspecial, cell_size, gpu_split, screen);
-    CMMMF.device->gpu_barrier();
+    CMMMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_lj_spica_long_ext.cpp
+++ b/lib/gpu/lal_lj_spica_long_ext.cpp
@ -81,7 +81,7 @@ int spical_gpu_init(const int ntypes, double **cutsq, int **cg_type,
                          maxspecial, cell_size, gpu_split, screen,
                          host_cut_ljsq, host_cut_coulsq, host_special_coul,
                          qqrd2e, g_ewald);
-    CMMLMF.device->gpu_barrier();
+    CMMLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_lj_tip4p_long_ext.cpp
+++ b/lib/gpu/lal_lj_tip4p_long_ext.cpp
@ -89,7 +89,7 @@ int ljtip4p_long_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
          host_special_coul, qqrd2e,
          g_ewald, map_size, max_same);
-    LJTIP4PLMF.device->gpu_barrier();
+    LJTIP4PLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_mie_ext.cpp
+++ b/lib/gpu/lal_mie_ext.cpp
@ -80,7 +80,7 @@ int mie_gpu_init(const int ntypes, double **cutsq, double **host_mie1,
                        offset, special_lj, inum, nall, max_nbors, maxspecial,
                        cell_size, gpu_split, screen);
-    MLMF.device->gpu_barrier();
+    MLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_morse_ext.cpp
+++ b/lib/gpu/lal_morse_ext.cpp
@ -77,7 +77,7 @@ int mor_gpu_init(const int ntypes, double **cutsq,
                         offset, special_lj, inum, nall, max_nbors, maxspecial,
                         cell_size, gpu_split, screen);
-    MORMF.device->gpu_barrier();
+    MORMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_pppm_ext.cpp
+++ b/lib/gpu/lal_pppm_ext.cpp
@ -81,7 +81,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
                           vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
                           split,success);
-    pppm.device->gpu_barrier();
+    pppm.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_re_squared_ext.cpp
+++ b/lib/gpu/lal_re_squared_ext.cpp
@ -80,7 +80,7 @@ int re_gpu_init(const int ntypes, double **shape, double **well, double **cutsq,
                        host_lj4, offset, special_lj,  inum, nall,
                        max_nbors, maxspecial, cell_size, gpu_split, screen);
-    REMF.device->gpu_barrier();
+    REMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_soft_ext.cpp
+++ b/lib/gpu/lal_soft_ext.cpp
@ -76,7 +76,7 @@ int soft_gpu_init(const int ntypes, double **cutsq, double **host_prefactor,
                        special_lj, inum, nall, max_nbors, maxspecial,
                        cell_size, gpu_split, screen);
-    SLMF.device->gpu_barrier();
+    SLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -106,7 +106,7 @@ void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor,
    if (gpu_rank==i && world_me!=0)
      SLMF.reinit(ntypes, cutsq, host_prefactor, host_cut);
-    SLMF.device->gpu_barrier();
+    SLMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_sw_ext.cpp
+++ b/lib/gpu/lal_sw_ext.cpp
@ -84,7 +84,7 @@ int sw_gpu_init(const int ntypes, const int inum, const int nall,
                        sigma_gamma, c1, c2, c3, c4, c5, c6, lambda_epsilon,
                        costheta, map, e2param);
-    SWMF.device->gpu_barrier();
+    SWMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_table_ext.cpp
+++ b/lib/gpu/lal_table_ext.cpp
@ -76,7 +76,7 @@ int table_gpu_init(const int ntypes, double **cutsq, double ***table_coeffs,
                      special_lj, inum, nall, max_nbors, maxspecial, cell_size,
                      gpu_split, screen, tabstyle, ntables, tablength);
-    TBMF.device->gpu_barrier();
+    TBMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_tersoff_ext.cpp
+++ b/lib/gpu/lal_tersoff_ext.cpp
@ -91,7 +91,7 @@ int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int
                        ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h,
                        ts_gamma, ts_beta, ts_powern, ts_cutsq);
-    TSMF.device->gpu_barrier();
+    TSMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_tersoff_mod_ext.cpp
+++ b/lib/gpu/lal_tersoff_mod_ext.cpp
@ -91,7 +91,7 @@ int tersoff_mod_gpu_init(const int ntypes, const int inum, const int nall,
                        ts_c3, ts_c4, ts_c5, ts_h, ts_beta, ts_powern,
                        ts_powern_del, ts_ca1, ts_cutsq);
-    TSMMF.device->gpu_barrier();
+    TSMMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_tersoff_zbl_ext.cpp
+++ b/lib/gpu/lal_tersoff_zbl_ext.cpp
@ -102,7 +102,7 @@ int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall,
                        ts_ZBLcut, ts_ZBLexpscale, global_e, global_a_0,
                        global_epsilon_0, ts_cutsq);
-    TSZMF.device->gpu_barrier();
+    TSZMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_ufm_ext.cpp
+++ b/lib/gpu/lal_ufm_ext.cpp
@ -78,7 +78,7 @@ int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1,
                         offset, special_lj, inum, nall, max_nbors, maxspecial,
                         cell_size, gpu_split, screen);
-    UFMLMF.device->gpu_barrier();
+    UFMLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
@ -106,7 +106,7 @@ void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
  for (int i=0; i<procs_per_gpu; i++) {
    if (gpu_rank==i && world_me!=0)
      UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, offset);
-    UFMLMF.device->gpu_barrier();
+    UFMLMF.device->serialize_init();
  }
 }
--- a/lib/gpu/lal_vashishta_ext.cpp
+++ b/lib/gpu/lal_vashishta_ext.cpp
@ -89,7 +89,7 @@ int vashishta_gpu_init(const int ntypes, const int inum, const int nall, const i
                        lam4inv, zizj, mbigd, dvrc, big6w, heta, bigh, bigw,
                        c0, costheta, bigb, big2b, bigc);
-    VashishtaMF.device->gpu_barrier();
+    VashishtaMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_yukawa_colloid_ext.cpp
+++ b/lib/gpu/lal_yukawa_colloid_ext.cpp
@ -76,7 +76,7 @@ int ykcolloid_gpu_init(const int ntypes, double **cutsq, double **host_a,
                            inum, nall, max_nbors, maxspecial, cell_size, gpu_split,
                            screen, kappa);
-    YKCOLLMF.device->gpu_barrier();
+    YKCOLLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_yukawa_ext.cpp
+++ b/lib/gpu/lal_yukawa_ext.cpp
@ -76,7 +76,7 @@ int yukawa_gpu_init(const int ntypes, double **cutsq, double kappa,
                      inum, nall, max_nbors, maxspecial, cell_size,
                      gpu_split, screen);
-    YKMF.device->gpu_barrier();
+    YKMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/gpu/lal_zbl_ext.cpp
+++ b/lib/gpu/lal_zbl_ext.cpp
@ -79,7 +79,7 @@ int zbl_gpu_init(const int ntypes, double **cutsq, double **host_sw1,
                         cut_globalsq, cut_innersq, cut_inner,
                         inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen);
-    ZBLMF.device->gpu_barrier();
+    ZBLMF.device->serialize_init();
    if (message)
      fprintf(screen,"Done.\n");
  }
--- a/lib/pace/Install.py
+++ b/lib/pace/Install.py
@ -1,4 +1,4 @@
-# TODO#!/usr/bin/env python
+#!/usr/bin/env python
 """
 Install.py tool to download, compile, and setup the pace library
@ -6,7 +6,10 @@ used to automate the steps described in the README file in this dir
 """
 from __future__ import print_function
-import sys, subprocess
+
 import shutil
 import subprocess
 import sys
 from argparse import ArgumentParser
 sys.path.append('..')
@ -15,23 +18,16 @@ from install_helpers import fullpath, geturl, checkmd5sum
 # settings
 thisdir = fullpath('.')
-version = 'v.2021.10.25.fix2'
+version ='v.2022.09.27.fix10Oct'
 # known checksums for different PACE versions. used to validate the download.
 checksums = { \
-        'v.2021.2.3.upd2' : '8fd1162724d349b930e474927197f20d',
+    'v.2022.09.27.fix10Oct': '766cebcc0e5c4b8430c2f3cd202d9905'
-        'v.2021.4.9'      : '4db54962fbd6adcf8c18d46e1798ceb5',
+}
        'v.2021.9.28'     : 'f98363bb98adc7295ea63974738c2a1b',
        'v.2021.10.25'    : 'a2ac3315c41a1a4a5c912bcb1bc9c5cc',
        'v.2021.10.25.fix': 'e0572de57039d4afedefb25707b6ceae',
        'v.2021.10.25.fix2': '32394d799bc282bb57696c78c456e64f'
        }
 parser = ArgumentParser(prog='Install.py',
                        description="LAMMPS library build wrapper script")
 # help message
 HELP = """
@ -55,6 +51,8 @@ parser.add_argument("-v", "--version", default=version, choices=checksums.keys()
                    help="set version of PACE library to download and build (default: %s)" % version)
 parser.add_argument("-vv", "--verbose", action="store_true",
                    help="be more verbose about is happening while this script runs")
 parser.add_argument("-l", "--local", default=None,
                    help="use local version of PACE library build")
 args = parser.parse_args()
@ -67,21 +65,20 @@ buildflag = args.build
 verboseflag = args.verbose
 version = args.version
-
+local = args.local
 archive_extension = "tar.gz"
 url = "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/%s.%s" % (version, archive_extension)
-unarchived_folder_name = "lammps-user-pace-%s"%(version)
+unarchived_folder_name = "lammps-user-pace-%s" % (version)
 # download PACE tarball, unpack, build PACE
 if buildflag:
-
+    if not local:
        # download entire tarball
        print("Downloading pace tarball ...")
        archive_filename = "%s.%s" % (version, archive_extension)
        download_filename = "%s/%s" % (thisdir, archive_filename)
-  print("Downloading from ",url," to ",download_filename, end=" ")
+        print("Downloading from ", url, " to ", download_filename, end=" ")
        geturl(url, download_filename)
        print(" done")
@ -91,9 +88,18 @@ if buildflag:
                sys.exit("Checksum for pace library does not match")
        print("Unpacking pace tarball ...")
-  src_folder = thisdir+"/src"
+        src_folder = thisdir + "/src"
-  cmd = 'cd "%s"; rm -rf "%s"; tar -xvf %s; mv %s %s' % (thisdir, src_folder, archive_filename, unarchived_folder_name, src_folder)
+        cmd = 'cd "%s"; rm -rf "%s"; tar -xvf %s; mv %s %s' % (
            thisdir, src_folder, archive_filename, unarchived_folder_name, src_folder)
        subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
    else:
        # copy from local version of library PACE
        print("Copy pace from ", local)
        src_folder = thisdir + "/src"
        shutil.copytree(local, src_folder,
                        # ignore=lambda (s1,s2): ('.git' in s1 or '.git' in s2),
                        dirs_exist_ok=True)
    # build
    print("Building libpace ...")
@ -102,8 +108,11 @@ if buildflag:
    if verboseflag:
        print(txt.decode("UTF-8"))
-#   remove source files
+    #   remove source files
    print("Removing pace build files and archive ...")
-  cmd = 'rm %s; make clean-build' % (download_filename)
+    cmd = 'make clean-build'
    if not local:
        cmd = ('rm %s;' % (download_filename))+cmd
    subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
--- a/lib/pace/Makefile
+++ b/lib/pace/Makefile
@ -5,8 +5,14 @@ SHELL = /bin/sh
 YAML_CPP_PATH = src/yaml-cpp
 YAML_CPP_INC = $(YAML_CPP_PATH)/include
-SRC_FILES = $(wildcard src/ML-PACE/*.cpp)
+WIGNER_CPP_INC = src/wigner-cpp/include/wigner
-SRC = $(filter-out src/ML-PACE/pair_pace.cpp, $(SRC_FILES))
+
 CNPY_CPP_PATH = src/cnpy
 CNPY_CPP_INC = $(CNPY_CPP_PATH)
 CNPY_SRC_FILES =  $(CNPY_CPP_PATH)/cnpy.cpp
 SRC_FILES = $(wildcard src/ML-PACE/ace/*.cpp) $(wildcard src/ML-PACE/ace-evaluator/*.cpp)
 SRC = $(filter-out src/ML-PACE/pair_pace.cpp, $(SRC_FILES)) $(CNPY_SRC_FILES)
 # ------ DEFINITIONS ------
@ -15,7 +21,7 @@ OBJ =   $(SRC:.cpp=.o)
 # ------ SETTINGS ------
-CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE -I$(YAML_CPP_INC)
+CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE/ace -Isrc/ML-PACE/ace-evaluator -I$(YAML_CPP_INC) -I$(WIGNER_CPP_INC) -I$(CNPY_CPP_INC) -DEXTRA_C_PROJECTIONS
 ARCHIVE =	ar
 ARCHFLAG =	-rc
--- a/lib/pace/Makefile.lammps
+++ b/lib/pace/Makefile.lammps
@ -1,3 +1,3 @@
-pace_SYSINC =-I../../lib/pace/src/ML-PACE -I../../lib/pace/src/yaml-cpp/include
+pace_SYSINC =-I../../lib/pace/src/ML-PACE/ace -I../../lib/pace/src/ML-PACE/ace-evaluator -I../../lib/pace/src/yaml-cpp/include -I../../lib/pace/src/wigner-cpp/include/wigner -DEXTRA_C_PROJECTIONS
 pace_SYSLIB = -L../../lib/pace/ -lpace -L../../lib/pace/src/yaml-cpp/ -lyaml-cpp
 pace_SYSPATH =
--- a/python/lammps/constants.py
+++ b/python/lammps/constants.py
@ -22,7 +22,8 @@ LAMMPS_INT64       = 4
 LAMMPS_INT64_2D    = 5
 LAMMPS_STRING      = 6
-# these must be kept in sync with the enums in library.h
+# these must be kept in sync with the enums in src/library.h, tools/swig/lammps.i
 # and the constants in fortran/lammps.f90
 LMP_STYLE_GLOBAL   = 0
 LMP_STYLE_ATOM     = 1
 LMP_STYLE_LOCAL    = 2
@ -42,6 +43,8 @@ LMP_ERROR_UNIVERSE = 8
 LMP_VAR_EQUAL      = 0
 LMP_VAR_ATOM       = 1
 LMP_VAR_VECTOR     = 2
 LMP_VAR_STRING     = 3
 # -------------------------------------------------------------------------
--- a/python/lammps/core.py
+++ b/python/lammps/core.py
@ -301,6 +301,8 @@ class lammps(object):
    self.lib.lammps_extract_fix.argtypes = [c_void_p, c_char_p, c_int, c_int, c_int, c_int]
    self.lib.lammps_extract_variable.argtypes = [c_void_p, c_char_p, c_char_p]
    self.lib.lammps_extract_variable_datatype.argtypes = [c_void_p, c_char_p]
    self.lib.lammps_extract_variable_datatype.restype = c_int
    self.lib.lammps_fix_external_get_force.argtypes = [c_void_p, c_char_p]
    self.lib.lammps_fix_external_get_force.restype = POINTER(POINTER(c_double))
@ -1083,21 +1085,23 @@ class lammps(object):
  # for vector, must copy nlocal returned values to local c_double vector
  # memory was allocated by library interface function
-  def extract_variable(self, name, group=None, vartype=LMP_VAR_EQUAL):
+  def extract_variable(self, name, group=None, vartype=None):
    """ Evaluate a LAMMPS variable and return its data
    This function is a wrapper around the function
-    :cpp:func:`lammps_extract_variable` of the C-library interface,
+    :cpp:func:`lammps_extract_variable` of the C library interface,
    evaluates variable name and returns a copy of the computed data.
    The memory temporarily allocated by the C-interface is deleted
    after the data is copied to a Python variable or list.
    The variable must be either an equal-style (or equivalent)
-    variable or an atom-style variable. The variable type has to
+    variable or an atom-style variable. The variable type can be
-    provided as ``vartype`` parameter which may be one of two constants:
+    provided as the ``vartype`` parameter, which may be one of several
-    ``LMP_VAR_EQUAL`` or ``LMP_VAR_ATOM``; it defaults to
+    constants: ``LMP_VAR_EQUAL``, ``LMP_VAR_ATOM``, ``LMP_VAR_VECTOR``,
-    equal-style variables.
+    or ``LMP_VAR_STRING``. If omitted or ``None``, LAMMPS will determine its
-    The group parameter is only used for atom-style variables and
+    value for you based on a call to
-    defaults to the group "all" if set to ``None``, which is the default.
+    :cpp:func:`lammps_extract_variable_datatype` from the C library interface.
    The group parameter is only used for atom-style variables and defaults to
    the group "all".
    :param name: name of the variable to execute
    :type name: string
@ -1111,6 +1115,8 @@ class lammps(object):
    if name: name = name.encode()
    else: return None
    if group: group = group.encode()
    if vartype is None :
      vartype = self.lib.lammps_extract_variable_datatype(self.lmp, name)
    if vartype == LMP_VAR_EQUAL:
      self.lib.lammps_extract_variable.restype = POINTER(c_double)
      with ExceptionCheck(self):
@ -1130,6 +1136,31 @@ class lammps(object):
        self.lib.lammps_free(ptr)
      else: return None
      return result
    elif vartype == LMP_VAR_VECTOR :
      nvector = 0
      self.lib.lammps_extract_variable.restype = POINTER(c_int)
      ptr = self.lib.lammps_extract_variable(self.lmp,name,
              'LMP_SIZE_VECTOR'.encode())
      if ptr :
        nvector = ptr[0]
        self.lib.lammps_free(ptr)
      else :
        return None
      self.lib.lammps_extract_variable.restype = POINTER(c_double)
      result = (c_double*nvector)()
      values = self.lib.lammps_extract_variable(self.lmp,name,group)
      if values :
        for i in range(nvector) :
          result[i] = values[i]
        # do NOT free the values pointer (points to internal vector data)
        return result
      else :
        return None
    elif vartype == LMP_VAR_STRING :
      self.lib.lammps_extract_variable.restype = c_char_p
      with ExceptionCheck(self) :
        ptr = self.lib.lammps_extract_variable(self.lmp, name, group)
        return ptr.decode('utf-8')
    return None
  # -------------------------------------------------------------------------
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@ -1,3 +1,3 @@
 [build-system]
-requires = [ "setuptools>=42", "wheel" ]
+requires = [ "setuptools>=42", "wheel", "build" ]
 build-backend = "setuptools.build_meta"
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi
@ -6,7 +6,7 @@ SHELL = /bin/sh
 # compiler/linker settings
 # specify flags and libraries needed for your compiler
-CC =		mpiicpc -std=c++11
+CC =		mpiicpc -std=c++11 -diag-disable=10441 -diag-disable=2196
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                -qopt-zmm-usage=high
 CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
@ -15,7 +15,7 @@ CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
-LINK =		mpiicpc -std=c++11
+LINK =		mpiicpc -std=c++11 -diag-disable=10441 -diag-disable=2196
 LINKFLAGS =	-qopenmp $(OPTFLAGS) -L$(MKLROOT)/lib/intel64/
 LIB =           -ltbbmalloc -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core
 SIZE =		size
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_mpich
@ -6,7 +6,7 @@ SHELL = /bin/sh
 # compiler/linker settings
 # specify flags and libraries needed for your compiler
-CC =		mpicxx -cxx=icc -std=c++11
+CC =		mpicxx -cxx=icc -std=c++11 -diag-disable=10441 -diag-disable=2196
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                -qopt-zmm-usage=high
 CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
@ -15,7 +15,7 @@ CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
-LINK =		mpicxx -cxx=icc -std=c++11
+LINK =		mpicxx -cxx=icc -std=c++11 -diag-disable=10441 -diag-disable=2196
 LINKFLAGS =	-qopenmp $(OPTFLAGS) -L$(MKLROOT)/lib/intel64/
 LIB =           -ltbbmalloc -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core
 SIZE =		size
--- a/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_openmpi
@ -7,7 +7,7 @@ SHELL = /bin/sh
 # specify flags and libraries needed for your compiler
 export OMPI_CXX = icc
-CC =		mpicxx -std=c++11
+CC =		mpicxx -std=c++11 -diag-disable=10441 -diag-disable=2196
 OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits \
                -qopt-zmm-usage=high
 CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
@ -16,7 +16,7 @@ CCFLAGS =	-qopenmp -qno-offload -ansi-alias -restrict \
 SHFLAGS =	-fPIC
 DEPFLAGS =	-M
-LINK =		mpicxx -std=c++11
+LINK =		mpicxx -std=c++11 -diag-disable=10441 -diag-disable=2196
 LINKFLAGS =	-qopenmp $(OPTFLAGS) -L$(MKLROOT)/lib/intel64/
 LIB =           -ltbbmalloc -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core
 SIZE =		size
--- a/src/ML-PACE/pair_pace.cpp
+++ b/src/ML-PACE/pair_pace.cpp
@ -123,15 +123,12 @@ void PairPACE::compute(int eflag, int vflag)
  ev_init(eflag, vflag);
  // downwards modified by YL
  double **x = atom->x;
  double **f = atom->f;
  int *type = atom->type;
  // number of atoms in cell
  int nlocal = atom->nlocal;
  int newton_pair = force->newton_pair;
  // inum: length of the neighborlists list
@ -205,13 +202,13 @@ void PairPACE::compute(int eflag, int vflag)
      f[j][2] -= fij[2];
      // tally per-atom virial contribution
-      if (vflag)
+      if (vflag_either)
        ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fij[0], fij[1], fij[2], -delx, -dely,
                     -delz);
    }
    // tally energy contribution
-    if (eflag) {
+    if (eflag_either) {
      // evdwl = energy of atom I
      evdwl = scale[itype][itype] * aceimpl->ace->e_atom;
      ev_tally_full(i, 2.0 * evdwl, 0.0, 0.0, 0.0, 0.0, 0.0);
@ -242,7 +239,7 @@ void PairPACE::allocate()
 void PairPACE::settings(int narg, char **arg)
 {
-  if (narg > 3) error->all(FLERR, "Illegal pair_style command.");
+  if (narg > 3) utils::missing_cmd_args(FLERR, "pair_style pace", error);
  // ACE potentials are parameterized in metal units
  if (strcmp("metal", update->unit_style) != 0)
@ -262,7 +259,7 @@ void PairPACE::settings(int narg, char **arg)
      chunksize = utils::inumeric(FLERR, arg[iarg + 1], false, lmp);
      iarg += 2;
    } else
-      error->all(FLERR, "Illegal pair_style command");
+      error->all(FLERR, "Unknown pair_style pace keyword: {}", arg[iarg]);
  }
  if (comm->me == 0) {
@ -286,7 +283,6 @@ void PairPACE::coeff(int narg, char **arg)
  map_element2type(narg - 3, arg + 3);
  auto potential_file_name = utils::get_potential_file_path(arg[2]);
  char **elemtypes = &arg[3];
  //load potential file
  delete aceimpl->basis_set;
@ -304,7 +300,7 @@ void PairPACE::coeff(int narg, char **arg)
    }
  }
-  // read args that map atom types to pACE elements
+  // read args that map atom types to PACE elements
  // map[i] = which element the Ith atom type is, -1 if not mapped
  // map[0] is not used
@ -354,8 +350,8 @@ void PairPACE::coeff(int narg, char **arg)
 void PairPACE::init_style()
 {
-  if (atom->tag_enable == 0) error->all(FLERR, "Pair style pACE requires atom IDs");
+  if (atom->tag_enable == 0) error->all(FLERR, "Pair style pace requires atom IDs");
-  if (force->newton_pair == 0) error->all(FLERR, "Pair style pACE requires newton pair on");
+  if (force->newton_pair == 0) error->all(FLERR, "Pair style pace requires newton pair on");
  // request a full neighbor list
  neighbor->add_request(this, NeighConst::REQ_FULL);
--- a/src/ML-PACE/pair_pace_extrapolation.cpp
+++ b/src/ML-PACE/pair_pace_extrapolation.cpp
@ -0,0 +1,452 @@
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 /*
 Copyright 2022 Yury Lysogorskiy^1, Anton Bochkarev^1, Matous Mrovec^1, Ralf Drautz^1
 ^1: Ruhr-University Bochum, Bochum, Germany
 */
 //
 // Created by Lysogorskiy Yury on 2.01.22.
 //
 #include "pair_pace_extrapolation.h"
 #include "atom.h"
 #include "comm.h"
 #include "error.h"
 #include "force.h"
 #include "math_const.h"
 #include "memory.h"
 #include "modify.h"
 #include "neigh_list.h"
 #include "neigh_request.h"
 #include "neighbor.h"
 #include "update.h"
 #include <cmath>
 #include <cstdlib>
 #include <cstring>
 #include "ace_b_basis.h"
 #include "ace_b_evaluator.h"
 #include "ace_recursive.h"
 #include "ace_version.h"
 namespace LAMMPS_NS {
 // Private implementation holder for PairPACEExtrapolation.
 // It owns the B-basis set, its C-tilde conversion and the two evaluators that
 // are created in PairPACEExtrapolation::coeff(); all four start out as nullptr.
 struct ACEALImpl {
  ACEALImpl() = default;
  ~ACEALImpl()
  {
    // deleting a nullptr is a no-op, so this is safe even if coeff() never ran
    delete basis_set;
    delete ace;
    delete ctilde_basis_set;
    delete rec_ace;
  }
  ACEBBasisSet *basis_set = nullptr;               // B-basis loaded from the potential file
  ACEBEvaluator *ace = nullptr;                    // evaluator used when extrapolation grades are requested
  ACECTildeBasisSet *ctilde_basis_set = nullptr;   // C-tilde form derived from basis_set
  ACERecursiveEvaluator *rec_ace = nullptr;        // evaluator used when no grades are requested
 };
 }    // namespace LAMMPS_NS
 using namespace LAMMPS_NS;
 using namespace MathConst;
 // Chemical symbols indexed by atomic number; slot 0 ("X") is a placeholder
 // and is never returned by the lookup below.
 static char const *const elements_pace_al[] = {
    "X",  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne", "Na", "Mg", "Al", "Si",
    "P",  "S",  "Cl", "Ar", "K",  "Ca", "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu",
    "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc", "Ru",
    "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I",  "Xe", "Cs", "Ba", "La", "Ce", "Pr",
    "Nd", "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",
    "Re", "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac",
    "Th", "Pa", "U",  "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr"};
 static constexpr int elements_num_pace_al = sizeof(elements_pace_al) / sizeof(const char *);
 // Return the atomic number (1-based table index) whose symbol matches
 // elname exactly (case-sensitive), or -1 if there is no such symbol.
 int AtomicNumberByName_pace_al(char *elname)
 {
  int atomic_number = -1;
  int candidate = 1;    // start at 1: entry 0 is the "X" placeholder
  while (candidate < elements_num_pace_al) {
    if (strcmp(elname, elements_pace_al[candidate]) == 0) {
      atomic_number = candidate;
      break;
    }
    ++candidate;
  }
  return atomic_number;
 }
 /* ---------------------------------------------------------------------- */
 // Construct the pair style in an "empty" state: no potential loaded, no
 // per-atom or per-type storage allocated, extrapolation grades switched off.
 PairPACEExtrapolation::PairPACEExtrapolation(LAMMPS *lmp) : Pair(lmp)
 {
  single_enable = 0;
  restartinfo = 0;
  one_coeff = 1;
  manybody_flag = 1;
  nmax = 0;         // current allocated length of extrapolation_grade_gamma
  nelements = 0;    // give the member a defined value; it is not assigned anywhere else in this file
  aceimpl = new ACEALImpl;
  scale = nullptr;
  extrapolation_grade_gamma = nullptr;
  // compute() branches on this flag on every call and nothing in this file ever
  // assigns it (it is only exposed via extract("gamma_flag")), so it must start
  // out with a defined value instead of indeterminate memory.
  flag_compute_extrapolation_grade = 0;
 }
 /* ----------------------------------------------------------------------
   check if allocated, since class can be destructed when incomplete
 ------------------------------------------------------------------------- */
 // Release the ACE implementation object and, if allocate() has run, the
 // per-type and per-atom arrays. Does nothing when copymode is set.
 PairPACEExtrapolation::~PairPACEExtrapolation()
 {
  if (copymode) return;
  delete aceimpl;
  // the arrays below only exist once allocate() has been called
  if (!allocated) return;
  memory->destroy(setflag);
  memory->destroy(cutsq);
  memory->destroy(scale);
  memory->destroy(map);
  memory->destroy(extrapolation_grade_gamma);
 }
 /* ---------------------------------------------------------------------- */
 void PairPACEExtrapolation::compute(int eflag, int vflag)
 {
  int i, j, ii, jj, inum, jnum;
  double delx, dely, delz, evdwl;
  double fij[3];
  int *ilist, *jlist, *numneigh, **firstneigh;
  ev_init(eflag, vflag);
  // downwards modified by YL
  double **x = atom->x;
  double **f = atom->f;
  tagint *tag = atom->tag;
  int *type = atom->type;
  // number of atoms in cell
  int nlocal = atom->nlocal;
  int newton_pair = force->newton_pair;
  // number of atoms including ghost atoms
  int nall = nlocal + atom->nghost;
  // inum: length of the neighborlists list
  inum = list->inum;
  // ilist: list of "i" atoms for which neighbor lists exist
  ilist = list->ilist;
  //numneigh: the length of each these neigbor list
  numneigh = list->numneigh;
  // the pointer to the list of neighbors of "i"
  firstneigh = list->firstneigh;
  // this happens when used as substyle in pair style hybrid.
  // So this check and error effectively disallows use with pair style hybrid.
  if (inum != nlocal) { error->all(FLERR, "inum: {} nlocal: {} are different", inum, nlocal); }
  //if flag_compute_extrapolation_grade at this iteration then
  // grow extrapolation_grade_gamma array, that store per-atom extrapolation grades
  if (flag_compute_extrapolation_grade && atom->nlocal > nmax) {
    memory->destroy(extrapolation_grade_gamma);
    nmax = atom->nlocal;
    memory->create(extrapolation_grade_gamma, nmax, "pace/atom:gamma");
    //zeroify array
    memset(extrapolation_grade_gamma, 0, nmax * sizeof(*extrapolation_grade_gamma));
  }
  //determine the maximum number of neighbours
  int max_jnum = 0;
  int nei = 0;
  for (ii = 0; ii < list->inum; ii++) {
    i = ilist[ii];
    jnum = numneigh[i];
    nei = nei + jnum;
    if (jnum > max_jnum) max_jnum = jnum;
  }
  if (flag_compute_extrapolation_grade)
    aceimpl->ace->resize_neighbours_cache(max_jnum);
  else
    aceimpl->rec_ace->resize_neighbours_cache(max_jnum);
  //loop over atoms
  for (ii = 0; ii < list->inum; ii++) {
    i = list->ilist[ii];
    const int itype = type[i];
    const double xtmp = x[i][0];
    const double ytmp = x[i][1];
    const double ztmp = x[i][2];
    jlist = firstneigh[i];
    jnum = numneigh[i];
    // checking if neighbours are actually within cutoff range is done inside compute_atom
    // mapping from LAMMPS atom types ('type' array) to ACE species is done inside compute_atom
    //      by using 'ace->element_type_mapping' array
    // x: [r0 ,r1, r2, ..., r100]
    // i = 0 ,1
    // jnum(0) = 50
    // jlist(neigh ind of 0-atom) = [1,2,10,7,99,25, .. 50 element in total]
    try {
      if (flag_compute_extrapolation_grade)
        aceimpl->ace->compute_atom(i, x, type, jnum, jlist);
      else
        aceimpl->rec_ace->compute_atom(i, x, type, jnum, jlist);
    } catch (std::exception &e) {
      error->one(FLERR, e.what());
    }
    // 'compute_atom' will update the `ace->e_atom` and `ace->neighbours_forces(jj, alpha)` arrays and max_gamma_grade
    if (flag_compute_extrapolation_grade)
      extrapolation_grade_gamma[i] = aceimpl->ace->max_gamma_grade;
    Array2D<DOUBLE_TYPE> &neighbours_forces =
        (flag_compute_extrapolation_grade ? aceimpl->ace->neighbours_forces
                                          : aceimpl->rec_ace->neighbours_forces);
    //optionally assign global forces arrays
    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
      const int jtype = type[j];
      j &= NEIGHMASK;
      delx = x[j][0] - xtmp;
      dely = x[j][1] - ytmp;
      delz = x[j][2] - ztmp;
      fij[0] = scale[itype][jtype] * neighbours_forces(jj, 0);
      fij[1] = scale[itype][jtype] * neighbours_forces(jj, 1);
      fij[2] = scale[itype][jtype] * neighbours_forces(jj, 2);
      f[i][0] += fij[0];
      f[i][1] += fij[1];
      f[i][2] += fij[2];
      f[j][0] -= fij[0];
      f[j][1] -= fij[1];
      f[j][2] -= fij[2];
      // tally per-atom virial contribution
      if (vflag)
        ev_tally_xyz(i, j, nlocal, newton_pair, 0.0, 0.0, fij[0], fij[1], fij[2], -delx, -dely,
                     -delz);
    }
    // tally energy contribution
    if (eflag) {
      // evdwl = energy of atom I
      DOUBLE_TYPE e_atom;
      if (flag_compute_extrapolation_grade)
        e_atom = aceimpl->ace->e_atom;
      else
        e_atom = aceimpl->rec_ace->e_atom;
      evdwl = scale[itype][itype] * e_atom;
      ev_tally_full(i, 2.0 * evdwl, 0.0, 0.0, 0.0, 0.0, 0.0);
    }
  }
  if (vflag_fdotr) virial_fdotr_compute();
  // end modifications YL
 }
 /* ---------------------------------------------------------------------- */
 // Allocate the per-type arrays (setflag, cutsq, map, scale), each sized
 // ntypes+1 per dimension, and mark the style as allocated.
 void PairPACEExtrapolation::allocate()
 {
  allocated = 1;
  const int ntypes_plus_one = atom->ntypes + 1;
  memory->create(setflag, ntypes_plus_one, ntypes_plus_one, "pair:setflag");
  memory->create(cutsq, ntypes_plus_one, ntypes_plus_one, "pair:cutsq");
  memory->create(map, ntypes_plus_one, "pair:map");
  memory->create(scale, ntypes_plus_one, ntypes_plus_one, "pair:scale");
 }
 /* ----------------------------------------------------------------------
   global settings
 ------------------------------------------------------------------------- */
 void PairPACEExtrapolation::settings(int narg, char **arg)
 {
  if (narg > 0) error->all(FLERR, "Pair style pace/extrapolation supports no keywords");
  if (comm->me == 0)
    utils::logmesg(lmp, "ACE/AL version: {}.{}.{}\n", VERSION_YEAR, VERSION_MONTH, VERSION_DAY);
 }
 /* ----------------------------------------------------------------------
   set coeffs for one or more type pairs
 ------------------------------------------------------------------------- */
 // Process the pair_coeff command.
 //   arg[2]   : potential file
 //   arg[3]   : active set inverse (ASI) file
 //   arg[4..] : one element name (or "NULL") per LAMMPS atom type
 // Loads the B-basis, converts it to C-tilde form, (re)creates both evaluators,
 // sets up the atom type -> ACE species mapping and loads the active set.
 void PairPACEExtrapolation::coeff(int narg, char **arg)
 {
  if (narg < 5) utils::missing_cmd_args(FLERR, "pair_coeff", error);
  if (!allocated) allocate();
  map_element2type(narg - 4, arg + 4);
  auto potential_file_name = utils::get_potential_file_path(arg[2]);
  auto active_set_inv_filename = utils::get_potential_file_path(arg[3]);
  char **elemtypes = &arg[4];
  // drop any basis sets left over from a previous pair_coeff call
  delete aceimpl->basis_set;
  delete aceimpl->ctilde_basis_set;
  // load potential file
  aceimpl->basis_set = new ACEBBasisSet();
  if (comm->me == 0) utils::logmesg(lmp, "Loading {}\n", potential_file_name);
  aceimpl->basis_set->load(potential_file_name);
  // convert the basis set to CTilde format
  aceimpl->ctilde_basis_set = new ACECTildeBasisSet();
  *aceimpl->ctilde_basis_set = aceimpl->basis_set->to_ACECTildeBasisSet();
  if (comm->me == 0) {
    utils::logmesg(lmp, "Total number of basis functions\n");
    for (SPECIES_TYPE mu = 0; mu < aceimpl->basis_set->nelements; mu++) {
      int n_r1 = aceimpl->basis_set->total_basis_size_rank1[mu];
      int n = aceimpl->basis_set->total_basis_size[mu];
      utils::logmesg(lmp, "\t{}: {} (r=1) {} (r>1)\n", aceimpl->basis_set->elements_name[mu], n_r1,
                     n);
    }
  }
  // (re)create both evaluators; their element_type_mapping arrays have one slot
  // per atom type plus the unused slot 0 and start out as -1 everywhere
  // (-1 means atom type not included in the potential)
  delete aceimpl->ace;
  delete aceimpl->rec_ace;
  aceimpl->ace = new ACEBEvaluator();
  aceimpl->ace->element_type_mapping.init(atom->ntypes + 1);
  // keep the B-basis evaluator consistent with the recursive one: without this
  // fill, slot 0 of its mapping would never be assigned a value in this function
  aceimpl->ace->element_type_mapping.fill(-1);
  aceimpl->rec_ace = new ACERecursiveEvaluator();
  aceimpl->rec_ace->set_recursive(true);
  aceimpl->rec_ace->element_type_mapping.init(atom->ntypes + 1);
  aceimpl->rec_ace->element_type_mapping.fill(-1);
  // read args that map atom types to PACE elements
  // map[i] = which element the Ith atom type is, -1 if not mapped
  // map[0] is not used
  const int n = atom->ntypes;
  element_names.resize(n);
  for (int i = 1; i <= n; i++) {
    char *elemname = elemtypes[i - 1];
    element_names[i - 1] = elemname;
    if (strcmp(elemname, "NULL") == 0) {
      // species_type=-1 value will not reach ACE Evaluator::compute_atom,
      // but if it does, then an error will be thrown there
      aceimpl->ace->element_type_mapping(i) = -1;
      map[i] = -1;
      if (comm->me == 0) utils::logmesg(lmp, "Skipping LAMMPS atom type #{}(NULL)\n", i);
    } else {
      // reject names that are not chemical element symbols before asking the basis set
      int atomic_number = AtomicNumberByName_pace_al(elemname);
      if (atomic_number == -1) error->all(FLERR, "'{}' is not a valid element\n", elemname);
      SPECIES_TYPE mu = aceimpl->basis_set->get_species_index_by_name(elemname);
      if (mu != -1) {
        if (comm->me == 0)
          utils::logmesg(lmp, "Mapping LAMMPS atom type #{}({}) -> ACE species type #{}\n", i,
                         elemname, mu);
        map[i] = mu;
        // set up LAMMPS atom type to ACE species  mapping for ace evaluators
        aceimpl->ace->element_type_mapping(i) = mu;
        aceimpl->rec_ace->element_type_mapping(i) = mu;
      } else {
        error->all(FLERR, "Element {} is not supported by ACE-potential from file {}", elemname,
                   potential_file_name);
      }
    }
  }
  aceimpl->ace->set_basis(*aceimpl->basis_set);
  aceimpl->rec_ace->set_basis(*aceimpl->ctilde_basis_set);
  if (comm->me == 0) utils::logmesg(lmp, "Loading ASI {}\n", active_set_inv_filename);
  aceimpl->ace->load_active_set(active_set_inv_filename);
  bool is_linear_extrapolation_grade = aceimpl->ace->get_is_linear_extrapolation_grade();
  if (comm->me == 0) {
    if (is_linear_extrapolation_grade)
      utils::logmesg(lmp, "LINEAR ASI is loaded\n");
    else
      utils::logmesg(lmp, "FULL ASI is loaded\n");
  }
  // initialize the scale factor of every type pair i <= j to 1.0;
  // init_one() copies scale[i][j] into scale[j][i]
  for (int i = 1; i <= n; i++)
    for (int j = i; j <= n; j++) scale[i][j] = 1.0;
 }
 /* ----------------------------------------------------------------------
   init specific to this pair style
 ------------------------------------------------------------------------- */
 void PairPACEExtrapolation::init_style()
 {
  if (atom->tag_enable == 0) error->all(FLERR, "Pair style pace/extrapolation requires atom IDs");
  if (force->newton_pair == 0)
    error->all(FLERR, "Pair style pace/extrapolation requires newton pair on");
  // request a full neighbor list
  neighbor->add_request(this, NeighConst::REQ_FULL);
 }
 /* ----------------------------------------------------------------------
   init for one type pair i,j and corresponding j,i
 ------------------------------------------------------------------------- */
 double PairPACEExtrapolation::init_one(int i, int j)
 {
  if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
  //cutoff from the basis set's radial functions settings
  scale[j][i] = scale[i][j];
  return aceimpl->basis_set->radial_functions->cut(map[i], map[j]);
 }
 /* ----------------------------------------------------------------------
    extract method for accessing the gamma_flag setting or the scale array
 ---------------------------------------------------------------------- */
 // Hand out a pointer to internal data selected by name:
 //   "gamma_flag" -> address of flag_compute_extrapolation_grade, dim = 0
 //   "scale"      -> the scale array, dim = 2
 // Any other name returns nullptr (dim is left at 2 in that case).
 void *PairPACEExtrapolation::extract(const char *str, int &dim)
 {
  const bool wants_gamma_flag = (strcmp(str, "gamma_flag") == 0);
  if (wants_gamma_flag) {
    dim = 0;
    return (void *) &flag_compute_extrapolation_grade;
  }
  dim = 2;
  const bool wants_scale = (strcmp(str, "scale") == 0);
  return wants_scale ? (void *) scale : nullptr;
 }
 /* ----------------------------------------------------------------------
   peratom requests from FixPair
   return ptr to requested data
   also return ncol = # of quantities per atom
     0 = per-atom vector
     1 or more = # of columns in per-atom array
   return NULL if str is not recognized
 ---------------------------------------------------------------------- */
 // Hand out per-atom data selected by name. Only "gamma" is known: it yields
 // the extrapolation grade array with ncol = 0. For any other name nullptr is
 // returned and ncol is left untouched.
 void *PairPACEExtrapolation::extract_peratom(const char *str, int &ncol)
 {
  if (strcmp(str, "gamma") != 0) return nullptr;
  ncol = 0;
  return (void *) extrapolation_grade_gamma;
 }
--- a/src/ML-PACE/pair_pace_extrapolation.h
+++ b/src/ML-PACE/pair_pace_extrapolation.h
@ -0,0 +1,65 @@
 /* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   https://www.lammps.org/, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   This software is distributed under the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 /*
 Copyright 2022 Yury Lysogorskiy^1, Anton Bochkarev^1, Matous Mrovec^1, Ralf Drautz^1
 ^1: Ruhr-University Bochum, Bochum, Germany
 */
 //
 // Created by Lysogorskiy Yury on 1.01.22.
 //
 #ifdef PAIR_CLASS
 // clang-format off
 PairStyle(pace/extrapolation,PairPACEExtrapolation)
 // clang-format on
 #else
 #ifndef LMP_PAIR_PACE_AL_H
 #define LMP_PAIR_PACE_AL_H
 #include "pair.h"
 #include <vector>
 namespace LAMMPS_NS {
 // Pair style class registered under the name "pace/extrapolation"
 // (see the PairStyle() entry at the top of this header).
 class PairPACEExtrapolation : public Pair {
 public:
  PairPACEExtrapolation(class LAMMPS *);
  ~PairPACEExtrapolation() override;
  void compute(int, int) override;
  void settings(int, char **) override;
  void coeff(int, char **) override;
  void init_style() override;
  // returns the cutoff for type pair i,j
  double init_one(int, int) override;
  // recognizes "gamma_flag" (dim = 0) and "scale" (dim = 2); nullptr otherwise
  void *extract(const char *, int &) override;
  // recognizes "gamma" (per-atom vector, ncol = 0); nullptr otherwise
  void *extract_peratom(const char *, int &) override;
 protected:
  // implementation struct defined outside this header; init_one() reads
  // the basis set cutoff through it
  struct ACEALImpl *aceimpl;
  // NOTE(review): presumably the allocated length of the per-atom gamma array - confirm
  int nmax;
  void allocate();
  std::vector<std::string> element_names;    // list of elements (used by dump pace/extrapolation)
  int nelements;                             // # of unique elements
  double *extrapolation_grade_gamma;         //per-atom gamma value
  // flag whose address is handed out by extract("gamma_flag")
  int flag_compute_extrapolation_grade;
  // 2d array handed out by extract("scale"); init_one() mirrors [i][j] into [j][i]
  double **scale;
 };
 }    // namespace LAMMPS_NS
 #endif
 #endif
--- a/src/input.cpp
+++ b/src/input.cpp
@ -831,7 +831,7 @@ int Input::execute_command()
  // try suffixed version first
  std::string mycmd = command;
-  if (lmp->suffix_enable) {
+  if (lmp->suffix_enable && lmp->suffix) {
    mycmd = command + std::string("/") + lmp->suffix;
    if (command_map->find(mycmd) == command_map->end()) {
      if (lmp->suffix2) {
--- a/src/lammps.cpp
+++ b/src/lammps.cpp
@ -46,6 +46,7 @@
 #include "modify.h"
 #include "neighbor.h"
 #include "output.h"
 #include "suffix.h"
 #include "timer.h"
 #include "universe.h"
 #include "update.h"
@ -858,43 +859,25 @@ void LAMMPS::post_create()
 {
  if (skiprunflag) input->one("timer timeout 0 every 1");
  // Don't unnecessarily reissue a package command via suffix
  int package_issued = Suffix::NONE;
  // default package command triggered by "-k on"
  if (kokkos && kokkos->kokkos_exists) input->one("package kokkos");
  // suffix will always be set if suffix_enable = 1
  // check that KOKKOS package classes were instantiated
  // check that GPU, INTEL, OPENMP fixes were compiled with LAMMPS
  if (suffix_enable) {
    if (strcmp(suffix,"gpu") == 0 && !modify->check_package("GPU"))
      error->all(FLERR,"Using suffix gpu without GPU package installed");
    if (strcmp(suffix,"intel") == 0 && !modify->check_package("INTEL"))
      error->all(FLERR,"Using suffix intel without INTEL package installed");
    if (strcmp(suffix,"kk") == 0 &&
        (kokkos == nullptr || kokkos->kokkos_exists == 0))
      error->all(FLERR,"Using suffix kk without KOKKOS package enabled");
    if (strcmp(suffix,"omp") == 0 && !modify->check_package("OMP"))
      error->all(FLERR,"Using suffix omp without OPENMP package installed");
    if (strcmp(suffix,"gpu") == 0) input->one("package gpu 0");
    if (strcmp(suffix,"intel") == 0) input->one("package intel 1");
    if (strcmp(suffix,"omp") == 0) input->one("package omp 0");
    if (suffix2) {
      if (strcmp(suffix2,"gpu") == 0) input->one("package gpu 0");
      if (strcmp(suffix2,"intel") == 0) input->one("package intel 1");
      if (strcmp(suffix2,"omp") == 0) input->one("package omp 0");
    }
  }
  // invoke any command-line package commands
  if (num_package) {
    std::string str;
    for (int i = 0; i < num_package; i++) {
      str = "package";
      char *pkg_name = *(packargs[i]);
      if (pkg_name != nullptr) {
        if (strcmp("gpu", pkg_name) == 0) package_issued |= Suffix::GPU;
        if (strcmp("omp", pkg_name) == 0) package_issued |= Suffix::OMP;
        if (strcmp("intel", pkg_name) == 0) package_issued |= Suffix::INTEL;
      }
      for (char **ptr = packargs[i]; *ptr != nullptr; ++ptr) {
        str += " ";
        str += *ptr;
@ -902,6 +885,42 @@ void LAMMPS::post_create()
      input->one(str);
    }
  }
  // either suffix or suffixp will be set if suffix_enable = 1
  // check that KOKKOS package classes were instantiated
  // check that GPU, INTEL, OPENMP fixes were compiled with LAMMPS
  // do not re-issue package command if already issued
  if (suffix_enable) {
    const char *mysuffix = suffix;
    if (suffixp) mysuffix = suffixp;
    if (strcmp(mysuffix,"gpu") == 0 && !modify->check_package("GPU"))
      error->all(FLERR,"Using suffix gpu without GPU package installed");
    if (strcmp(mysuffix,"intel") == 0 && !modify->check_package("INTEL"))
      error->all(FLERR,"Using suffix intel without INTEL package installed");
    if (strcmp(mysuffix,"kk") == 0 &&
        (kokkos == nullptr || kokkos->kokkos_exists == 0))
      error->all(FLERR,"Using suffix kk without KOKKOS package enabled");
    if (strcmp(mysuffix,"omp") == 0 && !modify->check_package("OMP"))
      error->all(FLERR,"Using suffix omp without OPENMP package installed");
    if (strcmp(mysuffix,"gpu") == 0 && !(package_issued & Suffix::GPU))
      input->one("package gpu 0");
    if (strcmp(mysuffix,"intel") == 0 && !(package_issued & Suffix::INTEL))
      input->one("package intel 1");
    if (strcmp(mysuffix,"omp") == 0 && !(package_issued & Suffix::OMP))
      input->one("package omp 0");
    if (suffix2) {
      if (strcmp(suffix2,"gpu") == 0 && !(package_issued & Suffix::GPU))
        input->one("package gpu 0");
      if (strcmp(suffix2,"intel") == 0 && !(package_issued & Suffix::INTEL))
        input->one("package intel 1");
      if (strcmp(suffix2,"omp") == 0 && !(package_issued & Suffix::OMP))
        input->one("package omp 0");
    }
  }
 }
 /* ----------------------------------------------------------------------
@ -1201,7 +1220,6 @@ void _noopt LAMMPS::help()
          "-var varname value          : set index style variable (-v)\n\n",
          exename);
  print_config(fp);
  fprintf(fp,"List of individual style options included in this LAMMPS executable\n\n");
--- a/src/library.cpp
+++ b/src/library.cpp
@ -949,7 +949,8 @@ int lammps_get_mpi_comm(void *handle)
 This function will retrieve or compute global properties. In contrast to
 :cpp:func:`lammps_get_thermo` this function returns an ``int``.  The
 following tables list the currently supported keyword.  If a keyword is
-not recognized, the function returns -1.
+not recognized, the function returns -1.  The integer sizes functions may
 be called without a valid LAMMPS object handle (it is ignored).
 * :ref:`Integer sizes <extract_integer_sizes>`
 * :ref:`System status <extract_system_status>`
@ -1145,7 +1146,7 @@ int lammps_extract_setting(void *handle, const char *keyword)
 This function returns an integer that encodes the data type of the global
 property with the specified name. See :cpp:enum:`_LMP_DATATYPE_CONST` for valid
 values. Callers of :cpp:func:`lammps_extract_global` can use this information
-to then decide how to cast the (void*) pointer and access the data.
+to then decide how to cast the ``void *`` pointer and access the data.
 .. versionadded:: 18Sep2020
@ -1622,7 +1623,7 @@ void *lammps_extract_global(void *handle, const char *name)
 This function returns an integer that encodes the data type of the per-atom
 property with the specified name. See :cpp:enum:`_LMP_DATATYPE_CONST` for valid
 values. Callers of :cpp:func:`lammps_extract_atom` can use this information
-to then decide how to cast the (void*) pointer and access the data.
+to then decide how to cast the ``void *`` pointer and access the data.
 .. versionadded:: 18Sep2020
@ -2043,16 +2044,19 @@ void *lammps_extract_fix(void *handle, const char *id, int style, int type,
 This function returns a pointer to data from a LAMMPS :doc:`variable`
 identified by its name.  When the variable is either an *equal*\ -style
-compatible or an *atom*\ -style variable the variable is evaluated and
+compatible variable, a *vector*\ -style variable, or an *atom*\ -style
-the corresponding value(s) returned.  Variables of style *internal*
+variable, the variable is evaluated and the corresponding value(s) returned.
-are compatible with *equal*\ -style variables and so are *python*\
+Variables of style *internal* are compatible with *equal*\ -style variables and
-style variables, if they return a numeric value.  For other
+so are *python*\ -style variables, if they return a numeric value.  For other
-variable styles their string value is returned.  The function returns
+variable styles, their string value is returned.  The function returns
 ``NULL`` when a variable of the provided *name* is not found or of an
 incompatible style.  The *group* argument is only used for *atom*\
-style variables and ignored otherwise.  If set to ``NULL`` when
+-style variables and ignored otherwise, with one exception: for style *vector*,
-extracting data from and *atom*\ -style variable, the group is assumed
+if *group* is "LMP_SIZE_VECTOR", the returned pointer will yield the length
-to be "all".
+of the vector to be returned when dereferenced. This pointer must be
 deallocated after the value is read to avoid a memory leak.
 If *group* is set to ``NULL`` when extracting data from an *atom*\ -style
 variable, the group is assumed to be "all".
 When requesting data from an *equal*\ -style or compatible variable
 this function allocates storage for a single double value, copies the
@ -2066,15 +2070,23 @@ use to avoid a memory leak. Example:
   double value = *dptr;
   lammps_free((void *)dptr);
-For *atom*\ -style variables the data returned is a pointer to an
+For *atom*\ -style variables, the return value is a pointer to an
 allocated block of storage of double of the length ``atom->nlocal``.
-Since the data is returned a copy, the location will persist, but its
+Since the data returned are a copy, the location will persist, but its
-content will not be updated, in case the variable is re-evaluated.
+content will not be updated in case the variable is re-evaluated.
-To avoid a memory leak this pointer needs to be freed after use in
+To avoid a memory leak, this pointer needs to be freed after use in
 the calling program.
 For *vector*\ -style variables, the returned pointer is to actual LAMMPS data.
 The pointer should not be deallocated. Its length depends on the variable,
 compute, or fix data used to construct the *vector*\ -style variable.
 This length can be fetched by calling this function with *group* set to the
 constant "LMP_SIZE_VECTOR", which returns a ``void *`` pointer that can be
 dereferenced to an integer that is the length of the vector. This pointer
 needs to be deallocated when finished with it to avoid memory leaks.
 For other variable styles the returned pointer needs to be cast to
-a char pointer.
+a char pointer. It should not be deallocated.
 .. code-block:: c
@ -2084,10 +2096,10 @@ a char pointer.
 .. note::
   LAMMPS cannot easily check if it is valid to access the data
-   referenced by the variables, e.g. computes or fixes or thermodynamic
+   referenced by the variables (e.g., computes, fixes, or thermodynamic
-   info, so it may fail with an error.  The caller has to make certain,
+   info), so it may fail with an error.  The caller has to make certain
-   that the data is extracted only when it safe to evaluate the variable
+   that the data are extracted only when it is safe to evaluate the variable
-   and thus an error and crash is avoided.
+   and thus an error or crash is avoided.
 \endverbatim
 *
@ -2118,6 +2130,15 @@ void *lammps_extract_variable(void *handle, const char *name, const char *group)
      auto vector = (double *) malloc(nlocal*sizeof(double));
      lmp->input->variable->compute_atom(ivar,igroup,vector,1,0);
      return (void *) vector;
    } else if (lmp->input->variable->vectorstyle(ivar)) {
      double *values = nullptr;
      int nvector = lmp->input->variable->compute_vector(ivar, &values);
      if ( group != nullptr && strcmp(group,"LMP_SIZE_VECTOR") == 0 ) {
          int* nvecptr = (int *) malloc(sizeof(int));
          *nvecptr = nvector;
          return (void *) nvecptr;
      } else
        return (void *) values;
    } else {
      return lmp->input->variable->retrieve(name);
    }
@ -2130,6 +2151,49 @@ void *lammps_extract_variable(void *handle, const char *name, const char *group)
 /* ---------------------------------------------------------------------- */
 /** Get data type of a LAMMPS variable.
 *
 \verbatim embed:rst
 This function returns an integer that encodes the data type of the variable
 with the specified name. See :cpp:enum:`_LMP_VAR_CONST` for valid values.
 Callers of :cpp:func:`lammps_extract_variable` can use this information to
 decide how to cast the ``void *`` pointer and access the data.
 .. versionadded:: TBD
 \endverbatim
 *
 * \param  handle  pointer to a previously created LAMMPS instance
 * \param  name    string with the name of the extracted variable
 * \return         integer constant encoding the data type of the property
 *                 or -1 if not found.
 **/
 int lammps_extract_variable_datatype(void *handle, const char *name)
 {
  auto lmp = (LAMMPS*) handle;
  BEGIN_CAPTURE
  {
    // look the variable up once and classify it by style
    auto *variable = lmp->input->variable;
    const int ivar = variable->find(name);
    // unknown variable name
    if (ivar < 0) return -1;
    if (variable->equalstyle(ivar)) return LMP_VAR_EQUAL;
    if (variable->atomstyle(ivar)) return LMP_VAR_ATOM;
    if (variable->vectorstyle(ivar)) return LMP_VAR_VECTOR;
    // every remaining style is reported as a string
    return LMP_VAR_STRING;
  }
  END_CAPTURE
  return -1;
 }
 /* ---------------------------------------------------------------------- */
 /** Set the value of a string-style variable.
 *
 * This function assigns a new value from the string str to the
@ -2159,21 +2223,38 @@ int lammps_set_variable(void *handle, char *name, char *str)
 // Library functions for scatter/gather operations of data
 // ----------------------------------------------------------------------
 /** Gather the named atom-based entity for all atoms across all processors,
 * in order.
 *
 \verbatim embed:rst
 This subroutine gathers data for all atoms and stores them in a
 one-dimensional array allocated by the user. The data will be ordered by
 atom ID, which requires consecutive atom IDs (1 to *natoms*\ ). If you need
 a similar array but have non-consecutive atom IDs, see
 :cpp:func:`lammps_gather_atoms_concat`; for a similar array but for a subset
 of atoms, see :cpp:func:`lammps_gather_atoms_subset`.
 The *data* array will be ordered in groups of *count* values, sorted by atom ID
 (e.g., if *name* is *x* and *count* = 3, then *data* = x[0][0], x[0][1],
 x[0][2], x[1][0], x[1][1], x[1][2], x[2][0], :math:`\dots`);
 *data* must be pre-allocated by the caller to length (*count* :math:`\times`
 *natoms*), as queried by :cpp:func:`lammps_get_natoms`,
 :cpp:func:`lammps_extract_global`, or :cpp:func:`lammps_extract_setting`.
 \endverbatim
 *
 * \param handle  pointer to a previously created LAMMPS instance
 * \param name    desired quantity (e.g., *x* or *charge*)
 * \param type    0 for ``int`` values, 1 for ``double`` values
 * \param count   number of per-atom values (e.g., 1 for *type* or *charge*,
 *                3 for *x* or *f*); use *count* = 3 with *image* if you want
 *                a single image flag unpacked into (*x*,*y*,*z*) components.
 * \param data    per-atom values packed in a 1-dimensional array of length
 *                *natoms* \* *count*.
 *
 */
 /* ----------------------------------------------------------------------
   gather the named atom-based entity for all atoms
     return it in user-allocated data
   data will be ordered by atom ID
     requirement for consecutive atom IDs (1 to N)
   see gather_atoms_concat() to return data for all atoms, unordered
   see gather_atoms_subset() to return data for only a subset of atoms
   name = desired quantity, e.g. x or charge
   type = 0 for integer values, 1 for double values
   count = # of per-atom values, e.g. 1 for type or charge, 3 for x or f
     use count = 3 with "image" if want single image flag unpacked into xyz
   return atom-based values in 1d data, ordered by count, then by atom ID
     e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
     data must be pre-allocated by caller to correct length
     correct length = count*Natoms, as queried by get_natoms()
   method:
     alloc and zero count*Natom length vector
     loop over Nlocal to fill vector with my values
@ -2294,23 +2375,43 @@ void lammps_gather_atoms(void *handle, char *name, int type, int count, void *da
  END_CAPTURE
 }
 /** Gather the named atom-based entity for all atoms across all processors,
 * unordered.
 *
 \verbatim embed:rst
 This subroutine gathers data for all atoms and stores them in a
 one-dimensional array allocated by the user. The data will be a concatenation
 of chunks from each processor's owned atoms, in whatever order the atoms are
 in on each processor. This process has no requirement that the atom IDs be
 consecutive. If you need the ID of each atom, you can do another
 :cpp:func:`lammps_gather_atoms_concat` call with *name* set to ``id``.
 If you have consecutive IDs and want the data to be in order, use
 :cpp:func:`lammps_gather_atoms`; for a similar array but for a subset
 of atoms, use :cpp:func:`lammps_gather_atoms_subset`.
 The *data* array will be in groups of *count* values, with *natoms*
 groups total, but not in order by atom ID (e.g., if *name* is *x* and *count*
 is 3, then *data* might be something like x[10][0], x[10][1], x[10][2],
 x[2][0], x[2][1], x[2][2], x[4][0], :math:`\dots`); *data* must be
 pre-allocated by the caller to length (*count* :math:`\times` *natoms*), as
 queried by :cpp:func:`lammps_get_natoms`,
 :cpp:func:`lammps_extract_global`, or :cpp:func:`lammps_extract_setting`.
 \endverbatim
 *
 * \param handle: pointer to a previously created LAMMPS instance
 * \param name:   desired quantity (e.g., *x* or *charge*\ )
 * \param type:   0 for ``int`` values, 1 for ``double`` values
 * \param count:  number of per-atom values (e.g., 1 for *type* or *charge*,
 *                3 for *x* or *f*); use *count* = 3 with "image" if you want
 *                single image flags unpacked into (*x*,*y*,*z*)
 * \param data:   per-atom values packed in a 1-dimensional array of length
 *                *natoms* \* *count*.
 *
 */
 /* ----------------------------------------------------------------------
   gather the named atom-based entity for all atoms
     return it in user-allocated data
   data will be a concatenation of chunks of each proc's atoms,
     in whatever order the atoms are on each proc
     no requirement for consecutive atom IDs (1 to N)
     can do a gather_atoms_concat for "id" if need to know atom IDs
   see gather_atoms() to return data ordered by consecutive atom IDs
   see gather_atoms_subset() to return data for only a subset of atoms
   name = desired quantity, e.g. x or charge
   type = 0 for integer values, 1 for double values
   count = # of per-atom values, e.g. 1 for type or charge, 3 for x or f
     use count = 3 with "image" if want single image flag unpacked into xyz
   return atom-based values in 1d data, ordered by count, then by atom
     e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
     data must be pre-allocated by caller to correct length
     correct length = count*Natoms, as queried by get_natoms()
   method:
     Allgather Nlocal atoms from each proc into data
 ------------------------------------------------------------------------- */
@ -2440,23 +2541,40 @@ void lammps_gather_atoms_concat(void *handle, char *name, int type, int count, v
  END_CAPTURE
 }
 /** Gather the named atom-based entity for a subset of atoms.
 *
 \verbatim embed:rst
 This subroutine gathers data for the requested atom IDs and stores them in a
 one-dimensional array allocated by the user. The data will be ordered by atom
 ID, but there is no requirement that the IDs be consecutive. If you wish to
 return a similar array for *all* the atoms, use :cpp:func:`lammps_gather_atoms`
 or :cpp:func:`lammps_gather_atoms_concat`.
 The *data* array will be in groups of *count* values, sorted by atom ID
 in the same order as the array *ids* (e.g., if *name* is *x*, *count* = 3, and
 *ids* is {100, 57, 210}, then *data* might look like {x[100][0], x[100][1],
 x[100][2], x[57][0], x[57][1], x[57][2], x[210][0], :math:`\dots`});
 *ids* must be provided by the user with length *ndata*, and
 *data* must be pre-allocated by the caller to length
 (*count* :math:`\times` *ndata*).
 \endverbatim
 *
 * \param handle: pointer to a previously created LAMMPS instance
 * \param name:   desired quantity (e.g., *x* or *charge*)
 * \param type:   0 for ``int`` values, 1 for ``double`` values
 * \param count:  number of per-atom values (e.g., 1 for *type* or *charge*,
 *                3 for *x* or *f*); use *count* = 3 with "image" if you want
 *                single image flags unpacked into (*x*,*y*,*z*)
 * \param ndata:  number of atoms for which to return data (can be all of them)
 * \param ids:    list of *ndata* atom IDs for which to return data
 * \param data:   per-atom values packed in a 1-dimensional array of length
 *                *ndata* \* *count*.
 *
 */
 /* ----------------------------------------------------------------------
   gather the named atom-based entity for a subset of atoms
     return it in user-allocated data
   data will be ordered by requested atom IDs
     no requirement for consecutive atom IDs (1 to N)
   see gather_atoms() to return data for all atoms, ordered by consecutive IDs
   see gather_atoms_concat() to return data for all atoms, unordered
   name = desired quantity, e.g. x or charge
   type = 0 for integer values, 1 for double values
   count = # of per-atom values, e.g. 1 for type or charge, 3 for x or f
     use count = 3 with "image" if want single image flag unpacked into xyz
   ndata = # of atoms to return data for (could be all atoms)
   ids = list of Ndata atom IDs to return data for
   return atom-based values in 1d data, ordered by count, then by atom
     e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
     data must be pre-allocated by caller to correct length
     correct length = count*Ndata
   method:
     alloc and zero count*Ndata length vector
     loop over Ndata to fill vector with my values
@ -2477,15 +2595,16 @@ void lammps_gather_atoms_subset(void *handle, char *name, int type, int count,
    int i,j,m,offset;
    tagint id;
-    // error if tags are not defined
+    // error if tags are not defined or no atom map
    // NOTE: test that name = image or ids is not a 64-bit int in code?
    int flag = 0;
    if (lmp->atom->tag_enable == 0) flag = 1;
    if (lmp->atom->natoms > MAXSMALLINT) flag = 1;
    if (lmp->atom->map_style == Atom::MAP_NONE) flag = 1;
    if (flag) {
      if (lmp->comm->me == 0)
-        lmp->error->warning(FLERR,"Library error in lammps_gather_atoms_subset");
+        lmp->error->warning(FLERR,"Library error in lammps_gather_atoms_subset: atoms must have mappable ids");
      return;
    }
@ -2586,18 +2705,35 @@ void lammps_gather_atoms_subset(void *handle, char *name, int type, int count,
  END_CAPTURE
 }
 /** Scatter the named atom-based entities in *data* to all processors.
 *
 \verbatim embed:rst
 This subroutine takes data stored in a one-dimensional array supplied by the
 user and scatters them to all atoms on all processors. The data must be
 ordered by atom ID, with the requirement that the IDs be consecutive.
 Use :cpp:func:`lammps_scatter_atoms_subset` to scatter data for some (or all)
 atoms, unordered.
 The *data* array needs to be ordered in groups of *count* values, sorted by
 atom ID (e.g., if *name* is *x* and *count* = 3, then
 *data* = x[0][0], x[0][1], x[0][2], x[1][0], x[1][1], x[1][2], x[2][0],
 :math:`\dots`); *data* must be of length (*count* :math:`\times` *natoms*).
 \endverbatim
 *
 * \param handle  pointer to a previously created LAMMPS instance
 * \param name    desired quantity (e.g., *x* or *charge*)
 * \param type    0 for ``int`` values, 1 for ``double`` values
 * \param count   number of per-atom values (e.g., 1 for *type* or *charge*,
 *                3 for *x* or *f*); use *count* = 3 with *image* if you have
 *                a single image flag packed into (*x*,*y*,*z*) components.
 * \param data    per-atom values packed in a 1-dimensional array of length
 *                *natoms* \* *count*.
 *
 */
 /* ----------------------------------------------------------------------
   scatter the named atom-based entity in data to all atoms
   data is ordered by atom ID
     requirement for consecutive atom IDs (1 to N)
   see scatter_atoms_subset() to scatter data for some (or all) atoms, unordered
   name = desired quantity, e.g. x or charge
   type = 0 for integer values, 1 for double values
   count = # of per-atom values, e.g. 1 for type or charge, 3 for x or f
     use count = 3 with "image" for xyz to be packed into single image flag
   data = atom-based values in 1d data, ordered by count, then by atom ID
     e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
     data must be correct length = count*Natoms, as queried by get_natoms()
   method:
     loop over Natoms, if I own atom ID, set its values from data
 ------------------------------------------------------------------------- */
@ -2624,7 +2760,7 @@ void lammps_scatter_atoms(void *handle, char *name, int type, int count, void *d
    if (lmp->atom->map_style == Atom::MAP_NONE) flag = 1;
    if (flag) {
      if (lmp->comm->me == 0)
-        lmp->error->warning(FLERR,"Library error in lammps_scatter_atoms");
+        lmp->error->warning(FLERR,"Library error in lammps_scatter_atoms: ids must exist, be consecutive, and be mapped");
      return;
    }
@ -2702,19 +2838,51 @@ void lammps_scatter_atoms(void *handle, char *name, int type, int count, void *d
  END_CAPTURE
 }
 /** Scatter the named atom-based entities in *data* from a subset of atoms
 *  to all processors.
 *
 \verbatim embed:rst
 This subroutine takes data stored in a one-dimensional array supplied by the
 user and scatters them to a subset of atoms on all processors. The array
 *data* contains data associated with atom IDs, but there is no requirement that
 the IDs be consecutive, as they are provided in a separate array.
 Use :cpp:func:`lammps_scatter_atoms` to scatter data for all atoms, in order.
 The *data* array needs to be organized in groups of *count* values, with the
 groups in the same order as the array *ids*. For example, if you want *data*
 to be the array {x[1][0], x[1][1], x[1][2], x[100][0], x[100][1], x[100][2],
 x[57][0], x[57][1], x[57][2]}, then *count* = 3, *ndata* = 3, and *ids* would
 be {1, 100, 57}.
 \endverbatim
 *
 * \param handle: pointer to a previously created LAMMPS instance
 * \param name:   desired quantity (e.g., *x* or *charge*)
 * \param type:   0 for ``int`` values, 1 for ``double`` values
 * \param count:  number of per-atom values (e.g., 1 for *type* or *charge*,
 *                3 for *x* or *f*); use *count* = 3 with "image" if you have
 *                all the image flags packed into (*xyz*)
 * \param ndata:  number of atoms listed in *ids* and *data* arrays
 * \param ids:    list of *ndata* atom IDs to scatter data to
 * \param data    per-atom values packed in a 1-dimensional array of length
 *                *ndata* \* *count*.
 *
 */
 /* ----------------------------------------------------------------------
   scatter the named atom-based entity in data to a subset of atoms
   data is ordered by provided atom IDs
     no requirement for consecutive atom IDs (1 to N)
   see scatter_atoms() to scatter data for all atoms, ordered by consecutive IDs
-   name = desired quantity, e.g. x or charge
+   name = desired quantity (e.g., x or charge)
   type = 0 for integer values, 1 for double values
-   count = # of per-atom values, e.g. 1 for type or charge, 3 for x or f
+   count = # of per-atom values (e.g., 1 for type or charge, 3 for x or f)
     use count = 3 with "image" for xyz to be packed into single image flag
   ndata = # of atoms in ids and data (could be all atoms)
   ids = list of Ndata atom IDs to scatter data to
   data = atom-based values in 1d data, ordered by count, then by atom ID
-     e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
+     (e.g., x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...)
     data must be correct length = count*Ndata
   method:
     loop over Ndata, if I own atom ID, set its values from data
@ -2743,7 +2911,7 @@ void lammps_scatter_atoms_subset(void *handle, char *name, int type, int count,
    if (lmp->atom->map_style == Atom::MAP_NONE) flag = 1;
    if (flag) {
      if (lmp->comm->me == 0)
-        lmp->error->warning(FLERR,"Library error in lammps_scatter_atoms_subset");
+        lmp->error->warning(FLERR,"Library error in lammps_scatter_atoms_subset: atoms must have mapped ids");
      return;
    }
@ -2904,7 +3072,7 @@ Below is a brief C code demonstrating accessing this collected bond information.
 void lammps_gather_bonds(void *handle, void *data)
 {
-  auto lmp = (LAMMPS *)handle;
+  auto lmp = (LAMMPS *) handle;
  BEGIN_CAPTURE {
    void *val = lammps_extract_global(handle,"nbonds");
    bigint nbonds = *(bigint *)val;
@ -2951,10 +3119,10 @@ void lammps_gather_bonds(void *handle, void *data)
         "d2_name" or "i2_name" for fix property/atom arrays with count > 1
         will return error if fix/compute isn't atom-based
  type = 0 for integer values, 1 for double values
-  count = # of per-atom values, e.g. 1 for type or charge, 3 for x or f
+  count = # of per-atom values (e.g., 1 for type or charge, 3 for x or f)
    use count = 3 with "image" if want single image flag unpacked into xyz
  return atom-based values in 1d data, ordered by count, then by atom ID
-    e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
+    (e.g., x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...)
    data must be pre-allocated by caller to correct length
    correct length = count*Natoms, as queried by get_natoms()
  method:
@ -3186,10 +3354,10 @@ void lammps_gather(void *handle, char *name, int type, int count, void *data)
         "d2_name" or "i2_name" for fix property/atom arrays with count > 1
         will return error if fix/compute isn't atom-based
  type = 0 for integer values, 1 for double values
-  count = # of per-atom values, e.g. 1 for type or charge, 3 for x or f
+  count = # of per-atom values (e.g., 1 for type or charge, 3 for x or f)
    use count = 3 with "image" if want single image flag unpacked into xyz
  return atom-based values in 1d data, ordered by count, then by atom ID
-    e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
+    (e.g., x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...)
    data must be pre-allocated by caller to correct length
    correct length = count*Natoms, as queried by get_natoms()
  method:
@ -3438,10 +3606,10 @@ void lammps_gather_concat(void *handle, char *name, int type, int count, void *d
         "d2_name" or "i2_name" for fix property/atom arrays with count > 1
         will return error if fix/compute isn't atom-based
  type = 0 for integer values, 1 for double values
-  count = # of per-atom values, e.g. 1 for type or charge, 3 for x or f
+  count = # of per-atom values (e.g., 1 for type or charge, 3 for x or f)
    use count = 3 with "image" if want single image flag unpacked into xyz
  return atom-based values in 1d data, ordered by count, then by atom ID
-    e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
+    (e.g., x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...)
    data must be pre-allocated by caller to correct length
    correct length = count*Natoms, as queried by get_natoms()
  method:
@ -3465,11 +3633,12 @@ void lammps_gather_subset(void *handle, char *name,
    int i,j,m,offset,ltype;
    tagint id;
-    // error if tags are not defined or not consecutive
+    // error if tags are not defined or no atom map
    int flag = 0;
    if (lmp->atom->tag_enable == 0) flag = 1;
    if (lmp->atom->natoms > MAXSMALLINT) flag = 1;
    if (lmp->atom->map_style == Atom::MAP_NONE) flag = 1;
    if (flag) {
      if (lmp->comm->me == 0)
        lmp->error->warning(FLERR,"Library error in lammps_gather_subset");
@ -3686,10 +3855,10 @@ void lammps_gather_subset(void *handle, char *name,
         "d2_name" or "i2_name" for fix property/atom arrays with count > 1
         will return error if fix/compute isn't atom-based
  type = 0 for integer values, 1 for double values
-  count = # of per-atom values, e.g. 1 for type or charge, 3 for x or f
+  count = # of per-atom values (e.g., 1 for type or charge, 3 for x or f)
    use count = 3 with "image" if want single image flag unpacked into xyz
  return atom-based values in 1d data, ordered by count, then by atom ID
-    e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
+    (e.g., x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...)
    data must be pre-allocated by caller to correct length
    correct length = count*Natoms, as queried by get_natoms()
  method:
@ -3904,12 +4073,12 @@ void lammps_scatter(void *handle, char *name, int type, int count, void *data)
          "f_fix", "c_compute" for fixes / computes
          will return error if fix/compute isn't atom-based
   type = 0 for integer values, 1 for double values
-   count = # of per-atom values, e.g. 1 for type or charge, 3 for x or f
+   count = # of per-atom values (e.g., 1 for type or charge, 3 for x or f)
     use count = 3 with "image" for xyz to be packed into single image flag
   ndata = # of atoms in ids and data (could be all atoms)
   ids = list of Ndata atom IDs to scatter data to
   data = atom-based values in 1d data, ordered by count, then by atom ID
-     e.g. x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...
+     (e.g., x[0][0],x[0][1],x[0][2],x[1][0],x[1][1],x[1][2],x[2][0],...)
     data must be correct length = count*Ndata
   method:
     loop over Ndata, if I own atom ID, set its values from data
@ -4152,8 +4321,8 @@ boundaries atoms will be wrapped back into the simulation cell and its
 image flags adjusted accordingly, unless explicit image flags are
 provided.
-The function returns the number of atoms created or -1 on failure, e.g.
+The function returns the number of atoms created or -1 on failure (e.g.,
-when called before as box has been created.
+when called before a box has been created).
 Coordinates and velocities have to be given in a 1d-array in the order
 X(1),Y(1),Z(1),X(2),Y(2),Z(2),...,X(N),Y(N),Z(N).
@ -4358,7 +4527,7 @@ int lammps_find_fix_neighlist(void *handle, const char *id, int reqid) {
 *                 multiple requests from the same compute
 * \return         return neighbor list index if found, otherwise -1 */
-int lammps_find_compute_neighlist(void* handle, const char *id, int reqid) {
+int lammps_find_compute_neighlist(void *handle, const char *id, int reqid) {
  auto lmp = (LAMMPS *) handle;
  auto compute = lmp->modify->get_compute_by_id(id);
  if (!compute) return -1;
@ -4511,7 +4680,7 @@ int lammps_config_has_mpi_support()
 * files via a pipe to gzip or similar compression programs
 \verbatim embed:rst
-Several LAMMPS commands (e.g. :doc:`read_data`, :doc:`write_data`,
+Several LAMMPS commands (e.g., :doc:`read_data`, :doc:`write_data`,
 :doc:`dump styles atom, custom, and xyz <dump>`) support reading and
 writing compressed files via creating a pipe to the ``gzip`` program.
 This function checks whether this feature was :ref:`enabled at compile
@ -4531,8 +4700,8 @@ int lammps_config_has_gzip_support() {
 \verbatim embed:rst
 The LAMMPS :doc:`dump style image <dump_image>` supports writing multiple
-image file formats.  Most of them need, however, support from an external
+image file formats.  Most of them, however, need support from an external
-library and using that has to be :ref:`enabled at compile time <graphics>`.
+library, and using that has to be :ref:`enabled at compile time <graphics>`.
 This function checks whether support for the `PNG image file format
 <https://en.wikipedia.org/wiki/Portable_Network_Graphics>`_ is available
 in the current LAMMPS library.
@ -4550,8 +4719,8 @@ int lammps_config_has_png_support() {
 \verbatim embed:rst
 The LAMMPS :doc:`dump style image <dump_image>` supports writing multiple
-image file formats.  Most of them need, however, support from an external
+image file formats.  Most of them, however, need support from an external
-library and using that has to be :ref:`enabled at compile time <graphics>`.
+library, and using that has to be :ref:`enabled at compile time <graphics>`.
 This function checks whether support for the `JPEG image file format
 <https://jpeg.org/jpeg/>`_ is available in the current LAMMPS library.
 \endverbatim
@ -4582,14 +4751,14 @@ int lammps_config_has_ffmpeg_support() {
 /* ---------------------------------------------------------------------- */
-/** Check whether LAMMPS errors will throw a C++ exception
+/** Check whether LAMMPS errors will throw C++ exceptions.
 *
 \verbatim embed:rst
-In case of errors LAMMPS will either abort or throw a C++ exception.
+In case of an error, LAMMPS will either abort or throw a C++ exception.
 The latter has to be :ref:`enabled at compile time <exceptions>`.
 This function checks if exceptions were enabled.
-When using the library interface and C++ exceptions are enabled,
+When using the library interface with C++ exceptions enabled,
 the library interface functions will "catch" them and the
 error status can then be checked by calling
 :cpp:func:`lammps_has_error` and the most recent error message
@ -4608,10 +4777,10 @@ int lammps_config_has_exceptions() {
 /* ---------------------------------------------------------------------- */
-/** Check if a specific package has been included in LAMMPS
+/** Check whether a specific package has been included in LAMMPS
 *
 \verbatim embed:rst
-This function checks if the LAMMPS library in use includes the
+This function checks whether the LAMMPS library in use includes the
 specific :doc:`LAMMPS package <Packages>` provided as argument.
 \endverbatim
 *
@ -5183,8 +5352,8 @@ data structures can change as well as the order of atom as they migrate
 between MPI processes because of the domain decomposition
 parallelization, this function should always be called immediately
 before the forces are going to be set to get an up-to-date pointer.
- You can use e.g. :cpp:func:`lammps_get_natoms` to obtain the number
+You can use, for example, :cpp:func:`lammps_extract_setting` to obtain the
-of local atoms `nlocal` and then assume the dimensions of the returned
+number of local atoms `nlocal` and then assume the dimensions of the returned
 force array as ``double force[nlocal][3]``.
 This is an alternative to the callback mechanism in fix external set up by
@ -5470,7 +5639,7 @@ void lammps_fix_external_set_vector_length(void *handle, const char *id, int len
 This is a companion function to :cpp:func:`lammps_set_fix_external_callback` and
 :cpp:func:`lammps_fix_external_get_force` to set the values of a global vector of
 properties that will be stored with the fix and that can be accessed from
-within LAMMPS input commands (e.g. fix ave/time or variables) when used
+within LAMMPS input commands (e.g., fix ave/time or variables) when used
 in a vector context.
 This function needs to be called **after** a call to
@ -5568,7 +5737,7 @@ int lammps_is_running(void *handle)
  return lmp->update->whichflag;
 }
-/** Force a timeout to cleanly stop an ongoing run
+/** Force a timeout to stop an ongoing run cleanly.
 *
 * This function can be used from signal handlers or multi-threaded
 * applications to cleanly terminate an ongoing run.
@ -5594,9 +5763,9 @@ has thrown a :ref:`C++ exception <exceptions>`.
 .. note::
   This function will always report "no error" when the LAMMPS library
-   has been compiled without ``-DLAMMPS_EXCEPTIONS`` which turns fatal
+   has been compiled without ``-DLAMMPS_EXCEPTIONS``, which turns fatal
-   errors aborting LAMMPS into a C++ exceptions. You can use the library
+   errors aborting LAMMPS into C++ exceptions. You can use the library
-   function :cpp:func:`lammps_config_has_exceptions` to check if this is
+   function :cpp:func:`lammps_config_has_exceptions` to check whether this is
   the case.
 \endverbatim
 *
@ -5605,8 +5774,8 @@ has thrown a :ref:`C++ exception <exceptions>`.
 */
 int lammps_has_error(void *handle) {
 #ifdef LAMMPS_EXCEPTIONS
-  LAMMPS *  lmp = (LAMMPS *) handle;
+  LAMMPS *lmp = (LAMMPS *) handle;
-  Error * error = lmp->error;
+  Error *error = lmp->error;
  return (error->get_last_error().empty()) ? 0 : 1;
 #else
  return 0;
@ -5626,15 +5795,15 @@ error message is longer, it will be truncated accordingly.  The return
 value of the function corresponds to the kind of error: a "1" indicates
 an error that occurred on all MPI ranks and is often recoverable, while
 a "2" indicates an abort that would happen only in a single MPI rank
-and thus may not be recoverable as other MPI ranks may be waiting on
+and thus may not be recoverable, as other MPI ranks may be waiting on
 the failing MPI ranks to send messages.
 .. note::
   This function will do nothing when the LAMMPS library has been
-   compiled without ``-DLAMMPS_EXCEPTIONS`` which turns errors aborting
+   compiled without ``-DLAMMPS_EXCEPTIONS``, which turns errors aborting
-   LAMMPS into a C++ exceptions.  You can use the library function
+   LAMMPS into C++ exceptions.  You can use the library function
-   :cpp:func:`lammps_config_has_exceptions` to check if this is the case.
+   :cpp:func:`lammps_config_has_exceptions` to check whether this is the case.
 \endverbatim
 *
 * \param  handle    pointer to a previously created LAMMPS instance cast to ``void *``.
--- a/src/library.h
+++ b/src/library.h
@ -40,7 +40,8 @@
 /** Data type constants for extracting data from atoms, computes and fixes
 *
- * Must be kept in sync with the equivalent constants in lammps/constants.py */
+ * Must be kept in sync with the equivalent constants in python/lammps/constants.py,
 * fortran/lammps.f90, and tools/swig/lammps.i */
 enum _LMP_DATATYPE_CONST {
  LAMMPS_INT = 0,       /*!< 32-bit integer (array) */
@ -54,7 +55,8 @@ enum _LMP_DATATYPE_CONST {
 /** Style constants for extracting data from computes and fixes.
 *
- * Must be kept in sync with the equivalent constants in lammps/constants.py */
+ * Must be kept in sync with the equivalent constants in python/lammps/constants.py,
 * fortran/lammps.f90, and tools/swig/lammps.i */
 enum _LMP_STYLE_CONST {
  LMP_STYLE_GLOBAL = 0, /*!< return global data */
@ -64,7 +66,8 @@ enum _LMP_STYLE_CONST {
 /** Type and size constants for extracting data from computes and fixes.
 *
- * Must be kept in sync with the equivalent constants in lammps/constants.py */
+ * Must be kept in sync with the equivalent constants in python/lammps/constants.py,
 * fortran/lammps.f90, and tools/swig/lammps.i */
 enum _LMP_TYPE_CONST {
  LMP_TYPE_SCALAR = 0, /*!< return scalar */
@ -77,7 +80,8 @@ enum _LMP_TYPE_CONST {
 /** Error codes to select the suitable function in the Error class
 *
- * Must be kept in sync with the equivalent constants in lammps/constants.py */
+ * Must be kept in sync with the equivalent constants in python/lammps/constants.py,
 * fortran/lammps.f90, and tools/swig/lammps.i */
 enum _LMP_ERROR_CONST {
  LMP_ERROR_WARNING = 0, /*!< call Error::warning() */
@ -87,6 +91,18 @@ enum _LMP_ERROR_CONST {
  LMP_ERROR_UNIVERSE = 8 /*!< error on Comm::universe */
 };
 /** Variable style constants for extracting data from variables.
 *
 * Each constant names the kind of variable a value is compatible with;
 * LMP_VAR_STRING is the catch-all for values returned as a string.
 * The numeric values are assigned explicitly (0 through 3).
 *
 * NOTE(review): presumably these are the values returned by
 * lammps_extract_variable_datatype() -- confirm against library.cpp.
 *
 * Must be kept in sync with the equivalent constants in python/lammps/constants.py,
 * fortran/lammps.f90, and tools/swig/lammps.i */
 enum _LMP_VAR_CONST {
  LMP_VAR_EQUAL = 0,  /*!< compatible with equal-style variables */
  LMP_VAR_ATOM = 1,   /*!< compatible with atom-style variables */
  LMP_VAR_VECTOR = 2, /*!< compatible with vector-style variables */
  LMP_VAR_STRING = 3  /*!< return value will be a string (catch-all) */
 };
 /* Ifdefs to allow this file to be included in C and C++ programs */
 #ifdef __cplusplus
@ -153,6 +169,7 @@ void *lammps_extract_atom(void *handle, const char *name);
 void *lammps_extract_compute(void *handle, const char *, int, int);
 void *lammps_extract_fix(void *handle, const char *, int, int, int, int);
 void *lammps_extract_variable(void *handle, const char *, const char *);
 int lammps_extract_variable_datatype(void *handle, const char *name);
 int lammps_set_variable(void *, char *, char *);
 /* ----------------------------------------------------------------------
--- a/tools/coding_standard/whitespace.py
+++ b/tools/coding_standard/whitespace.py
@ -24,11 +24,13 @@ include:
    - cmake/**
    - doc
    - doc/src/**
-    - python
+    - fortran/**
    - python/**
    - src/**
    - lib/**
    - tools/coding_standard
    - tools/python
    - unittest/**
 exclude:
    - lib/colvars/Install.py
    - lib/gpu/geryon/file_to_cstr.sh
--- a/Show More
+++ b/Show More