From 348ee5299e5d5d719652d5c52c7d8ae9cfdba3f8 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Wed, 6 Apr 2022 19:11:55 -0400
Subject: [PATCH 1/7] implement a dump style yaml

---
 doc/src/dump.rst             |  79 +++++++++++++++++--
 doc/src/dump_modify.rst      |   4 +-
 src/.gitignore               |   2 +
 src/EXTRA-DUMP/dump_yaml.cpp | 143 +++++++++++++++++++++++++++++++++++
 src/EXTRA-DUMP/dump_yaml.h   |  89 ++++++++++++++++++++++
 src/thermo.h                 |   1 +
 6 files changed, 309 insertions(+), 9 deletions(-)
 create mode 100644 src/EXTRA-DUMP/dump_yaml.cpp
 create mode 100644 src/EXTRA-DUMP/dump_yaml.h
diff --git a/doc/src/dump.rst b/doc/src/dump.rst
index c94813a41e..fdf31bbd06 100644
--- a/doc/src/dump.rst
+++ b/doc/src/dump.rst
@@ -36,7 +36,7 @@ Syntax
 
 * ID = user-assigned name for the dump
 * group-ID = ID of the group of atoms to be dumped
-* style = *atom* or *atom/gz* or *atom/zstd or *atom/mpiio* or *cfg* or *cfg/gz* or *cfg/zstd* or *cfg/mpiio* or *custom* or *custom/gz* or *custom/zstd* or *custom/mpiio* or *dcd* or *h5md* or *image* or *local* or *local/gz* or *local/zstd* or *molfile* or *movie* or *netcdf* or *netcdf/mpiio* or *vtk* or *xtc* or *xyz* or *xyz/gz* or *xyz/zstd* or *xyz/mpiio*
+* style = *atom* or *atom/gz* or *atom/zstd* or *atom/mpiio* or *cfg* or *cfg/gz* or *cfg/zstd* or *cfg/mpiio* or *custom* or *custom/gz* or *custom/zstd* or *custom/mpiio* or *dcd* or *h5md* or *image* or *local* or *local/gz* or *local/zstd* or *molfile* or *movie* or *netcdf* or *netcdf/mpiio* or *vtk* or *xtc* or *xyz* or *xyz/gz* or *xyz/zstd* or *xyz/mpiio* or *yaml*
 * N = dump every this many timesteps
 * file = name of file to write dump info to
 * args = list of arguments for a particular style
@@ -68,8 +68,9 @@ Syntax
        *xyz/gz* args = none
        *xyz/zstd* args = none
        *xyz/mpiio* args = none
+       *yaml* args = same as *custom* args, see below
 
-* *custom* or *custom/gz* or *custom/zstd* or *custom/mpiio* or *netcdf* or *netcdf/mpiio* args = list of atom attributes
+* *custom* or *custom/gz* or *custom/zstd* or *custom/mpiio* or *netcdf* or *netcdf/mpiio* or *yaml* args = list of atom attributes
 
   .. parsed-literal::
 
@@ -386,6 +387,70 @@ from using the (numerical) atom type to an element name (or some
 other label). This will help many visualization programs to guess
 bonds and colors.
 
+Dump style *yaml* has the same command syntax as style *custom* and
+writes YAML format files that can be easily parsed by a variety of data
+processing tools and programming languages.  Each timestep will be
+written as a YAML "document" (i.e. starts with "---" and ends with
+"...").  The style supports writing one file per timestep through the
+"\*" wildcard but not multi-processor outputs with the "%" token in the
+filename.  In addition to per-atom data, :doc:`thermo <thermo>` data can
+be included in the *yaml* style dump file using the :doc:`dump_modify
+thermo yes <dump_modify>` command.  The data included in the dump file uses the
+"thermo" tag and is otherwise identical to data specified by the
+:doc:`thermo_style <thermo_style>` command.
+
+Below is an example for a YAML format dump created by the following commands.
+
+.. code-block:: LAMMPS
+
+   dump out all yaml 100 dump.yaml id type x y z vx vy vz ix iy iz
+   dump_modify out time yes units yes thermo yes format 1 %5d format float "% 10.6e"
+
+The tags "time", "units", and "thermo" are optional and enabled by the
+dump_modify command. The list under the "box" tag has 3 lines for
+orthogonal boxes and 4 lines with triclinic boxes, where the first 3 are
+the box boundaries and the 4th the three tilt factors (xy, xz, yz).  The
+"thermo" data follows the format of the *yaml* thermo style.  The
+"keywords" tag lists the per-atom properties contained in the "data"
+columns, which contain a list with one line per atom.  The keywords may
+be renamed using the dump_modify command, the same as for the *custom* dump
+style.
+
+.. code-block:: yaml
+
+   ---
+   timestep: 0
+   units: lj
+   time: 0
+   natoms: 4000
+   boundary: [ p, p, p, p, p, p, ]
+   thermo:
+     - keywords: [ Step, Temp, E_pair, E_mol, TotEng, Press, ]
+     - data: [ 0, 0, -27093.472213010766, 0, 0, 0, ]
+   box:
+     - [ 0, 16.795961913825074 ]
+     - [ 0, 16.795961913825074 ]
+     - [ 0, 16.795961913825074 ]
+     - [ 0, 0, 0 ]
+   keywords: [ id, type, x, y, z, vx, vy, vz, ix, iy, iz,  ]
+   data:
+     - [     1 , 1 ,  0.000000e+00 ,  0.000000e+00 ,  0.000000e+00 ,  -1.841579e-01 , -9.710036e-01 , -2.934617e+00 , 0 , 0 , 0, ]
+     - [     2 , 1 ,  8.397981e-01 ,  8.397981e-01 ,  0.000000e+00 ,  -1.799591e+00 ,  2.127197e+00 ,  2.298572e+00 , 0 , 0 , 0, ]
+     - [     3 , 1 ,  8.397981e-01 ,  0.000000e+00 ,  8.397981e-01 ,  -1.807682e+00 , -9.585130e-01 ,  1.605884e+00 , 0 , 0 , 0, ]
+
+     [...]
+   ...
+   ---
+   timestep: 100
+   units: lj
+   time: 0.5
+
+     [...]
+
+   ...
+
+----------
+
 Note that *atom*, *custom*, *dcd*, *xtc*, and *xyz* style dump files
 can be read directly by `VMD <http://www.ks.uiuc.edu/Research/vmd>`_, a
 popular molecular viewing program.
@@ -427,9 +492,9 @@ If a "%" character appears in the filename, then each of P processors
 writes a portion of the dump file, and the "%" character is replaced
 with the processor ID from 0 to P-1.  For example, tmp.dump.% becomes
 tmp.dump.0, tmp.dump.1, ... tmp.dump.P-1, etc.  This creates smaller
-files and can be a fast mode of output on parallel machines that
-support parallel I/O for output. This option is not available for the
-*dcd*, *xtc*, and *xyz* styles.
+files and can be a fast mode of output on parallel machines that support
+parallel I/O for output. This option is **not** available for the *dcd*,
+*xtc*, *xyz*, and *yaml* styles.
 
 By default, P = the number of processors meaning one file per
 processor, but P can be set to a smaller value via the *nfile* or
@@ -722,8 +787,8 @@ are part of the MPIIO package.  They are only enabled if LAMMPS was
 built with that package.  See the :doc:`Build package <Build_package>`
 doc page for more info.
 
-The *xtc* and *dcd* styles are part of the EXTRA-DUMP package.  They
-are only enabled if LAMMPS was built with that package.  See the
+The *xtc*, *dcd* and *yaml* styles are part of the EXTRA-DUMP package.
+They are only enabled if LAMMPS was built with that package.  See the
 :doc:`Build package <Build_package>` page for more info.
 
 Related commands
diff --git a/doc/src/dump_modify.rst b/doc/src/dump_modify.rst
index 4bc852ea36..9e3f7f738d 100644
--- a/doc/src/dump_modify.rst
+++ b/doc/src/dump_modify.rst
@@ -712,8 +712,8 @@ run, this option is ignored since the output is already balanced.
 
 ----------
 
-The *thermo* keyword only applies the dump *netcdf* style.  It
-triggers writing of :doc:`thermo <thermo>` information to the dump file
+The *thermo* keyword only applies to the dump styles *netcdf* and *yaml*.
+It triggers writing of :doc:`thermo <thermo>` information to the dump file
 alongside per-atom data.  The values included in the dump file are
 identical to the values specified by :doc:`thermo_style <thermo_style>`.
 
diff --git a/src/.gitignore b/src/.gitignore
index 8803d8a7e3..30eb498043 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -600,6 +600,8 @@
 /dump_xyz_mpiio.h
 /dump_xyz_zstd.cpp
 /dump_xyz_zstd.h
+/dump_yaml.cpp
+/dump_yaml.h
 /dynamical_matrix.cpp
 /dynamical_matrix.h
 /ewald.cpp
diff --git a/src/EXTRA-DUMP/dump_yaml.cpp b/src/EXTRA-DUMP/dump_yaml.cpp
new file mode 100644
index 0000000000..d6bf27885d
--- /dev/null
+++ b/src/EXTRA-DUMP/dump_yaml.cpp
@@ -0,0 +1,143 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "dump_yaml.h"
+
+#include "comm.h"
+#include "domain.h"
+#include "error.h"
+#include "output.h"
+#include "thermo.h"
+#include "update.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+DumpYAML::DumpYAML(class LAMMPS *_lmp, int narg, char **args) :
+    DumpCustom(_lmp, narg, args), thermo(false)    // thermo output stays off until dump_modify sets it
+{
+  buffer_allow = 0;    // NOTE(review): presumably turns off the base class buffered output path -- confirm
+  buffer_flag = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DumpYAML::init_style()    // reject unsupported output modes, then defer to DumpCustom
+{
+  if (binary) error->all(FLERR, "Dump style yaml does not support binary output");
+  if (multiproc) error->all(FLERR, "Dump style yaml does not support multi-processor output");
+
+  DumpCustom::init_style();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DumpYAML::write()    // thin wrapper around Dump::write() that adjusts filewriter first
+{
+  // temporarily enable so write_header() is called
+  // by all MPI ranks to compute thermo data
+  if (thermo) filewriter = 1;
+
+  Dump::write();    // write_header() sets filewriter back to 0 on ranks other than 0
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DumpYAML::write_header(bigint ndump)    // ndump is reported as "natoms"
+{
+  std::string thermo_data;
+  if (thermo) {    // assemble the "thermo:" section as plain text first
+    Thermo *th = output->thermo;
+    thermo_data += "thermo:\n  - keywords: [ ";
+    for (int i = 0; i < th->nfield; ++i) thermo_data += fmt::format("{}, ", th->keyword[i]);
+    thermo_data += "]\n  - data: [ ";
+
+    for (int i = 0; i < th->nfield; ++i) {
+      th->call_vfunc(i);    // the value is read back below from dvalue, ivalue, or bivalue
+      if (th->vtype[i] == Thermo::FLOAT)
+        thermo_data += fmt::format("{}, ", th->dvalue);
+      else if (th->vtype[i] == Thermo::INT)
+        thermo_data += fmt::format("{}, ", th->ivalue);
+      else if (th->vtype[i] == Thermo::BIGINT)
+        thermo_data += fmt::format("{}, ", th->bivalue);
+    }
+    thermo_data += "]\n";
+    MPI_Barrier(world);
+  }
+
+  if (comm->me == 0) {    // only rank 0 writes the header text
+    const std::string boundary(boundstr);
+    fmt::print(fp, "---\ntimestep: {}\n", update->ntimestep);
+    if (unit_flag) fmt::print(fp, "units: {}\n", update->unit_style);
+    if (time_flag) fmt::print(fp, "time: {:.16g}\n", compute_time());
+
+    fmt::print(fp, "natoms: {}\n", ndump);
+    fputs("boundary: [ ", fp);
+    for (const auto bflag : boundary) {
+      if (bflag == ' ') continue;
+      fmt::print(fp, "{}, ", bflag);
+    }
+    fputs("]\n", fp);
+
+    if (thermo) fputs(thermo_data.c_str(), fp);    // literal text, must not be parsed as a format string
+
+    fmt::print(fp, "box:\n  - [ {}, {} ]\n", boxxlo, boxxhi);
+    fmt::print(fp, "  - [ {}, {} ]\n", boxylo, boxyhi);
+    fmt::print(fp, "  - [ {}, {} ]\n", boxzlo, boxzhi);
+    if (domain->triclinic) fmt::print(fp, "  - [ {}, {}, {} ]\n", boxxy, boxxz, boxyz);
+
+    fmt::print(fp, "keywords: [ ");
+    for (const auto &item : utils::split_words(columns)) fmt::print(fp, "{}, ", item);
+    fputs(" ]\ndata:\n", fp);
+  } else    // reset so that the remainder of the output is not multi-proc
+    filewriter = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void DumpYAML::write_data(int n, double *mybuf)    // writes n list entries with nfield values each
+{
+  int m = 0;    // running index into mybuf
+  for (int i = 0; i < n; i++) {
+    fputs("  - [ ", fp);
+    for (int j = 0; j < nfield; j++) {
+      if (vtype[j] == Dump::INT)
+        fprintf(fp, vformat[j], static_cast<int>(mybuf[m]));
+      else if (vtype[j] == Dump::DOUBLE)
+        fprintf(fp, vformat[j], mybuf[m]);
+      else if (vtype[j] == Dump::STRING)
+        fprintf(fp, vformat[j], typenames[static_cast<int>(mybuf[m])]);
+      else if (vtype[j] == Dump::BIGINT)
+        fprintf(fp, vformat[j], static_cast<bigint>(mybuf[m]));
+      m++;
+      fputs(", ", fp);
+    }
+    fputs("]\n", fp);
+  }
+  fputs("...\n", fp);    // NOTE(review): repeated if write_data() runs more than once per snapshot -- confirm
+}
+
+/* ---------------------------------------------------------------------- */
+
+int DumpYAML::modify_param(int narg, char **arg)    // handles the "thermo" keyword on top of DumpCustom's
+{
+  int n = DumpCustom::modify_param(narg, arg);    // give the parent class the first chance
+  if (n > 0) return n;
+
+  if (strcmp(arg[0], "thermo") == 0) {
+    if (narg < 2) error->all(FLERR, "expected 'yes' or 'no' after 'thermo' keyword.");
+    thermo = utils::logical(FLERR, arg[1], false, lmp) == 1;
+    return 2;    // keyword plus its value
+  } else
+    return 0;    // keyword not handled here
+}
diff --git a/src/EXTRA-DUMP/dump_yaml.h b/src/EXTRA-DUMP/dump_yaml.h
new file mode 100644
index 0000000000..e9717ea0b3
--- /dev/null
+++ b/src/EXTRA-DUMP/dump_yaml.h
@@ -0,0 +1,89 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef DUMP_CLASS
+// clang-format off
+DumpStyle(yaml,DumpYAML);
+// clang-format on
+#else
+
+#ifndef LMP_DUMP_YAML_H
+#define LMP_DUMP_YAML_H
+
+#include "dump_custom.h"
+
+namespace LAMMPS_NS {
+
+class DumpYAML : public DumpCustom {    // dump style "yaml": per-atom output written as YAML documents
+ public:
+  DumpYAML(class LAMMPS *, int, char **);
+
+ protected:
+  bool thermo;    // if true, thermo keywords and values are added to each YAML document
+
+  void init_style() override;
+  void write() override;
+  void write_header(bigint) override;
+  void write_data(int, double *) override;
+
+  int modify_param(int, char **) override;    // adds the "thermo" keyword
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Cannot open dump file %s
+
+The output file for the dump command cannot be opened.  Check that the
+path and name are correct.
+
+E: Too much per-proc info for dump
+
+Number of local atoms times number of columns must fit in a 32-bit
+integer for dump.
+
+E: Dump_modify format line is too short
+
+UNDOCUMENTED
+
+E: Could not find dump custom compute ID
+
+Self-explanatory.
+
+E: Could not find dump custom fix ID
+
+Self-explanatory.
+
+E: Dump custom and fix not computed at compatible times
+
+The fix must produce per-atom quantities on timesteps that dump custom
+needs them.
+
+E: Could not find dump custom variable name
+
+Self-explanatory.
+
+E: Region ID for dump custom does not exist
+
+Self-explanatory.
+
+U: Dump_modify format string is too short
+
+There are more fields to be dumped in a line of output than your
+format string specifies.
+
+*/
diff --git a/src/thermo.h b/src/thermo.h
index 9d0fefbc56..c36eac3d8f 100644
--- a/src/thermo.h
+++ b/src/thermo.h
@@ -23,6 +23,7 @@ class Thermo : protected Pointers {
   friend class MinCG;              // accesses compute_pe
   friend class DumpNetCDF;         // accesses thermo properties
   friend class DumpNetCDFMPIIO;    // accesses thermo properties
+  friend class DumpYAML;           // accesses thermo properties
 
  public:
   char *style;

From 601bdadf447a9a6f5a6337d8e94653858c27ed33 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 7 Apr 2022 02:05:05 -0400
Subject: [PATCH 2/7] update for recent changes in thermo output

---
 python/lammps/formats.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/lammps/formats.py b/python/lammps/formats.py
index 641e17be3e..f7a7a4eb83 100644
--- a/python/lammps/formats.py
+++ b/python/lammps/formats.py
@@ -46,14 +46,15 @@ class LogFile:
         for line in f:
             if "ERROR" in line or "exited on signal" in line:
                 self.errors.append(line)
-            elif line.startswith('Step '):
+
+            elif re.match(r'^ *Step ', line):
                 in_thermo = True
                 in_data_section = True
                 keys = line.split()
                 current_run = {}
                 for k in keys:
                     current_run[k] = []
-            elif line.startswith('---------------- Step'):
+            elif re.match(r'^------* Step ', line):
                 if not in_thermo:
                    current_run = {'Step': [], 'CPU': []}
                 in_thermo = True

From 98b908387f083735884f89d79c1dbb25d2e715b0 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 7 Apr 2022 02:05:47 -0400
Subject: [PATCH 3/7] add unit test for yaml style thermo output and updated
 logfile class

---
 examples/README                     |   1 +
 examples/yaml/in.yaml               |  37 +++++++
 examples/yaml/log.7Apr22.yaml.g++.1 | 151 ++++++++++++++++++++++++++++
 python/lammps/formats.py            |  38 +++++--
 unittest/python/python-formats.py   |  22 ++++
 5 files changed, 243 insertions(+), 6 deletions(-)
 create mode 100644 examples/yaml/in.yaml
 create mode 100644 examples/yaml/log.7Apr22.yaml.g++.1

diff --git a/examples/README b/examples/README
index 0c09b6d847..d9637af5c2 100644
--- a/examples/README
+++ b/examples/README
@@ -118,6 +118,7 @@ ttm:      two-temeperature model examples
 vashishta: models using the Vashishta potential
 voronoi:  Voronoi tesselation via compute voronoi/atom command
 wall:     use of reflective walls with different stochastic models
+yaml:     demonstrates use of yaml thermo and dump styles
 
 Here is how you might run and visualize one of the sample problems:
 
diff --git a/examples/yaml/in.yaml b/examples/yaml/in.yaml
new file mode 100644
index 0000000000..28660751c8
--- /dev/null
+++ b/examples/yaml/in.yaml
@@ -0,0 +1,37 @@
+# 3d Lennard-Jones melt
+
+variable        x index 1
+variable        y index 1
+variable        z index 1
+
+variable        xx equal 20*$x
+variable        yy equal 20*$y
+variable        zz equal 20*$z
+
+units           lj
+atom_style      atomic
+
+lattice         fcc 0.8442
+region          box block 0 ${xx} 0 ${yy} 0 ${zz}
+create_box      1 box
+create_atoms    1 box
+mass            1 1.0
+
+velocity        all create 1.44 87287 loop geom
+
+pair_style      lj/cut 2.5
+pair_coeff      1 1 1.0 1.0 2.5
+
+neighbor        0.3 bin
+neigh_modify    delay 0 every 20 check no
+
+fix             1 all nve
+thermo_style    yaml
+thermo 10
+
+dump            1 all yaml 25 dump.yaml id type x y z ix iy iz vx vy vz
+dump_modify     1 sort id thermo yes units yes time yes format 1 %5d format float "% 12.8e" format int %2d
+
+run             100
+
+run             100
diff --git a/examples/yaml/log.7Apr22.yaml.g++.1 b/examples/yaml/log.7Apr22.yaml.g++.1
new file mode 100644
index 0000000000..0c39dbe6a3
--- /dev/null
+++ b/examples/yaml/log.7Apr22.yaml.g++.1
@@ -0,0 +1,151 @@
+LAMMPS (24 Mar 2022)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
+  using 1 OpenMP thread(s) per MPI task
+# 3d Lennard-Jones melt
+
+variable        x index 1
+variable        y index 1
+variable        z index 1
+
+variable        xx equal 20*$x
+variable        xx equal 20*1
+variable        yy equal 20*$y
+variable        yy equal 20*1
+variable        zz equal 20*$z
+variable        zz equal 20*1
+
+units           lj
+atom_style      atomic
+
+lattice         fcc 0.8442
+Lattice spacing in x,y,z = 1.6795962 1.6795962 1.6795962
+region          box block 0 ${xx} 0 ${yy} 0 ${zz}
+region          box block 0 20 0 ${yy} 0 ${zz}
+region          box block 0 20 0 20 0 ${zz}
+region          box block 0 20 0 20 0 20
+create_box      1 box
+Created orthogonal box = (0 0 0) to (33.591924 33.591924 33.591924)
+  1 by 1 by 1 MPI processor grid
+create_atoms    1 box
+Created 32000 atoms
+  using lattice units in orthogonal box = (0 0 0) to (33.591924 33.591924 33.591924)
+  create_atoms CPU = 0.003 seconds
+mass            1 1.0
+
+velocity        all create 1.44 87287 loop geom
+
+pair_style      lj/cut 2.5
+pair_coeff      1 1 1.0 1.0 2.5
+
+neighbor        0.3 bin
+neigh_modify    delay 0 every 20 check no
+
+fix             1 all nve
+thermo_style    yaml
+thermo 10
+
+dump            1 all yaml 25 dump.yaml id type x y z ix iy iz vx vy vz
+dump_modify     1 sort id thermo yes units yes time yes format 1 %5d format float "% 12.8e" format int %2d
+
+run             100
+  generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update every 20 steps, delay 0 steps, check no
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 2.8
+  ghost atom cutoff = 2.8
+  binsize = 1.4, bins = 24 24 24
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 20.56 | 20.56 | 20.56 Mbytes
+---
+keywords: [Step, Temp, KinEng, PotEng, E_bond, E_angle, E_dihed, E_impro, E_vdwl, E_coul, E_long, Press, ]
+data:
+  - [0, 1.44000000000001, 2.15993250000001, -6.77336805323422, 0, 0, 0, 0, -6.77336805323422, 0, 0, -5.01970725908556, ]
+  - [10, 1.12539487029313, 1.68803955255514, -6.30005271976029, 0, 0, 0, 0, -6.30005271976029, 0, 0, -2.55968522600129, ]
+  - [20, 0.625793798302192, 0.938661363368992, -5.55655653922756, 0, 0, 0, 0, -5.55655653922756, 0, 0, 0.973517658007722, ]
+  - [30, 0.745927295413064, 1.11885597777762, -5.73951278150759, 0, 0, 0, 0, -5.73951278150759, 0, 0, 0.339284096694852, ]
+  - [40, 0.731026217827733, 1.09650505988764, -5.71764564663628, 0, 0, 0, 0, -5.71764564663628, 0, 0, 0.388973418756238, ]
+  - [50, 0.740091517740786, 1.11010258482128, -5.73150426762886, 0, 0, 0, 0, -5.73150426762886, 0, 0, 0.335273324523691, ]
+  - [60, 0.750500641591031, 1.12571578266897, -5.74713299283555, 0, 0, 0, 0, -5.74713299283555, 0, 0, 0.26343139026926, ]
+  - [70, 0.755436366857812, 1.13311913920702, -5.75480059117447, 0, 0, 0, 0, -5.75480059117447, 0, 0, 0.224276619217515, ]
+  - [80, 0.759974280364828, 1.13992579675285, -5.76187162670983, 0, 0, 0, 0, -5.76187162670983, 0, 0, 0.191626237124102, ]
+  - [90, 0.760464250735042, 1.14066072934081, -5.76280209529731, 0, 0, 0, 0, -5.76280209529731, 0, 0, 0.189478083345243, ]
+  - [100, 0.757453103239936, 1.13614414924569, -5.75850548601596, 0, 0, 0, 0, -5.75850548601596, 0, 0, 0.207261053624723, ]
+...
+Loop time of 1.89046 on 1 procs for 100 steps with 32000 atoms
+
+Performance: 22851.622 tau/day, 52.897 timesteps/s
+99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.2896     | 1.2896     | 1.2896     |   0.0 | 68.22
+Neigh   | 0.17687    | 0.17687    | 0.17687    |   0.0 |  9.36
+Comm    | 0.014543   | 0.014543   | 0.014543   |   0.0 |  0.77
+Output  | 0.37678    | 0.37678    | 0.37678    |   0.0 | 19.93
+Modify  | 0.028638   | 0.028638   | 0.028638   |   0.0 |  1.51
+Other   |            | 0.003975   |            |       |  0.21
+
+Nlocal:          32000 ave       32000 max       32000 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:          19657 ave       19657 max       19657 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    1.20283e+06 ave 1.20283e+06 max 1.20283e+06 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 1202833
+Ave neighs/atom = 37.588531
+Neighbor list builds = 5
+Dangerous builds not checked
+
+run             100
+  generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Per MPI rank memory allocation (min/avg/max) = 20.57 | 20.57 | 20.57 Mbytes
+---
+keywords: [Step, Temp, KinEng, PotEng, E_bond, E_angle, E_dihed, E_impro, E_vdwl, E_coul, E_long, Press, ]
+data:
+  - [100, 0.757453103239935, 1.13614414924569, -5.7585054860159, 0, 0, 0, 0, -5.7585054860159, 0, 0, 0.207261053624721, ]
+  - [110, 0.759322359337036, 1.13894794576996, -5.7614668389562, 0, 0, 0, 0, -5.7614668389562, 0, 0, 0.194314975399602, ]
+  - [120, 0.759372342462676, 1.13902291811546, -5.76149365656489, 0, 0, 0, 0, -5.76149365656489, 0, 0, 0.191600048851267, ]
+  - [130, 0.756833027516501, 1.13521406472659, -5.75777334823494, 0, 0, 0, 0, -5.75777334823494, 0, 0, 0.208792327853067, ]
+  - [140, 0.759725426691298, 1.13955252790757, -5.76208910746081, 0, 0, 0, 0, -5.76208910746081, 0, 0, 0.193895435346637, ]
+  - [150, 0.760545839455106, 1.14078310859643, -5.7633284876011, 0, 0, 0, 0, -5.7633284876011, 0, 0, 0.187959630462945, ]
+  - [160, 0.758404626168493, 1.13757138903589, -5.76023198892283, 0, 0, 0, 0, -5.76023198892283, 0, 0, 0.19692107984108, ]
+  - [170, 0.758880300638885, 1.13828487844424, -5.76103232235402, 0, 0, 0, 0, -5.76103232235402, 0, 0, 0.197653518549842, ]
+  - [180, 0.753691827878246, 1.13050241251294, -5.75304767384283, 0, 0, 0, 0, -5.75304767384283, 0, 0, 0.237041776410937, ]
+  - [190, 0.757361226563721, 1.13600633853809, -5.75852399133222, 0, 0, 0, 0, -5.75852399133222, 0, 0, 0.219529562657488, ]
+  - [200, 0.759531750132731, 1.13926202214831, -5.76188923485725, 0, 0, 0, 0, -5.76188923485725, 0, 0, 0.209105747192796, ]
+...
+Loop time of 1.93916 on 1 procs for 100 steps with 32000 atoms
+
+Performance: 22277.687 tau/day, 51.569 timesteps/s
+99.4% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 1.3292     | 1.3292     | 1.3292     |   0.0 | 68.55
+Neigh   | 0.18317    | 0.18317    | 0.18317    |   0.0 |  9.45
+Comm    | 0.013626   | 0.013626   | 0.013626   |   0.0 |  0.70
+Output  | 0.38206    | 0.38206    | 0.38206    |   0.0 | 19.70
+Modify  | 0.027034   | 0.027034   | 0.027034   |   0.0 |  1.39
+Other   |            | 0.004028   |            |       |  0.21
+
+Nlocal:          32000 ave       32000 max       32000 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:          19570 ave       19570 max       19570 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    1.19982e+06 ave 1.19982e+06 max 1.19982e+06 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 1199821
+Ave neighs/atom = 37.494406
+Neighbor list builds = 5
+Dangerous builds not checked
+Total wall time: 0:00:04
diff --git a/python/lammps/formats.py b/python/lammps/formats.py
index f7a7a4eb83..83d05dd9f7 100644
--- a/python/lammps/formats.py
+++ b/python/lammps/formats.py
@@ -14,14 +14,19 @@
 ################################################################################
 # LAMMPS output formats
 # Written by Richard Berger <richard.berger@temple.edu>
+# and Axel Kohlmeyer <akohlmey@gmail.com>
 ################################################################################
 
-import re
+import re, yaml
+try:
+  from yaml import CSafeLoader as Loader, CSafeDumper as Dumper
+except ImportError:
+    from yaml import SafeLoader as Loader, SafeDumper as Dumper
 
 class LogFile:
   """Reads LAMMPS log files and extracts the thermo information
 
-  It supports both the default thermo output style (including custom) and multi.
+  It supports the line, multi, and yaml thermo output styles.
 
   :param filename: path to log file
   :type  filename: str
@@ -33,11 +38,13 @@ class LogFile:
 
   STYLE_DEFAULT = 0
   STYLE_MULTI   = 1
+  STYLE_YAML    = 2
 
   def __init__(self, filename):
     alpha = re.compile(r'[a-df-zA-DF-Z]') # except e or E for floating-point numbers
     kvpairs = re.compile(r'([a-zA-Z_0-9]+)\s+=\s*([0-9\.eE\-]+)')
     style = LogFile.STYLE_DEFAULT
+    yamllog = ""
     self.runs = []
     self.errors = []
     with open(filename, 'rt') as f:
@@ -54,6 +61,24 @@ class LogFile:
                 current_run = {}
                 for k in keys:
                     current_run[k] = []
+
+            elif re.match(r'^(keywords:.*$|data:$|---$|  - \[.*\]$)', line):
+                style = LogFile.STYLE_YAML
+                yamllog += line;
+                current_run = {}
+
+            elif re.match(r'^\.\.\.$', line):
+                thermo = yaml.load(yamllog, Loader=Loader)
+                for k in thermo['keywords']:
+                    current_run[k] = []
+                for step in thermo['data']:
+                    icol = 0
+                    for k in thermo['keywords']:
+                        current_run[k].append(step[icol])
+                        icol += 1
+                self.runs.append(current_run)
+                yamllog = ""
+
             elif re.match(r'^------* Step ', line):
                 if not in_thermo:
                    current_run = {'Step': [], 'CPU': []}
@@ -65,28 +90,29 @@ class LogFile:
                 cpu  = float(str_cpu.split('=')[1].split()[0])
                 current_run["Step"].append(step)
                 current_run["CPU"].append(cpu)
+
             elif line.startswith('Loop time of'):
                 in_thermo = False
-                self.runs.append(current_run)
+                if style != LogFile.STYLE_YAML:
+                    self.runs.append(current_run)
+
             elif in_thermo and in_data_section:
                 if style == LogFile.STYLE_DEFAULT:
                     if alpha.search(line):
                         continue
-
                     for k, v in zip(keys, map(float, line.split())):
                         current_run[k].append(v)
+
                 elif style == LogFile.STYLE_MULTI:
                     if '=' not in line:
                         in_data_section = False
                         continue
-
                     for k,v in kvpairs.findall(line):
                         if k not in current_run:
                             current_run[k] = [float(v)]
                         else:
                             current_run[k].append(float(v))
 
-
 class AvgChunkFile:
   """Reads files generated by fix ave/chunk
 
diff --git a/unittest/python/python-formats.py b/unittest/python/python-formats.py
index ca877b8305..9e7863e198 100644
--- a/unittest/python/python-formats.py
+++ b/unittest/python/python-formats.py
@@ -7,6 +7,7 @@ EXAMPLES_DIR=os.path.abspath(os.path.join(__file__, '..', '..', '..', 'examples'
 DEFAULT_STYLE_EXAMPLE_LOG="melt/log.8Apr21.melt.g++.1"
 MULTI_STYLE_EXAMPLE_LOG="peptide/log.27Nov18.peptide.g++.1"
 AVG_CHUNK_FILE="VISCOSITY/profile.13Oct16.nemd.2d.g++.1"
+YAML_STYLE_EXAMPLE_LOG="yaml/log.7Apr22.yaml.g++.1"
 
 class Logfiles(unittest.TestCase):
     def testLogFileNotFound(self):
@@ -58,6 +59,27 @@ class Logfiles(unittest.TestCase):
 
         self.assertEqual(run0["Step"], list(range(0,350, 50)))
 
+    def testYamlLogFile(self):  # parse the YAML thermo example log and check its structure
+        log = LogFile(os.path.join(EXAMPLES_DIR, YAML_STYLE_EXAMPLE_LOG))
+        self.assertEqual(len(log.runs), 2)  # the example input issues two run commands
+        run = log.runs[0]
+        self.assertEqual(len(run.keys()), 12)  # number of entries in the "keywords:" line of the log
+        self.assertIn("Step", run)
+        self.assertIn("Temp", run)
+        self.assertIn("E_vdwl", run)
+        self.assertIn("E_coul", run)
+        self.assertIn("E_bond", run)
+        self.assertIn("E_angle", run)
+        self.assertIn("Press", run)
+        self.assertEqual(len(run["Step"]), 11)  # steps 0 through 100 in increments of 10
+        self.assertEqual(len(run["Temp"]), 11)
+        self.assertEqual(len(run["E_vdwl"]), 11)
+        self.assertEqual(len(run["E_coul"]), 11)
+        self.assertEqual(len(run["E_bond"]), 11)
+        self.assertEqual(len(run["E_angle"]), 11)
+        self.assertEqual(len(run["Press"]), 11)
+        self.assertEqual(log.runs[0]["Step"], [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
+
 
 class AvgChunkFiles(unittest.TestCase):
     def testAvgChunkFileNotFound(self):

From 3970942028d6964957e556d11bb5e8d9870649c8 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 7 Apr 2022 02:43:39 -0400
Subject: [PATCH 4/7] add test for yaml dump style

---
 examples/yaml/in.yaml             |  8 ++---
 python/lammps/formats.py          |  2 +-
 unittest/python/python-formats.py | 55 +++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/examples/yaml/in.yaml b/examples/yaml/in.yaml
index 28660751c8..f682f39776 100644
--- a/examples/yaml/in.yaml
+++ b/examples/yaml/in.yaml
@@ -29,9 +29,9 @@ fix             1 all nve
 thermo_style    yaml
 thermo 10
 
-dump            1 all yaml 25 dump.yaml id type x y z ix iy iz vx vy vz
-dump_modify     1 sort id thermo yes units yes time yes format 1 %5d format float "% 12.8e" format int %2d
+#dump            1 all yaml 25 dump.yaml id type x y z ix iy iz vx vy vz
+#dump_modify     1 sort id thermo yes units yes time yes format 1 %5d format float "% 12.8e" format int %2d
 
-run             100
+run             100 post no
 
-run             100
+run             100 post no
diff --git a/python/lammps/formats.py b/python/lammps/formats.py
index 83d05dd9f7..a311867253 100644
--- a/python/lammps/formats.py
+++ b/python/lammps/formats.py
@@ -21,7 +21,7 @@ import re, yaml
 try:
   from yaml import CSafeLoader as Loader, CSafeDumper as Dumper
 except ImportError:
-    from yaml import SafeLoader as Loader, SafeDumper as Dumper
+  from yaml import SafeLoader as Loader, SafeDumper as Dumper
 
 class LogFile:
   """Reads LAMMPS log files and extracts the thermo information
diff --git a/unittest/python/python-formats.py b/unittest/python/python-formats.py
index 9e7863e198..4d6aa7e2cd 100644
--- a/unittest/python/python-formats.py
+++ b/unittest/python/python-formats.py
@@ -2,6 +2,12 @@ import os
 import unittest
 from lammps.formats import LogFile, AvgChunkFile
 
+import yaml
+try:  # prefer the libyaml-backed C classes when PyYAML was built with them
+    from yaml import CSafeLoader as Loader, CSafeDumper as Dumper
+except ImportError:  # fall back to pure Python, bound to the SAME names so later uses of Loader work
+    from yaml import SafeLoader as Loader, SafeDumper as Dumper
+
 EXAMPLES_DIR=os.path.abspath(os.path.join(__file__, '..', '..', '..', 'examples'))
 
 DEFAULT_STYLE_EXAMPLE_LOG="melt/log.8Apr21.melt.g++.1"
@@ -109,5 +115,54 @@ class AvgChunkFiles(unittest.TestCase):
         self.assertEqual(len(chunk['coord'][0]), 1)
 
 
+from lammps import lammps
+has_full = False
+try:
+    machine=None
+    if 'LAMMPS_MACHINE_NAME' in os.environ:
+        machine=os.environ['LAMMPS_MACHINE_NAME']
+    lmp=lammps(name=machine)
+    has_full = lmp.has_style("atom","full")
+    lmp.close()
+except:
+    pass
+
+@unittest.skipIf(not has_full, "atom_style full is not available")
+class PythonDump(unittest.TestCase):
+    def setUp(self):
+        machine = None
+        if 'LAMMPS_MACHINE_NAME' in os.environ:  # optional override of the name passed to lammps()
+            machine=os.environ['LAMMPS_MACHINE_NAME']
+        self.lmp = lammps(name=machine,  cmdargs=['-nocite', '-log','none', '-echo','screen'])  # new instance for every test
+
+    def tearDown(self):
+        del self.lmp  # drop the reference to the instance created in setUp
+
+    def testDumpYaml(self):
+        dumpfile = os.path.join(os.path.abspath('.'), 'dump.yaml')  # absolute path, resolved before the "shell cd" below
+        self.lmp.command('shell cd ' + os.environ['TEST_INPUT_DIR'])  # raises KeyError if TEST_INPUT_DIR is unset
+        self.lmp.command("newton on on")
+        self.lmp.file("in.fourmol")
+        self.lmp.command("dump 1 all yaml 2 " + dumpfile + " id type mol q x y z vx vy vz")  # write a snapshot every 2 steps
+        self.lmp.command("dump_modify 1 time yes sort id units yes")
+        self.lmp.command("run 4 post no")  # 4 steps -> snapshots expected at steps 0, 2 and 4
+        with open(dumpfile) as d:
+            traj = tuple(yaml.load_all(d, Loader=Loader))  # each snapshot is parsed as its own YAML document
+        self.assertEqual(len(traj), 3)
+        self.assertEqual(traj[0]['timestep'], 0)
+        self.assertEqual(traj[0]['time'], 0)
+        self.assertEqual(traj[0]['natoms'], 29)
+        self.assertEqual(traj[0]['units'], 'real')
+        self.assertEqual(len(traj[0]['boundary']), 6)
+        self.assertEqual(traj[0]['boundary'][0], 'p')
+        self.assertEqual(traj[1]['timestep'], 2)
+        self.assertEqual(traj[1]['time'], 0.2)  # exact float comparison against the value parsed from the file
+        self.assertEqual(traj[2]['timestep'], 4)
+        self.assertEqual(traj[2]['time'], 0.4)
+        self.assertEqual(traj[0]['keywords'],['id', 'type', 'mol', 'q', 'x', 'y', 'z',
+                                              'vx', 'vy', 'vz'])
+        self.assertEqual(traj[0]['data'][0],[1, 3, 1, -0.47, -0.279937, 2.47266, -0.172009,
+                                             0.000778678, 0.000589703, -0.000221795])
+
 if __name__ == "__main__":
     unittest.main()

From 8a6e6fe523e6c936e1689590c3996cffcee7b9b9 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 7 Apr 2022 05:59:31 -0400
Subject: [PATCH 5/7] make dump style yaml test dependent on it being available

---
 unittest/python/python-formats.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/unittest/python/python-formats.py b/unittest/python/python-formats.py
index 4d6aa7e2cd..ac0f2ec6ed 100644
--- a/unittest/python/python-formats.py
+++ b/unittest/python/python-formats.py
@@ -116,18 +116,18 @@ class AvgChunkFiles(unittest.TestCase):
 
 
 from lammps import lammps
-has_full = False
+has_dump_yaml = False
 try:
     machine=None
     if 'LAMMPS_MACHINE_NAME' in os.environ:
         machine=os.environ['LAMMPS_MACHINE_NAME']
     lmp=lammps(name=machine)
-    has_full = lmp.has_style("atom","full")
+    has_dump_yaml = lmp.has_style("atom","full") && lmp.has_style("dump", "yaml")
     lmp.close()
 except:
     pass
 
-@unittest.skipIf(not has_full, "atom_style full is not available")
+@unittest.skipIf(not has_dump_yaml, "Either atom_style full or dump_style yaml are not available")
 class PythonDump(unittest.TestCase):
     def setUp(self):
         machine = None

From f5add950834eda00c1b22e1b50e3330a28003ea0 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Thu, 7 Apr 2022 06:08:54 -0400
Subject: [PATCH 6/7] fix syntax error

---
 unittest/python/python-formats.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unittest/python/python-formats.py b/unittest/python/python-formats.py
index ac0f2ec6ed..c3dc6d52dc 100644
--- a/unittest/python/python-formats.py
+++ b/unittest/python/python-formats.py
@@ -122,7 +122,7 @@ try:
     if 'LAMMPS_MACHINE_NAME' in os.environ:
         machine=os.environ['LAMMPS_MACHINE_NAME']
     lmp=lammps(name=machine)
-    has_dump_yaml = lmp.has_style("atom","full") && lmp.has_style("dump", "yaml")
+    has_dump_yaml = lmp.has_style("atom","full") and lmp.has_style("dump", "yaml")
     lmp.close()
 except:
     pass

From 4efdfaa8f351710fb6b64709716cec1056de9a2c Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Fri, 8 Apr 2022 07:04:48 -0400
Subject: [PATCH 7/7] simplify and make consistent, fix time based dump bug

---
 src/output.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/output.cpp b/src/output.cpp
index 7a1eaaf807..c9da837f63 100644
--- a/src/output.cpp
+++ b/src/output.cpp
@@ -192,7 +192,7 @@ void Output::setup(int memflag)
   // decide whether to write snapshot and/or calculate next step for dump
 
   if (ndump && update->restrict_output == 0) {
-    next_time_dump_any = MAXBIGINT;
+    next_dump_any = next_time_dump_any = MAXBIGINT;
 
     for (int idump = 0; idump < ndump; idump++) {
 
@@ -256,8 +256,7 @@ void Output::setup(int memflag)
 
       if (mode_dump[idump] && (dump[idump]->clearstep || var_dump[idump]))
         next_time_dump_any = MIN(next_time_dump_any,next_dump[idump]);
-      if (idump) next_dump_any = MIN(next_dump_any,next_dump[idump]);
-      else next_dump_any = next_dump[0];
+      next_dump_any = MIN(next_dump_any,next_dump[idump]);
     }
 
   // if no dumps, set next_dump_any to last+1 so will not influence next
@@ -356,9 +355,9 @@ void Output::setup(int memflag)
    //     what other command may have added it
 
    if (next_dump_any == ntimestep) {
+     next_dump_any = next_time_dump_any = MAXBIGINT;
 
      for (int idump = 0; idump < ndump; idump++) {
-       next_time_dump_any = MAXBIGINT;
 
        if (next_dump[idump] == ntimestep) {
          if (last_dump[idump] == ntimestep) continue;
@@ -381,8 +380,7 @@ void Output::setup(int memflag)
 
        if (mode_dump[idump] && (dump[idump]->clearstep || var_dump[idump]))
          next_time_dump_any = MIN(next_time_dump_any,next_dump[idump]);
-       if (idump) next_dump_any = MIN(next_dump_any,next_dump[idump]);
-       else next_dump_any = next_dump[0];
+       next_dump_any = MIN(next_dump_any,next_dump[idump]);
      }
    }