Merge pull request #3205 from akohlmey/dump-style-yaml

Add dump style yaml
This commit is contained in:
Axel Kohlmeyer
2022-04-08 20:55:01 -04:00
committed by GitHub
10 changed files with 373 additions and 20 deletions

View File

@ -36,7 +36,7 @@ Syntax
* ID = user-assigned name for the dump
* group-ID = ID of the group of atoms to be dumped
* style = *atom* or *atom/gz* or *atom/zstd or *atom/mpiio* or *cfg* or *cfg/gz* or *cfg/zstd* or *cfg/mpiio* or *custom* or *custom/gz* or *custom/zstd* or *custom/mpiio* or *dcd* or *h5md* or *image* or *local* or *local/gz* or *local/zstd* or *molfile* or *movie* or *netcdf* or *netcdf/mpiio* or *vtk* or *xtc* or *xyz* or *xyz/gz* or *xyz/zstd* or *xyz/mpiio*
* style = *atom* or *atom/gz* or *atom/zstd* or *atom/mpiio* or *cfg* or *cfg/gz* or *cfg/zstd* or *cfg/mpiio* or *custom* or *custom/gz* or *custom/zstd* or *custom/mpiio* or *dcd* or *h5md* or *image* or *local* or *local/gz* or *local/zstd* or *molfile* or *movie* or *netcdf* or *netcdf/mpiio* or *vtk* or *xtc* or *xyz* or *xyz/gz* or *xyz/zstd* or *xyz/mpiio* or *yaml*
* N = dump every this many timesteps
* file = name of file to write dump info to
* args = list of arguments for a particular style
@ -68,8 +68,9 @@ Syntax
*xyz/gz* args = none
*xyz/zstd* args = none
*xyz/mpiio* args = none
*yaml* args = same as *custom* args, see below
* *custom* or *custom/gz* or *custom/zstd* or *custom/mpiio* or *netcdf* or *netcdf/mpiio* args = list of atom attributes
* *custom* or *custom/gz* or *custom/zstd* or *custom/mpiio* or *netcdf* or *netcdf/mpiio* or *yaml* args = list of atom attributes
.. parsed-literal::
@ -386,6 +387,70 @@ from using the (numerical) atom type to an element name (or some
other label). This will help many visualization programs to guess
bonds and colors.
Dump style *yaml* has the same command syntax as style *custom* and
writes YAML format files that can be easily parsed by a variety of data
processing tools and programming languages. Each timestep will be
written as a YAML "document" (i.e. starts with "---" and ends with
"..."). The style supports writing one file per timestep through the
"\*" wildcard but not multi-processor outputs with the "%" token in the
filename. In addition to per-atom data, :doc:`thermo <thermo>` data can
be included in the *yaml* style dump file using the :doc:`dump_modify
thermo yes <dump_modify>` command. The data included in the dump file uses the
"thermo" tag and is otherwise identical to data specified by the
:doc:`thermo_style <thermo_style>` command.
Below is an example for a YAML format dump created by the following commands.
.. code-block:: LAMMPS
dump out all yaml 100 dump.yaml id type x y z vx vy vz ix iy iz
dump_modify out time yes units yes thermo yes format 1 %5d format float "% 10.6e"
The tags "time", "units", and "thermo" are optional and enabled by the
dump_modify command. The list under the "box" tag has 3 lines for
orthogonal boxes and 4 lines for triclinic boxes, where the first 3 are
the box boundaries and the 4th contains the three tilt factors (xy, xz, yz).
The "thermo" data follows the format of the *yaml* thermo style. The
"keywords" tag lists the per-atom properties contained in the "data"
columns, which contain a list with one line per atom. The keywords may
be renamed using the dump_modify command, the same as for the *custom*
dump style.
.. code-block:: yaml
---
timestep: 0
units: lj
time: 0
natoms: 4000
boundary: [ p, p, p, p, p, p, ]
thermo:
- keywords: [ Step, Temp, E_pair, E_mol, TotEng, Press, ]
- data: [ 0, 0, -27093.472213010766, 0, 0, 0, ]
box:
- [ 0, 16.795961913825074 ]
- [ 0, 16.795961913825074 ]
- [ 0, 16.795961913825074 ]
- [ 0, 0, 0 ]
keywords: [ id, type, x, y, z, vx, vy, vz, ix, iy, iz, ]
data:
- [ 1 , 1 , 0.000000e+00 , 0.000000e+00 , 0.000000e+00 , -1.841579e-01 , -9.710036e-01 , -2.934617e+00 , 0 , 0 , 0, ]
- [ 2 , 1 , 8.397981e-01 , 8.397981e-01 , 0.000000e+00 , -1.799591e+00 , 2.127197e+00 , 2.298572e+00 , 0 , 0 , 0, ]
- [ 3 , 1 , 8.397981e-01 , 0.000000e+00 , 8.397981e-01 , -1.807682e+00 , -9.585130e-01 , 1.605884e+00 , 0 , 0 , 0, ]
[...]
...
---
timestep: 100
units: lj
time: 0.5
[...]
...
----------
Note that *atom*, *custom*, *dcd*, *xtc*, and *xyz* style dump files
can be read directly by `VMD <http://www.ks.uiuc.edu/Research/vmd>`_, a
popular molecular viewing program.
@ -427,9 +492,9 @@ If a "%" character appears in the filename, then each of P processors
writes a portion of the dump file, and the "%" character is replaced
with the processor ID from 0 to P-1. For example, tmp.dump.% becomes
tmp.dump.0, tmp.dump.1, ... tmp.dump.P-1, etc. This creates smaller
files and can be a fast mode of output on parallel machines that
support parallel I/O for output. This option is not available for the
*dcd*, *xtc*, and *xyz* styles.
files and can be a fast mode of output on parallel machines that support
parallel I/O for output. This option is **not** available for the *dcd*,
*xtc*, *xyz*, and *yaml* styles.
By default, P = the number of processors meaning one file per
processor, but P can be set to a smaller value via the *nfile* or
@ -722,8 +787,8 @@ are part of the MPIIO package. They are only enabled if LAMMPS was
built with that package. See the :doc:`Build package <Build_package>`
doc page for more info.
The *xtc* and *dcd* styles are part of the EXTRA-DUMP package. They
are only enabled if LAMMPS was built with that package. See the
The *xtc*, *dcd* and *yaml* styles are part of the EXTRA-DUMP package.
They are only enabled if LAMMPS was built with that package. See the
:doc:`Build package <Build_package>` page for more info.
Related commands

View File

@ -712,8 +712,8 @@ run, this option is ignored since the output is already balanced.
----------
The *thermo* keyword only applies the dump *netcdf* style. It
triggers writing of :doc:`thermo <thermo>` information to the dump file
The *thermo* keyword only applies to the dump styles *netcdf* and *yaml*.
It triggers writing of :doc:`thermo <thermo>` information to the dump file
alongside per-atom data. The values included in the dump file are
identical to the values specified by :doc:`thermo_style <thermo_style>`.

View File

@ -29,9 +29,9 @@ fix 1 all nve
thermo_style yaml
thermo 10
dump 1 all yaml 25 dump.yaml id type x y z ix iy iz vx vy vz
dump_modify 1 sort id thermo yes units yes time yes format 1 %5d format float "% 12.8e" format int %2d
#dump 1 all yaml 25 dump.yaml id type x y z ix iy iz vx vy vz
#dump_modify 1 sort id thermo yes units yes time yes format 1 %5d format float "% 12.8e" format int %2d
run 100
run 100 post no
run 100
run 100 post no

2
src/.gitignore vendored
View File

@ -602,6 +602,8 @@
/dump_xyz_mpiio.h
/dump_xyz_zstd.cpp
/dump_xyz_zstd.h
/dump_yaml.cpp
/dump_yaml.h
/dynamical_matrix.cpp
/dynamical_matrix.h
/ewald.cpp

View File

@ -0,0 +1,143 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "dump_yaml.h"
#include "comm.h"
#include "domain.h"
#include "error.h"
#include "output.h"
#include "thermo.h"
#include "update.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// Build a YAML dump on top of DumpCustom. Embedding of thermo data starts
// out disabled, and both buffer_allow and buffer_flag are cleared
// (presumably turning off buffered output -- confirm against class Dump).
DumpYAML::DumpYAML(class LAMMPS *_lmp, int narg, char **args) :
    DumpCustom(_lmp, narg, args), thermo(false)
{
  buffer_allow = buffer_flag = 0;
}
/* ---------------------------------------------------------------------- */
void DumpYAML::init_style()
{
if (binary) error->all(FLERR, "Dump style yaml does not support binary output");
if (multiproc) error->all(FLERR, "Dump style yaml does not support multi-processor output");
DumpCustom::init_style();
}
/* ---------------------------------------------------------------------- */
// Write one snapshot through the generic Dump::write() machinery.
void DumpYAML::write()
{
  // When thermo output is requested, every MPI rank has to enter
  // write_header() so the thermo values can be computed there. Flagging
  // the rank as a file writer makes that happen; write_header() clears
  // the flag again on all ranks other than rank 0.
  if (thermo) { filewriter = 1; }

  Dump::write();
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   Write the header of one YAML document for the current snapshot.

   ndump = number of atoms in this snapshot (written as "natoms")

   If thermo output is enabled, all ranks evaluate every thermo field and
   then synchronize; only rank 0 writes to the file. The header consists
   of: document start marker, timestep, optional units and time, natoms,
   boundary flags, optional thermo block, box bounds (plus tilt factors
   for triclinic boxes), the column keywords, and the opening "data:" tag.
   Ranks other than 0 reset filewriter, which write() may have set.
------------------------------------------------------------------------- */
void DumpYAML::write_header(bigint ndump)
{
  // assemble the thermo block as plain text first; this part is executed
  // by every rank because evaluating thermo fields is done on all of them
  std::string thermo_data;
  if (thermo) {
    Thermo *th = output->thermo;
    thermo_data += "thermo:\n - keywords: [ ";
    for (int i = 0; i < th->nfield; ++i) thermo_data += fmt::format("{}, ", th->keyword[i]);
    thermo_data += "]\n - data: [ ";

    for (int i = 0; i < th->nfield; ++i) {
      th->call_vfunc(i);
      if (th->vtype[i] == Thermo::FLOAT)
        thermo_data += fmt::format("{}, ", th->dvalue);
      else if (th->vtype[i] == Thermo::INT)
        thermo_data += fmt::format("{}, ", th->ivalue);
      else if (th->vtype[i] == Thermo::BIGINT)
        thermo_data += fmt::format("{}, ", th->bivalue);
    }
    thermo_data += "]\n";
    MPI_Barrier(world);
  }

  if (comm->me == 0) {
    const std::string boundary(boundstr);
    fmt::print(fp, "---\ntimestep: {}\n", update->ntimestep);
    if (unit_flag) fmt::print(fp, "units: {}\n", update->unit_style);
    if (time_flag) fmt::print(fp, "time: {:.16g}\n", compute_time());

    fmt::print(fp, "natoms: {}\n", ndump);
    fputs("boundary: [ ", fp);
    for (const auto bflag : boundary) {
      if (bflag == ' ') continue;
      fmt::print(fp, "{}, ", bflag);
    }
    fputs("]\n", fp);

    // thermo_data is already fully formatted text. It must be written
    // verbatim and NOT be used as a format string: a '{' or '}' inside a
    // thermo keyword would otherwise be parsed as a replacement field.
    if (thermo) fputs(thermo_data.c_str(), fp);

    fmt::print(fp, "box:\n - [ {}, {} ]\n", boxxlo, boxxhi);
    fmt::print(fp, " - [ {}, {} ]\n", boxylo, boxyhi);
    fmt::print(fp, " - [ {}, {} ]\n", boxzlo, boxzhi);
    if (domain->triclinic) fmt::print(fp, " - [ {}, {}, {} ]\n", boxxy, boxxz, boxyz);

    fmt::print(fp, "keywords: [ ");
    for (const auto &item : utils::split_words(columns)) fmt::print(fp, "{}, ", item);
    fputs(" ]\ndata:\n", fp);
  } else    // reset so that the remainder of the output is not multi-proc
    filewriter = 0;
}
/* ---------------------------------------------------------------------- */
// Write the per-atom rows of one snapshot as YAML list items and close the
// YAML document afterwards.
//   n     = number of rows to write
//   mybuf = packed values, nfield consecutive entries per row
void DumpYAML::write_data(int n, double *mybuf)
{
  const double *value = mybuf;

  for (int row = 0; row < n; ++row) {
    fputs(" - [ ", fp);
    for (int col = 0; col < nfield; ++col, ++value) {
      // each column is printed with its own format according to its type
      switch (vtype[col]) {
        case Dump::INT:
          fprintf(fp, vformat[col], static_cast<int>(*value));
          break;
        case Dump::DOUBLE:
          fprintf(fp, vformat[col], *value);
          break;
        case Dump::STRING:
          fprintf(fp, vformat[col], typenames[static_cast<int>(*value)]);
          break;
        case Dump::BIGINT:
          fprintf(fp, vformat[col], static_cast<bigint>(*value));
          break;
        default:
          break;
      }
      fputs(", ", fp);
    }
    fputs("]\n", fp);
  }

  // end-of-document marker
  fputs("...\n", fp);
}
/* ---------------------------------------------------------------------- */
// Process one dump_modify keyword.
// Returns the number of arguments consumed, or 0 if the keyword is unknown.
int DumpYAML::modify_param(int narg, char **arg)
{
  // keywords known to DumpCustom are handled there first
  const int consumed = DumpCustom::modify_param(narg, arg);
  if (consumed > 0) return consumed;

  // "thermo" is the only keyword added by this style
  if (strcmp(arg[0], "thermo") != 0) return 0;

  if (narg < 2) error->all(FLERR, "expected 'yes' or 'no' after 'thermo' keyword.");
  thermo = (utils::logical(FLERR, arg[1], false, lmp) == 1);
  return 2;
}

View File

@ -0,0 +1,89 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef DUMP_CLASS
// clang-format off
DumpStyle(yaml,DumpYAML);
// clang-format on
#else
#ifndef LMP_DUMP_YAML_H
#define LMP_DUMP_YAML_H
#include "dump_custom.h"
namespace LAMMPS_NS {
// Dump style "yaml": a DumpCustom variant that writes every snapshot as a
// separate YAML document and can optionally embed thermo data in it.
class DumpYAML : public DumpCustom {
public:
DumpYAML(class LAMMPS *, int, char **);
protected:
// true if thermo data is written into each snapshot header;
// set via the "thermo" keyword processed in modify_param()
bool thermo;
// rejects binary and multi-processor output, then calls DumpCustom::init_style()
void init_style() override;
// marks the rank as file writer when thermo is enabled, then calls Dump::write()
void write() override;
// writes the per-snapshot header (timestep, natoms, boundary, box, keywords, ...)
void write_header(bigint) override;
// writes one YAML list entry per row and the end-of-document marker
void write_data(int, double *) override;
// handles the "thermo" keyword after giving DumpCustom the first chance
int modify_param(int, char **) override;
};
} // namespace LAMMPS_NS
#endif
#endif
/* ERROR/WARNING messages:
E: Cannot open dump file %s
The output file for the dump command cannot be opened. Check that the
path and name are correct.
E: Too much per-proc info for dump
Number of local atoms times number of columns must fit in a 32-bit
integer for dump.
E: Dump_modify format line is too short
UNDOCUMENTED
E: Could not find dump custom compute ID
Self-explanatory.
E: Could not find dump custom fix ID
Self-explanatory.
E: Dump custom and fix not computed at compatible times
The fix must produce per-atom quantities on timesteps that dump custom
needs them.
E: Could not find dump custom variable name
Self-explanatory.
E: Region ID for dump custom does not exist
Self-explanatory.
U: Dump_modify format string is too short
There are more fields to be dumped in a line of output than your
format string specifies.
*/

View File

@ -192,7 +192,7 @@ void Output::setup(int memflag)
// decide whether to write snapshot and/or calculate next step for dump
if (ndump && update->restrict_output == 0) {
next_time_dump_any = MAXBIGINT;
next_dump_any = next_time_dump_any = MAXBIGINT;
for (int idump = 0; idump < ndump; idump++) {
@ -256,8 +256,7 @@ void Output::setup(int memflag)
if (mode_dump[idump] && (dump[idump]->clearstep || var_dump[idump]))
next_time_dump_any = MIN(next_time_dump_any,next_dump[idump]);
if (idump) next_dump_any = MIN(next_dump_any,next_dump[idump]);
else next_dump_any = next_dump[0];
next_dump_any = MIN(next_dump_any,next_dump[idump]);
}
// if no dumps, set next_dump_any to last+1 so will not influence next
@ -356,9 +355,9 @@ void Output::setup(int memflag)
// what other command may have added it
if (next_dump_any == ntimestep) {
next_dump_any = next_time_dump_any = MAXBIGINT;
for (int idump = 0; idump < ndump; idump++) {
next_time_dump_any = MAXBIGINT;
if (next_dump[idump] == ntimestep) {
if (last_dump[idump] == ntimestep) continue;
@ -381,8 +380,7 @@ void Output::setup(int memflag)
if (mode_dump[idump] && (dump[idump]->clearstep || var_dump[idump]))
next_time_dump_any = MIN(next_time_dump_any,next_dump[idump]);
if (idump) next_dump_any = MIN(next_dump_any,next_dump[idump]);
else next_dump_any = next_dump[0];
next_dump_any = MIN(next_dump_any,next_dump[idump]);
}
}

View File

@ -23,6 +23,7 @@ class Thermo : protected Pointers {
friend class MinCG; // accesses compute_pe
friend class DumpNetCDF; // accesses thermo properties
friend class DumpNetCDFMPIIO; // accesses thermo properties
friend class DumpYAML; // accesses thermo properties
public:
char *style;

View File

@ -2,6 +2,12 @@ import os
import unittest
from lammps.formats import LogFile, AvgChunkFile
import yaml

# Prefer the C-accelerated safe loader/dumper. When they are unavailable,
# fall back to the pure-Python classes under the SAME names, so the rest of
# the module can always refer to "Loader" and "Dumper".
try:
    from yaml import CSafeLoader as Loader, CSafeDumper as Dumper
except ImportError:
    from yaml import SafeLoader as Loader, SafeDumper as Dumper
# Absolute path of the "examples" directory, resolved relative to this file's location.
EXAMPLES_DIR=os.path.abspath(os.path.join(__file__, '..', '..', '..', 'examples'))
# Log file name with a "melt/" prefix; presumably looked up below EXAMPLES_DIR -- TODO confirm.
DEFAULT_STYLE_EXAMPLE_LOG="melt/log.8Apr21.melt.g++.1"
@ -109,5 +115,54 @@ class AvgChunkFiles(unittest.TestCase):
self.assertEqual(len(chunk['coord'][0]), 1)
from lammps import lammps
# Probe once at import time whether a LAMMPS instance can be created and
# whether it provides both atom style "full" and dump style "yaml".
# The result is used to skip the PythonDump test case when unavailable.
has_dump_yaml = False
try:
    machine = None
    if 'LAMMPS_MACHINE_NAME' in os.environ:
        machine = os.environ['LAMMPS_MACHINE_NAME']
    lmp = lammps(name=machine)
    has_dump_yaml = lmp.has_style("atom", "full") and lmp.has_style("dump", "yaml")
    lmp.close()
except Exception:
    # Best-effort probe: any ordinary failure simply leaves the flag False.
    # KeyboardInterrupt and SystemExit are deliberately not swallowed.
    pass
@unittest.skipIf(not has_dump_yaml, "Either atom_style full or dump_style yaml are not available")
class PythonDump(unittest.TestCase):
    """Check that files written by dump style yaml can be parsed with PyYAML."""

    def setUp(self):
        # use the machine suffix from the environment when one is provided
        machine = os.environ.get('LAMMPS_MACHINE_NAME')
        self.lmp = lammps(name=machine, cmdargs=['-nocite', '-log', 'none', '-echo', 'screen'])

    def tearDown(self):
        del self.lmp

    def testDumpYaml(self):
        # the dump file goes to the directory the test was started from,
        # because LAMMPS changes into the test input directory below
        dumpfile = os.path.join(os.path.abspath('.'), 'dump.yaml')

        lmp = self.lmp
        lmp.command('shell cd ' + os.environ['TEST_INPUT_DIR'])
        lmp.command("newton on on")
        lmp.file("in.fourmol")
        for cmd in (
            "dump 1 all yaml 2 " + dumpfile + " id type mol q x y z vx vy vz",
            "dump_modify 1 time yes sort id units yes",
            "run 4 post no",
        ):
            lmp.command(cmd)

        # every snapshot is one YAML document
        with open(dumpfile) as stream:
            traj = tuple(yaml.load_all(stream, Loader=Loader))

        self.assertEqual(len(traj), 3)

        first = traj[0]
        self.assertEqual(first['timestep'], 0)
        self.assertEqual(first['time'], 0)
        self.assertEqual(first['natoms'], 29)
        self.assertEqual(first['units'], 'real')
        self.assertEqual(len(first['boundary']), 6)
        self.assertEqual(first['boundary'][0], 'p')

        for frame, step, elapsed in ((traj[1], 2, 0.2), (traj[2], 4, 0.4)):
            self.assertEqual(frame['timestep'], step)
            self.assertEqual(frame['time'], elapsed)

        expected_keywords = ['id', 'type', 'mol', 'q', 'x', 'y', 'z', 'vx', 'vy', 'vz']
        self.assertEqual(first['keywords'], expected_keywords)

        expected_row = [1, 3, 1, -0.47, -0.279937, 2.47266, -0.172009,
                        0.000778678, 0.000589703, -0.000221795]
        self.assertEqual(first['data'][0], expected_row)
if __name__ == "__main__":
unittest.main()