diff --git a/doc/src/Howto_structured_data.rst b/doc/src/Howto_structured_data.rst index b320e87279..4ea6c28086 100644 --- a/doc/src/Howto_structured_data.rst +++ b/doc/src/Howto_structured_data.rst @@ -55,6 +55,9 @@ JSON YAML format thermo_style output =============================== +Extracting data from log file +----------------------------- + .. versionadded:: 24Mar2022 LAMMPS supports the thermo style "yaml" and for "custom" style @@ -66,7 +69,7 @@ the following style: .. code-block:: yaml --- - keywords: [Step, Temp, E_pair, E_mol, TotEng, Press, ] + keywords: ['Step', 'Temp', 'E_pair', 'E_mol', 'TotEng', 'Press', ] data: - [100, 0.757453103239935, -5.7585054860159, 0, -4.62236133677021, 0.207261053624721, ] - [110, 0.759322359337036, -5.7614668389562, 0, -4.62251889318624, 0.194314975399602, ] @@ -80,9 +83,9 @@ This data can be extracted and parsed from a log file using python with: import re, yaml try: - from yaml import CSafeLoader as Loader, CSafeDumper as Dumper + from yaml import CSafeLoader as Loader except ImportError: - from yaml import SafeLoader as Loader, SafeDumper as Dumper + from yaml import SafeLoader as Loader docs = "" with open("log.lammps") as f: @@ -109,6 +112,135 @@ of that run: Number of runs: 2 TotEng = -4.62140097780047 +.. versionadded:: 4May2022 + +YAML format output has been added to multiple commands in LAMMPS, +for example :doc:`dump yaml ` or :doc:`fix ave/time ` +Depending on the kind of data being written, organization of the data +or the specific syntax used may change, but the principles are very +similar and all files should be readable with a suitable YAML parser. + +Processing scalar data with Python +---------------------------------- + +.. figure:: JPG/thermo_bondeng.png + :figwidth: 33% + :align: right + +After reading and parsing the YAML format data, it can be easily +imported for further processing and visualization with the `pandas +`_ and `matplotlib +`_ Python modules. Because of the organization +of the data in the YAML format thermo output, it needs to be told to +process only the 'data' part of the imported data to create a pandas +data frame, and one needs to set the column names from the 'keywords' +entry. The following Python script code example demonstrates this, and +creates the image shown on the right of a simple plot of various bonded +energy contributions versus the timestep from a run of the 'peptide' +example input after changing the :doc:`thermo style ` to +'yaml'. The properties to be used for x and y values can be +conveniently selected through the keywords. Please note that those +keywords can be changed to custom strings with the :doc:`thermo_modify +colname ` command. + +.. code-block:: python + + import re, yaml + import pandas as pd + import matplotlib.pyplot as plt + + try: + from yaml import CSafeLoader as Loader + except ImportError: + from yaml import SafeLoader as Loader + + docs = "" + with open("log.lammps") as f: + for line in f: + m = re.search(r"^(keywords:.*$|data:$|---$|\.\.\.$| - \[.*\]$)", line) + if m: docs += m.group(0) + '\n' + + thermo = list(yaml.load_all(docs, Loader=Loader)) + + df = pd.DataFrame(data=thermo[0]['data'], columns=thermo[0]['keywords']) + fig = df.plot(x='Step', y=['E_bond', 'E_angle', 'E_dihed', 'E_impro'], ylabel='Energy in kcal/mol') + plt.savefig('thermo_bondeng.png') + +Processing vector data with Python +---------------------------------- + +Global *vector* data as produced by :doc:`fix ave/time ` +uses a slightly different organization of the data. You still have the +dictionary keys 'keywords' and 'data' for the column headers and the +data. But the data is a dictionary indexed by the time step and for +each step there are multiple rows of values each with a list of the +averaged properties. This requires a slightly different processing, +since the entire data cannot be directly imported into a single pandas +DataFrame class instance. The following Python script example +demonstrates how to read such data. The result will combine the data +for the different steps into one large "multi-index" table. The pandas +IndexSlice class can then be used to select data from this combined data +frame. + +.. code-block:: python + + import re, yaml + import pandas as pd + + try: + from yaml import CSafeLoader as Loader + except ImportError: + from yaml import SafeLoader as Loader + + with open("ave.yaml") as f: + ave = yaml.load(docs, Loader=Loader) + + keys = ave['keywords'] + df = {} + for k in ave['data'].keys(): + df[k] = pd.DataFrame(data=ave['data'][k], columns=keys) + + # create multi-index data frame + df = pd.concat(df) + + # output only the first 3 value for steps 200 to 300 of the column Pressure + idx = pd.IndexSlice + print(df['Pressure'].loc[idx[200:300, 0:2]]) + + +Processing scalar data with Perl +-------------------------------- + +The ease of processing YAML data is not limited to Python. Here is an +example for extracting and processing a LAMMPS log file with Perl instead. + +.. code-block:: perl + + use YAML::XS; + + open(LOG, "log.lammps") or die("could not open log.lammps: $!"); + my $file = ""; + while(my $line = ) { + if ($line =~ /^(keywords:.*$|data:$|---$|\.\.\.$| - \[.*\]$)/) { + $file .= $line; + } + } + close(LOG); + + # convert YAML to perl as nested hash and array references + my $thermo = Load $file; + + # convert references to real arrays + my @keywords = @{$thermo->{'keywords'}}; + my @data = @{$thermo->{'data'}}; + + # print first two columns + print("$keywords[0] $keywords[1]\n"); + foreach (@data) { + print("${$_}[0] ${$_}[1]\n"); + } + + Writing continuous data during a simulation =========================================== diff --git a/doc/src/JPG/thermo_bondeng.png b/doc/src/JPG/thermo_bondeng.png new file mode 100644 index 0000000000..28ac9d26cc Binary files /dev/null and b/doc/src/JPG/thermo_bondeng.png differ diff --git a/doc/src/fix_ave_time.rst b/doc/src/fix_ave_time.rst index 2836e11fd0..99300cfe26 100644 --- a/doc/src/fix_ave_time.rst +++ b/doc/src/fix_ave_time.rst @@ -272,10 +272,18 @@ each input value specified in the fix ave/time command. For *mode* = scalar, this means a single line is written each time output is performed. Thus the file ends up to be a series of lines, i.e. one column of numbers for each input value. For *mode* = vector, an array -of numbers is written each time output is performed. The number of -rows is the length of the input vectors, and the number of columns is -the number of values. Thus the file ends up to be a series of these -array sections. +of numbers is written each time output is performed. The number of rows +is the length of the input vectors, and the number of columns is the +number of values. Thus the file ends up to be a series of these array +sections. + +If the filename ends in '.yaml' or '.yml' then the output format +conforms to the `YAML standard `_ which allows +easy import that data into tools and scripts that support reading YAML +files. The :doc:`structured data Howto ` contains +examples for parsing and plotting such data with very little programming +effort in Python using the *pyyaml*, *pandas*, and *matplotlib* +packages. The *overwrite* keyword will continuously overwrite the output file with the latest output, so that it only contains one timestep worth of @@ -321,8 +329,10 @@ appropriate fields from the fix ave/time command. Restart, fix_modify, output, run start/stop, minimize info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" -No information about this fix is written to :doc:`binary restart files `. None of the :doc:`fix_modify ` options -are relevant to this fix. +No information about this fix is written to :doc:`binary restart files +`. The :doc:`fix_modify colname ` option can be +used to change the name of the column in the output file. When writing +a YAML format file this name will be in the list of keywords. This fix produces a global scalar or global vector or global array which can be accessed by various :doc:`output commands `. diff --git a/doc/src/fix_modify.rst b/doc/src/fix_modify.rst index 47080fcd58..e0cf3a34c7 100644 --- a/doc/src/fix_modify.rst +++ b/doc/src/fix_modify.rst @@ -12,19 +12,23 @@ Syntax * fix-ID = ID of the fix to modify * one or more keyword/value pairs may be appended -* keyword = *temp* or *press* or *energy* or *virial* or *respa* or *dynamic/dof* or *bodyforces* +* keyword = *bodyforces* or *colname* or *dynamic/dof* or *energy* or *press* or *respa* or *temp* or *virial* .. parsed-literal:: - *temp* value = compute ID that calculates a temperature - *press* value = compute ID that calculates a pressure - *energy* value = *yes* or *no* - *virial* value = *yes* or *no* - *respa* value = *1* to *max respa level* or *0* (for outermost level) - *dynamic/dof* value = *yes* or *no* - yes/no = do or do not re-compute the number of degrees of freedom (DOF) contributing to the temperature *bodyforces* value = *early* or *late* early/late = compute rigid-body forces/torques early or late in the timestep + *colname* values = ID string + string = new column header name + ID = integer from 1 to N, or integer from -1 to -N, where N = # of quantities being output + *or* a fix output property keyword or reference to compute, fix, property or variable. + *dynamic/dof* value = *yes* or *no* + yes/no = do or do not re-compute the number of degrees of freedom (DOF) contributing to the temperature + *energy* value = *yes* or *no* + *press* value = compute ID that calculates a pressure + *respa* value = *1* to *max respa level* or *0* (for outermost level) + *temp* value = compute ID that calculates a temperature + *virial* value = *yes* or *no* Examples """""""" @@ -34,6 +38,7 @@ Examples fix_modify 3 temp myTemp press myPress fix_modify 1 energy yes fix_modify tether respa 2 + fix_modify ave colname c_thermo_press Pressure colname 1 Temperature Description """"""""""" @@ -165,6 +170,20 @@ will have no effect on the motion of the rigid bodies if they are specified in the input script after the fix rigid command. LAMMPS will give a warning if that is the case. + +The *colname* keyword can be used to change the default header keywords +in output files of fix styles that support it: currently only :doc:`fix +ave/time ` is supported. The setting for *ID string* +replaces the default text with the provided string. *ID* can be a +positive integer when it represents the column number counting from the +left, a negative integer when it represents the column number from the +right (i.e. -1 is the last column/keyword), or a custom fix output +keyword (or compute, fix, property, or variable reference) and then it +replaces the string for that specific keyword. The *colname* keyword can +be used multiple times. If multiple *colname* settings refer to the same +keyword, the last setting has precedence. + + Restrictions """""""""""" none @@ -172,7 +191,8 @@ none Related commands """""""""""""""" -:doc:`fix `, :doc:`compute temp `, :doc:`compute pressure `, :doc:`thermo_style ` +:doc:`fix `, :doc:`compute temp `, +:doc:`compute pressure `, :doc:`thermo_style ` Default """"""" diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index 6cff952aba..bdebbb65a9 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -3779,6 +3779,7 @@ ylat ylo ymax ymin +yml Yoshida ys ysu diff --git a/src/fix_ave_time.cpp b/src/fix_ave_time.cpp index ee08c859f4..010170e149 100644 --- a/src/fix_ave_time.cpp +++ b/src/fix_ave_time.cpp @@ -83,6 +83,8 @@ FixAveTime::FixAveTime(LAMMPS *lmp, int narg, char **arg) : int expand = 0; char **earg; nvalues = utils::expand_args(FLERR,nvalues,&arg[6],mode,earg,lmp); + keyword.resize(nvalues); + key2col.clear(); if (earg != &arg[6]) expand = 1; arg = earg; @@ -98,6 +100,8 @@ FixAveTime::FixAveTime(LAMMPS *lmp, int narg, char **arg) : for (int i = 0; i < nvalues; i++) { ArgInfo argi(arg[i]); + keyword[i] = arg[i]; + key2col[arg[i]] = i; if ((argi.get_type() == ArgInfo::NONE) || (argi.get_type() == ArgInfo::UNKNOWN) @@ -269,9 +273,8 @@ FixAveTime::FixAveTime(LAMMPS *lmp, int narg, char **arg) : for (int i = 0; i < nvalues; i++) fprintf(fp," %s",earg[i]); fprintf(fp,"\n"); } - if (ferror(fp)) - error->one(FLERR,"Error writing file header"); - + if (yaml_flag) fputs("---\n",fp); + if (ferror(fp)) error->one(FLERR,"Error writing file header: {}", utils::getsyserror()); filepos = platform::ftell(fp); } @@ -456,8 +459,10 @@ FixAveTime::~FixAveTime() delete[] extlist; - if (fp && me == 0) fclose(fp); - + if (fp && me == 0) { + if (yaml_flag) fputs("...\n", fp); + fclose(fp); + } memory->destroy(column); delete[] vector; @@ -669,11 +674,22 @@ void FixAveTime::invoke_scalar(bigint ntimestep) if (fp && me == 0) { clearerr(fp); if (overwrite) platform::fseek(fp,filepos); - fmt::print(fp,"{}",ntimestep); - for (i = 0; i < nvalues; i++) fprintf(fp,format,vector_total[i]/norm); - fprintf(fp,"\n"); - if (ferror(fp)) error->one(FLERR,"Error writing out time averaged data"); - + if (yaml_flag) { + if (!yaml_header || overwrite) { + yaml_header = true; + fputs("keywords: ['Step', ", fp); + for (auto k : keyword) fmt::print(fp, "'{}', ", k); + fputs("]\ndata:\n", fp); + } + fmt::print(fp, " - [{}, ", ntimestep); + for (i = 0; i < nvalues; i++) fmt::print(fp,"{}, ",vector_total[i]/norm); + fputs("]\n", fp); + } else { + fmt::print(fp,"{}",ntimestep); + for (i = 0; i < nvalues; i++) fprintf(fp,format,vector_total[i]/norm); + fprintf(fp,"\n"); + if (ferror(fp)) error->one(FLERR,"Error writing out time averaged data"); + } fflush(fp); if (overwrite) { @@ -880,11 +896,26 @@ void FixAveTime::invoke_vector(bigint ntimestep) if (fp && me == 0) { if (overwrite) platform::fseek(fp,filepos); - fmt::print(fp,"{} {}\n",ntimestep,nrows); - for (i = 0; i < nrows; i++) { - fprintf(fp,"%d",i+1); - for (j = 0; j < nvalues; j++) fprintf(fp,format,array_total[i][j]/norm); - fprintf(fp,"\n"); + if (yaml_flag) { + if (!yaml_header || overwrite) { + yaml_header = true; + fputs("keywords: [", fp); + for (auto k : keyword) fmt::print(fp, "'{}', ", k); + fputs("]\ndata:\n", fp); + } + fmt::print(fp, " {}:\n", ntimestep); + for (i = 0; i < nrows; i++) { + fputs(" - [", fp); + for (j = 0; j < nvalues; j++) fmt::print(fp,"{}, ",array_total[i][j]/norm); + fputs("]\n", fp); + } + } else { + fmt::print(fp,"{} {}\n",ntimestep,nrows); + for (i = 0; i < nrows; i++) { + fprintf(fp,"%d",i+1); + for (j = 0; j < nvalues; j++) fprintf(fp,format,array_total[i][j]/norm); + fprintf(fp,"\n"); + } } fflush(fp); if (overwrite) { @@ -994,6 +1025,34 @@ double FixAveTime::compute_array(int i, int j) return 0.0; } +/* ---------------------------------------------------------------------- + modify settings +------------------------------------------------------------------------- */ + +int FixAveTime::modify_param(int narg, char **arg) +{ + if (strcmp(arg[0], "colname") == 0) { + if (narg < 3) utils::missing_cmd_args(FLERR, "fix_modify colname", error); + int icol = -1; + if (utils::is_integer(arg[1])) { + icol = utils::inumeric(FLERR, arg[1], false, lmp); + if (icol < 0) icol = keyword.size() + icol + 1; + icol--; + } else { + try { + icol = key2col.at(arg[1]); + } catch (std::out_of_range &) { + icol = -1; + } + } + if ((icol < 0) || (icol >= (int) keyword.size())) + error->all(FLERR, "Thermo_modify colname column {} invalid", arg[1]); + keyword[icol] = arg[2]; + return 3; + } + return 0; +} + /* ---------------------------------------------------------------------- parse optional args ------------------------------------------------------------------------- */ @@ -1009,6 +1068,7 @@ void FixAveTime::options(int iarg, int narg, char **arg) noff = 0; offlist = nullptr; overwrite = 0; + yaml_flag = yaml_header = false; format_user = nullptr; format = (char *) " %g"; title1 = nullptr; @@ -1020,11 +1080,12 @@ void FixAveTime::options(int iarg, int narg, char **arg) while (iarg < narg) { if (strcmp(arg[iarg],"file") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix ave/time command"); + yaml_flag = utils::strmatch(arg[iarg+1],"\\.[yY][aA]?[mM][lL]$"); if (me == 0) { fp = fopen(arg[iarg+1],"w"); if (fp == nullptr) error->one(FLERR,"Cannot open fix ave/time file {}: {}", - arg[iarg+1], utils::getsyserror()); + arg[iarg+1], utils::getsyserror()); } iarg += 2; } else if (strcmp(arg[iarg],"ave") == 0) { diff --git a/src/fix_ave_time.h b/src/fix_ave_time.h index d2c7b4752b..55df82c1f7 100644 --- a/src/fix_ave_time.h +++ b/src/fix_ave_time.h @@ -22,6 +22,8 @@ FixStyle(ave/time,FixAveTime); #include "fix.h" +#include + namespace LAMMPS_NS { class FixAveTime : public Fix { @@ -32,6 +34,7 @@ class FixAveTime : public Fix { void init() override; void setup(int) override; void end_of_step() override; + int modify_param(int, char **) override; double compute_scalar() override; double compute_vector(int) override; double compute_array(int, int) override; @@ -48,6 +51,7 @@ class FixAveTime : public Fix { int any_variable_length; int all_variable_length; int lockforever; + bool yaml_flag, yaml_header; int ave, nwindow, startstep, mode; int noff, overwrite; @@ -56,6 +60,9 @@ class FixAveTime : public Fix { char *title1, *title2, *title3; bigint filepos; + std::map key2col; + std::vector keyword; + int norm, iwindow, window_limit; double *vector; double *vector_total; diff --git a/src/thermo.cpp b/src/thermo.cpp index 0a810662ca..257c2b78ce 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -335,10 +335,8 @@ void Thermo::header() hdr += fmt::format("{:^14} ", head); else if ((vtype[i] == INT) || (vtype[i] == BIGINT)) hdr += fmt::format("{:^11} ", head); - } else if (lineflag == YAMLLINE) { - hdr += keyword[i]; - hdr += ", "; - } + } else if (lineflag == YAMLLINE) + hdr += fmt::format("'{}', ", head); } if (lineflag == YAMLLINE) hdr += "]\ndata:"; diff --git a/unittest/utils/test_utils.cpp b/unittest/utils/test_utils.cpp index 5967ac087a..fac9767f62 100644 --- a/unittest/utils/test_utils.cpp +++ b/unittest/utils/test_utils.cpp @@ -519,6 +519,27 @@ TEST(Utils, strmatch_opt_char) ASSERT_FALSE(utils::strmatch("x_name", "^[cfvid]2?_name")); } +TEST(Utils, strmatch_yaml_suffix) +{ + ASSERT_TRUE(utils::strmatch("test.yaml", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_TRUE(utils::strmatch("test.yml", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_TRUE(utils::strmatch("TEST.YAML", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_TRUE(utils::strmatch("TEST.YML", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("test.yamlx", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("test.ymlx", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("TEST.YAMLX", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("TEST.YMLX", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("testyaml", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("testyml", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("TESTYAML", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("TESTYML", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("yaml.test", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("yml.test", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("YAML.TEST", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("YML.TEST", "\\.[yY][aA]?[mM][lL]$")); + ASSERT_FALSE(utils::strmatch("test", "\\.[yY][aA]?[mM][lL]$")); +} + TEST(Utils, strmatch_dot) { ASSERT_TRUE(utils::strmatch("rigid", ".igid"));