From 977ba9ff66245b5173bc25a33d621e992ea8a681 Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Thu, 25 Feb 2021 15:32:53 -0500
Subject: [PATCH 1/3] Add LogFile and AvgChunkFile readers

Implements changes proposed in #144
---
 python/lammps/formats.py          | 167 ++++++++++++++++++++++++++++++
 unittest/python/CMakeLists.txt    |   5 +
 unittest/python/python-formats.py |  91 ++++++++++++++++
 3 files changed, 263 insertions(+)
 create mode 100644 python/lammps/formats.py
 create mode 100644 unittest/python/python-formats.py

diff --git a/python/lammps/formats.py b/python/lammps/formats.py
new file mode 100644
index 0000000000..6cbff321a2
--- /dev/null
+++ b/python/lammps/formats.py
@@ -0,0 +1,167 @@
+# ----------------------------------------------------------------------
+#   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+#   http://lammps.sandia.gov, Sandia National Laboratories
+#   Steve Plimpton, sjplimp@sandia.gov
+#
+#   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+#   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+#   certain rights in this software.  This software is distributed under
+#   the GNU General Public License.
+#
+#   See the README file in the top-level LAMMPS directory.
+# -------------------------------------------------------------------------
+
+################################################################################
+# LAMMPS data formats
+# Written by Richard Berger <richard.berger@temple.edu>
+################################################################################
+
+import re
+
+class LogFile:
+  STYLE_DEFAULT = 0
+  STYLE_MULTI   = 1
+
+  def __init__(self, filename):
+    alpha = re.compile(r'[a-df-zA-DF-Z]') # except e or E for floating-point numbers
+    kvpairs = re.compile(r'([a-zA-Z_0-9]+)\s+=\s*([0-9\.eE\-+]+)') # "key = value" pairs; '+' admits exponents such as 1e+05
+    style = LogFile.STYLE_DEFAULT
+    self.runs = []    # one dict per completed run: thermo keyword -> list of values
+    self.errors = []  # raw lines containing "ERROR" or "exited on signal"
+    with open(filename, 'rt') as f:
+        in_thermo = False
+        in_data_section = False
+        for line in f:
+            if "ERROR" in line or "exited on signal" in line:
+                self.errors.append(line)
+            elif line.startswith('Step '):
+                in_thermo = True
+                in_data_section = True
+                # a column header means default-style rows follow, even after an earlier multi-style run
+                style = LogFile.STYLE_DEFAULT
+                keys = line.split()
+                current_run = {k: [] for k in keys}
+            elif line.startswith('---------------- Step'):
+                if not in_thermo:
+                   current_run = {'Step': [], 'CPU': []}
+                in_thermo = True
+                in_data_section = True
+                style = LogFile.STYLE_MULTI
+                str_step, str_cpu = line.strip('-\n').split('-----')
+                step = float(str_step.split()[1])
+                cpu  = float(str_cpu.split('=')[1].split()[0])
+                current_run["Step"].append(step)
+                current_run["CPU"].append(cpu)
+            elif line.startswith('Loop time of'):
+                in_thermo = False  # end of this run's thermo output
+                self.runs.append(current_run)
+            elif in_thermo and in_data_section:
+                if style == LogFile.STYLE_DEFAULT:
+                    if alpha.search(line):
+                        continue  # skip non-numeric lines interleaved with the thermo rows
+
+                    for k, v in zip(keys, map(float, line.split())):
+                        current_run[k].append(v)
+                elif style == LogFile.STYLE_MULTI:
+                    if '=' not in line:
+                        in_data_section = False  # record body ended; wait for the next Step banner
+                        continue
+
+                    for k,v in kvpairs.findall(line):
+                        if k not in current_run:
+                            current_run[k] = [float(v)]
+                        else:
+                            current_run[k].append(float(v))
+
+class AvgChunkFile:
+  def __init__(self, filename):
+    with open(filename, 'rt') as f:
+      timestep = None
+      chunks_read = 0
+
+      self.timesteps = []
+      self.total_count = []
+      self.chunks = []
+
+      for lineno, line in enumerate(f):
+        if lineno == 0:
+          if not line.startswith("# Chunk-averaged data for fix"):
+            raise Exception("Chunk data reader only supports default avg/chunk headers!")
+          parts = line.split()
+          self.fix_name = parts[5]    # "# Chunk-averaged data for fix <ID> and group <name>"
+          self.group_name = parts[8]
+          continue
+        elif lineno == 1:
+          if not line.startswith("# Timestep Number-of-chunks Total-count"):
+            raise Exception("Chunk data reader only supports default avg/chunk headers!")
+          continue
+        elif lineno == 2:
+          if not line.startswith("#"):
+            raise Exception("Chunk data reader only supports default avg/chunk headers!")
+          columns = line.split()[1:]  # column names without the leading '#'
+          ndim = line.count("Coord")
+          compress = 'OrigID' in line
+          if ndim > 0:
+            coord_start = columns.index("Coord1")
+            coord_end   = columns.index("Coord%d" % ndim)
+            ncount_start = coord_end + 1
+            data_start = ncount_start + 1
+          else:
+            coord_start = None
+            coord_end = None
+            ncount_start = columns.index("Ncount")  # index 1 for "Chunk Ncount", 2 for "Chunk OrigID Ncount"
+            data_start = ncount_start + 1
+          continue
+
+        parts = line.split()
+
+        if timestep is None:
+          timestep = int(parts[0])
+          num_chunks = int(parts[1])
+          total_count = float(parts[2])
+
+          self.timesteps.append(timestep)
+
+          for i in range(num_chunks):
+            self.chunks.append({
+              'coord' : [],
+              'ncount' : []
+            })
+        elif chunks_read < num_chunks:
+          chunk = int(parts[0])
+          ncount = float(parts[ncount_start])
+
+          if compress:
+            chunk_id = int(parts[1])  # OrigID column: the chunk's original, uncompressed ID
+          else:
+            chunk_id = chunk
+
+          current = self.chunks[chunk - 1]  # slot by sequential chunk number; OrigID may exceed num_chunks
+          current['id'] = chunk_id
+          current['ncount'].append(ncount)
+
+          if ndim > 0:
+            coord = tuple(map(float, parts[coord_start:coord_end+1]))
+            current['coord'].append(coord)
+
+          for i, data_column in list(enumerate(columns))[data_start:]:
+            value = float(parts[i])
+
+            if data_column in current:
+              current[data_column].append(value)
+            else:
+              current[data_column] = [value]
+
+          chunks_read += 1
+          assert (chunk == chunks_read)  # chunk rows are expected in order 1..num_chunks
+        else:
+          # do not support changing number of chunks
+          if not (num_chunks == int(parts[1])):
+            raise Exception("Currently, changing numbers of chunks are not supported.")
+
+          timestep = int(parts[0])
+          total_count = float(parts[2])
+          chunks_read = 0
+
+          self.timesteps.append(timestep)
+          self.total_count.append(total_count)
diff --git a/unittest/python/CMakeLists.txt b/unittest/python/CMakeLists.txt
index d5328b1cfe..d508602c93 100644
--- a/unittest/python/CMakeLists.txt
+++ b/unittest/python/CMakeLists.txt
@@ -79,6 +79,11 @@ if(Python_EXECUTABLE)
            COMMAND ${PYTHON_TEST_RUNNER} ${CMAKE_CURRENT_SOURCE_DIR}/python-pylammps.py -v
            WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
   set_tests_properties(PythonPyLammps PROPERTIES ENVIRONMENT "${PYTHON_TEST_ENVIRONMENT}")
+
+  add_test(NAME PythonFormats
+           COMMAND ${PYTHON_TEST_RUNNER} ${CMAKE_CURRENT_SOURCE_DIR}/python-formats.py -v
+           WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
+  set_tests_properties(PythonFormats PROPERTIES ENVIRONMENT "${PYTHON_TEST_ENVIRONMENT}")
 else()
   message(STATUS "Skipping Tests for the LAMMPS Python Module: no suitable Python interpreter")
 endif()
diff --git a/unittest/python/python-formats.py b/unittest/python/python-formats.py
new file mode 100644
index 0000000000..a5d4e84cfa
--- /dev/null
+++ b/unittest/python/python-formats.py
@@ -0,0 +1,91 @@
+import os
+import unittest
+from lammps.formats import LogFile, AvgChunkFile
+
+EXAMPLES_DIR=os.path.abspath(os.path.join(__file__, '..', '..', '..', 'examples'))
+
+DEFAULT_STYLE_EXAMPLE_LOG="melt/log.27Nov18.melt.g++.1"
+MULTI_STYLE_EXAMPLE_LOG="USER/fep/CC-CO/fep10/log.lammps"
+AVG_CHUNK_FILE="VISCOSITY/profile.13Oct16.nemd.2d.g++.1"
+
+class Logfiles(unittest.TestCase):
+    def testLogFileNotFound(self):
+        with self.assertRaises(FileNotFoundError):
+            LogFile('test.log')
+
+    def testDefaultLogFile(self):
+        log = LogFile(os.path.join(EXAMPLES_DIR, DEFAULT_STYLE_EXAMPLE_LOG))
+        self.assertEqual(len(log.runs), 1)
+        run = log.runs[0]
+        self.assertEqual(len(run.keys()), 6)
+        self.assertIn("Step", run)
+        self.assertIn("Temp", run)
+        self.assertIn("E_pair", run)
+        self.assertIn("E_mol", run)
+        self.assertIn("TotEng", run)
+        self.assertIn("Press", run)
+        self.assertEqual(len(run["Step"]), 6)
+        self.assertEqual(len(run["Temp"]), 6)
+        self.assertEqual(len(run["E_pair"]), 6)
+        self.assertEqual(len(run["E_mol"]), 6)
+        self.assertEqual(len(run["TotEng"]), 6)
+        self.assertEqual(len(run["Press"]), 6)
+        self.assertEqual(log.runs[0]["Step"], [0, 50, 100, 150, 200, 250])
+
+    def testMultiLogFile(self):
+        log = LogFile(os.path.join(EXAMPLES_DIR, MULTI_STYLE_EXAMPLE_LOG))
+        self.assertEqual(len(log.runs), 2)
+        run0 = log.runs[0]
+        run1 = log.runs[1]
+
+        self.assertEqual(len(run0.keys()), 15)
+        self.assertIn("Step", run0)
+        self.assertIn("CPU", run0)
+        self.assertIn("TotEng", run0)
+        self.assertIn("KinEng", run0)
+        self.assertIn("Temp", run0)
+        self.assertIn("PotEng", run0)
+        self.assertIn("E_bond", run0)
+        self.assertIn("E_angle", run0)
+        self.assertIn("E_dihed", run0)
+        self.assertIn("E_impro", run0)
+        self.assertIn("E_vdwl", run0)
+        self.assertIn("E_coul", run0)
+        self.assertIn("E_long", run0)
+        self.assertIn("Press", run0)
+        self.assertIn("Volume", run0)
+
+        for k in run0:
+            self.assertEqual(len(run0[k]), 51)
+
+        self.assertEqual(run0["Step"], list(range(0,255000, 5000)))
+
+
+class AvgChunkFiles(unittest.TestCase):
+    def testAvgChunkFileNotFound(self):
+        with self.assertRaises(FileNotFoundError):
+            AvgChunkFile('test.log')
+
+    def testRead(self):
+        cfile = AvgChunkFile(os.path.join(EXAMPLES_DIR, AVG_CHUNK_FILE))
+        self.assertEqual(cfile.fix_name, "4")
+        self.assertEqual(cfile.group_name, "all")
+        self.assertEqual(cfile.timesteps, list(range(10000, 110000, 5000)))
+
+        ntimesteps = len(cfile.timesteps)
+        nchunks = len(cfile.chunks)
+        self.assertEqual(nchunks, 20)
+
+        for i in range(1, nchunks+1):
+            chunk  = cfile.chunks[i-1];
+            self.assertEqual(chunk['id'], i)
+            self.assertEqual(len(chunk['coord']), ntimesteps)
+            self.assertEqual(len(chunk['ncount']), ntimesteps)
+            self.assertIn("vx", chunk)
+            self.assertEqual(len(chunk['vx']), ntimesteps)
+
+        self.assertEqual(len(chunk['coord'][0]), 1)
+
+
+if __name__ == "__main__":
+    unittest.main()

From 3d96d0a674204d03042121b1e4ee28bb8d1549dd Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Thu, 25 Feb 2021 15:34:29 -0500
Subject: [PATCH 2/3] Fix wrong group name output in fix ave/chunk

---
 examples/VISCOSITY/profile.13Oct16.mp.2d.g++.1   |  2 +-
 examples/VISCOSITY/profile.13Oct16.nemd.2d.g++.1 |  2 +-
 examples/VISCOSITY/profile.13Oct16.wall.2d.g++.1 |  2 +-
 src/fix_ave_chunk.cpp                            | 15 ++++++++-------
 src/fix_ave_chunk.h                              |  2 +-
 5 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/examples/VISCOSITY/profile.13Oct16.mp.2d.g++.1 b/examples/VISCOSITY/profile.13Oct16.mp.2d.g++.1
index 64303827b5..a969bd5c0e 100644
--- a/examples/VISCOSITY/profile.13Oct16.mp.2d.g++.1
+++ b/examples/VISCOSITY/profile.13Oct16.mp.2d.g++.1
@@ -1,4 +1,4 @@
-# Chunk-averaged data for fix 5 and group file
+# Chunk-averaged data for fix 5 and group all
 # Timestep Number-of-chunks Total-count
 # Chunk Coord1 Ncount vx
 6000 20 800
diff --git a/examples/VISCOSITY/profile.13Oct16.nemd.2d.g++.1 b/examples/VISCOSITY/profile.13Oct16.nemd.2d.g++.1
index 463a549d5e..efa03b1192 100644
--- a/examples/VISCOSITY/profile.13Oct16.nemd.2d.g++.1
+++ b/examples/VISCOSITY/profile.13Oct16.nemd.2d.g++.1
@@ -1,4 +1,4 @@
-# Chunk-averaged data for fix 4 and group file
+# Chunk-averaged data for fix 4 and group all
 # Timestep Number-of-chunks Total-count
 # Chunk Coord1 Ncount vx
 10000 20 800
diff --git a/examples/VISCOSITY/profile.13Oct16.wall.2d.g++.1 b/examples/VISCOSITY/profile.13Oct16.wall.2d.g++.1
index 191496f0cd..06033fa883 100644
--- a/examples/VISCOSITY/profile.13Oct16.wall.2d.g++.1
+++ b/examples/VISCOSITY/profile.13Oct16.wall.2d.g++.1
@@ -1,4 +1,4 @@
-# Chunk-averaged data for fix 4 and group file
+# Chunk-averaged data for fix 4 and group all
 # Timestep Number-of-chunks Total-count
 # Chunk Coord1 Ncount vx
 10000 20 1020
diff --git a/src/fix_ave_chunk.cpp b/src/fix_ave_chunk.cpp
index d13d46fc20..3b612aeb73 100644
--- a/src/fix_ave_chunk.cpp
+++ b/src/fix_ave_chunk.cpp
@@ -15,6 +15,7 @@
 
 #include "arg_info.h"
 #include "atom.h"
+#include "comm.h"
 #include "compute.h"
 #include "compute_chunk_atom.h"
 #include "domain.h"
@@ -52,8 +53,6 @@ FixAveChunk::FixAveChunk(LAMMPS *lmp, int narg, char **arg) :
 {
   if (narg < 7) error->all(FLERR,"Illegal fix ave/chunk command");
 
-  MPI_Comm_rank(world,&me);
-
   nevery = utils::inumeric(FLERR,arg[3],false,lmp);
   nrepeat = utils::inumeric(FLERR,arg[4],false,lmp);
   nfreq = utils::inumeric(FLERR,arg[5],false,lmp);
@@ -65,6 +64,8 @@ FixAveChunk::FixAveChunk(LAMMPS *lmp, int narg, char **arg) :
   global_freq = nfreq;
   no_change_box = 1;
 
+  char * group = arg[1];
+
   // expand args if any have wildcard character "*"
 
   int expand = 0;
@@ -206,7 +207,7 @@ FixAveChunk::FixAveChunk(LAMMPS *lmp, int narg, char **arg) :
 
     } else if (strcmp(arg[iarg],"file") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix ave/chunk command");
-      if (me == 0) {
+      if (comm->me == 0) {
         fp = fopen(arg[iarg+1],"w");
         if (fp == nullptr)
           error->one(FLERR,fmt::format("Cannot open fix ave/chunk file {}: {}",
@@ -328,11 +329,11 @@ FixAveChunk::FixAveChunk(LAMMPS *lmp, int narg, char **arg) :
 
   // print file comment lines
 
-  if (fp && me == 0) {
+  if (fp && comm->me == 0) {
     clearerr(fp);
     if (title1) fprintf(fp,"%s\n",title1);
     else fprintf(fp,"# Chunk-averaged data for fix %s and group %s\n",
-                 id,arg[1]);
+                 id, group);
     if (title2) fprintf(fp,"%s\n",title2);
     else fprintf(fp,"# Timestep Number-of-chunks Total-count\n");
     if (title3) fprintf(fp,"%s\n",title3);
@@ -423,7 +424,7 @@ FixAveChunk::~FixAveChunk()
   delete [] ids;
   delete [] value2index;
 
-  if (fp && me == 0) fclose(fp);
+  if (fp && comm->me == 0) fclose(fp);
 
   memory->destroy(varatom);
   memory->destroy(count_one);
@@ -949,7 +950,7 @@ void FixAveChunk::end_of_step()
 
   // output result to file
 
-  if (fp && me == 0) {
+  if (fp && comm->me == 0) {
     clearerr(fp);
     if (overwrite) fseek(fp,filepos,SEEK_SET);
     double count = 0.0;
diff --git a/src/fix_ave_chunk.h b/src/fix_ave_chunk.h
index dac5761ae8..debab13165 100644
--- a/src/fix_ave_chunk.h
+++ b/src/fix_ave_chunk.h
@@ -36,7 +36,7 @@ class FixAveChunk : public Fix {
   double memory_usage();
 
  private:
-  int me,nvalues;
+  int nvalues;
   int nrepeat,nfreq,irepeat;
   int normflag,scaleflag,overwrite,biasflag,colextra;
   bigint nvalid,nvalid_last;

From f92089298d869a7892beed62162685810c03284d Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Thu, 25 Feb 2021 16:56:19 -0500
Subject: [PATCH 3/3] Fix missing first total_count entry in AvgChunkFile and add docs

---
 doc/src/Python_formats.rst        | 11 +++++++++++
 doc/src/Python_head.rst           |  1 +
 python/lammps/formats.py          | 25 ++++++++++++++++++++++++-
 unittest/python/python-formats.py |  2 ++
 4 files changed, 38 insertions(+), 1 deletion(-)
 create mode 100644 doc/src/Python_formats.rst

diff --git a/doc/src/Python_formats.rst b/doc/src/Python_formats.rst
new file mode 100644
index 0000000000..b9ffdba632
--- /dev/null
+++ b/doc/src/Python_formats.rst
@@ -0,0 +1,11 @@
+Output Readers
+==============
+
+.. py:module:: lammps.formats
+
+The Python package contains the :py:mod:`lammps.formats` module, which
+provides classes to post-process some of the output files generated by LAMMPS.
+
+.. automodule:: lammps.formats
+   :members:
+   :noindex:
diff --git a/doc/src/Python_head.rst b/doc/src/Python_head.rst
index 09071171ad..3e84ed4506 100644
--- a/doc/src/Python_head.rst
+++ b/doc/src/Python_head.rst
@@ -13,6 +13,7 @@ together.
    Python_module
    Python_ext
    Python_call
+   Python_formats
    Python_examples
    Python_error
    Python_trouble
diff --git a/python/lammps/formats.py b/python/lammps/formats.py
index 6cbff321a2..3ebc2e1bac 100644
--- a/python/lammps/formats.py
+++ b/python/lammps/formats.py
@@ -12,13 +12,25 @@
 # -------------------------------------------------------------------------
 
 ################################################################################
-# LAMMPS data formats
+# LAMMPS output formats
 # Written by Richard Berger <richard.berger@temple.edu>
 ################################################################################
 
 import re
 
 class LogFile:
+  """Reads LAMMPS log files and extracts the thermo information
+
+  It supports both the default thermo output style (including custom) and multi.
+
+  :param filename: path to log file
+  :type  filename: str
+
+  :ivar runs: List of LAMMPS runs in log file. Each run is a dictionary with
+              thermo fields as keys, storing the values over time
+  :ivar errors: List of error lines in log file
+  """
+
   STYLE_DEFAULT = 0
   STYLE_MULTI   = 1
 
@@ -73,7 +85,17 @@ class LogFile:
                         else:
                             current_run[k].append(float(v))
 
+
 class AvgChunkFile:
+  """Reads files generated by fix ave/chunk
+
+  :param filename: path to ave/chunk file
+  :type  filename: str
+
+  :ivar timesteps: List of timesteps stored in file
+  :ivar total_count: total count over time
+  :ivar chunks: List of chunks. Each chunk is a dictionary containing its ID, the coordinates, and the averaged quantities
+  """
   def __init__(self, filename):
     with open(filename, 'rt') as f:
       timestep = None
@@ -121,6 +143,7 @@ class AvgChunkFile:
           total_count = float(parts[2])
 
           self.timesteps.append(timestep)
+          self.total_count.append(total_count)
 
           for i in range(num_chunks):
             self.chunks.append({
diff --git a/unittest/python/python-formats.py b/unittest/python/python-formats.py
index a5d4e84cfa..ba68e50f4b 100644
--- a/unittest/python/python-formats.py
+++ b/unittest/python/python-formats.py
@@ -73,7 +73,9 @@ class AvgChunkFiles(unittest.TestCase):
         self.assertEqual(cfile.timesteps, list(range(10000, 110000, 5000)))
 
         ntimesteps = len(cfile.timesteps)
+        ntotal_count = len(cfile.total_count)
         nchunks = len(cfile.chunks)
+        self.assertEqual(ntimesteps, ntotal_count)
         self.assertEqual(nchunks, 20)
 
         for i in range(1, nchunks+1):