From 9291d2a9d7a88a3f4b78c8647886a90e38bb40bc Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Thu, 4 Jun 2020 10:35:36 -0400
Subject: [PATCH] Simplify count_words, add trim_and_count_words

The original count_words function (before it was put into utils::) also trimmed
comments. For compatibility this behaviour was retained at first. However, due
to the name, the trimming is not immediately apparent and is often not
wanted.

Therefore, this commit replaces count_words with an implementation that
just does what it says. If a comment should be trimmed there is a
trim_comment function. For convenience, a trim_and_count_words function was
added and is now used where the old behaviour was needed.
---
 src/RIGID/fix_rigid_small.cpp |  2 +-
 src/SNAP/pair_snap.cpp        |  4 ++--
 src/SPIN/neb_spin.cpp         |  2 +-
 src/atom.cpp                  |  6 +++---
 src/fix_property_atom.cpp     |  2 +-
 src/fix_tmd.cpp               |  4 ++--
 src/molecule.cpp              |  2 +-
 src/read_data.cpp             |  4 ++--
 src/reader_native.cpp         |  2 +-
 src/text_file_reader.cpp      |  2 +-
 src/thermo.cpp                |  2 +-
 src/utils.cpp                 | 12 ++++++++++--
 src/utils.h                   | 10 +++++++++-
 unittest/utils/test_utils.cpp |  6 +++++-
 14 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp
index 89fa3add4b..d3a2491048 100644
--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@@ -2489,7 +2489,7 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody)
     buf = buffer;
     next = strchr(buf,'\n');
     *next = '\0';
-    int nwords = utils::count_words(buf);
+    int nwords = utils::trim_and_count_words(buf);
     *next = '\n';
 
     if (nwords != ATTRIBUTE_PERBODY)
diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp
index 7108e62142..389fde28a0 100644
--- a/src/SNAP/pair_snap.cpp
+++ b/src/SNAP/pair_snap.cpp
@@ -568,7 +568,7 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename)
     MPI_Bcast(&n,1,MPI_INT,0,world);
     MPI_Bcast(line,n,MPI_CHAR,0,world);
 
-    nwords = utils::count_words(line);
+    nwords = utils::trim_and_count_words(line);
     if (nwords != 3)
       error->all(FLERR,"Incorrect format in SNAP coefficient file");
 
@@ -610,7 +610,7 @@ void PairSNAP::read_files(char *coefffilename, char *paramfilename)
       MPI_Bcast(&n,1,MPI_INT,0,world);
       MPI_Bcast(line,n,MPI_CHAR,0,world);
 
-      nwords = utils::count_words(line);
+      nwords = utils::trim_and_count_words(line);
       if (nwords != 1)
         error->all(FLERR,"Incorrect format in SNAP coefficient file");
 
diff --git a/src/SPIN/neb_spin.cpp b/src/SPIN/neb_spin.cpp
index 5d58a22567..168e288181 100644
--- a/src/SPIN/neb_spin.cpp
+++ b/src/SPIN/neb_spin.cpp
@@ -441,7 +441,7 @@ void NEBSpin::readfile(char *file, int flag)
     buf = buffer;
     next = strchr(buf,'\n');
     *next = '\0';
-    int nwords = utils::count_words(buf);
+    int nwords = utils::trim_and_count_words(buf);
     *next = '\n';
 
     if (nwords != ATTRIBUTE_PERLINE)
diff --git a/src/atom.cpp b/src/atom.cpp
index 134def74a3..5ba3cb23ff 100644
--- a/src/atom.cpp
+++ b/src/atom.cpp
@@ -1089,7 +1089,7 @@ void Atom::data_atoms(int n, char *buf, tagint id_offset, tagint mol_offset,
 
   next = strchr(buf,'\n');
   *next = '\0';
-  int nwords = utils::count_words(buf);
+  int nwords = utils::trim_and_count_words(buf);
   *next = '\n';
 
   if (nwords != avec->size_data_atom && nwords != avec->size_data_atom + 3)
@@ -1239,7 +1239,7 @@ void Atom::data_vels(int n, char *buf, tagint id_offset)
 
   next = strchr(buf,'\n');
   *next = '\0';
-  int nwords = utils::count_words(buf);
+  int nwords = utils::trim_and_count_words(buf);
   *next = '\n';
 
   if (nwords != avec->size_data_vel)
@@ -1591,7 +1591,7 @@ void Atom::data_bonus(int n, char *buf, AtomVec *avec_bonus, tagint id_offset)
 
   next = strchr(buf,'\n');
   *next = '\0';
-  int nwords = utils::count_words(buf);
+  int nwords = utils::trim_and_count_words(buf);
   *next = '\n';
 
   if (nwords != avec_bonus->size_data_bonus)
diff --git a/src/fix_property_atom.cpp b/src/fix_property_atom.cpp
index 5fb17ca636..372156b5c5 100644
--- a/src/fix_property_atom.cpp
+++ b/src/fix_property_atom.cpp
@@ -218,7 +218,7 @@ void FixPropertyAtom::read_data_section(char *keyword, int n, char *buf,
 
   next = strchr(buf,'\n');
   *next = '\0';
-  int nwords = utils::count_words(buf);
+  int nwords = utils::trim_and_count_words(buf);
   *next = '\n';
 
   if (nwords != nvalue+1) {
diff --git a/src/fix_tmd.cpp b/src/fix_tmd.cpp
index 182da8ecaa..00fdfb41ad 100644
--- a/src/fix_tmd.cpp
+++ b/src/fix_tmd.cpp
@@ -447,12 +447,12 @@ void FixTMD::readfile(char *file)
           zprd = hi - lo;
           bufptr = next + 1;
           continue;
-        } else if (utils::count_words(bufptr) == 4) {
+        } else if (utils::trim_and_count_words(bufptr) == 4) {
           if (xprd >= 0.0 || yprd >= 0.0 || zprd >= 0.0)
             error->all(FLERR,"Incorrect format in TMD target file");
           imageflag = 0;
           firstline = 0;
-        } else if (utils::count_words(bufptr) == 7) {
+        } else if (utils::trim_and_count_words(bufptr) == 7) {
           if (xprd < 0.0 || yprd < 0.0 || zprd < 0.0)
             error->all(FLERR,"Incorrect format in TMD target file");
           imageflag = 1;
diff --git a/src/molecule.cpp b/src/molecule.cpp
index 1f0a9e1601..33d14bfebd 100644
--- a/src/molecule.cpp
+++ b/src/molecule.cpp
@@ -1401,7 +1401,7 @@ void Molecule::body(int flag, int pflag, char *line)
   while (nword < nparam) {
     readline(line);
 
-    ncount = utils::count_words(line);
+    ncount = utils::trim_and_count_words(line);
     if (ncount == 0)
       error->one(FLERR,"Too few values in body section of molecule file");
     if (nword+ncount > nparam)
diff --git a/src/read_data.cpp b/src/read_data.cpp
index edeb6398b8..fdacac8148 100644
--- a/src/read_data.cpp
+++ b/src/read_data.cpp
@@ -1734,7 +1734,7 @@ void ReadData::bodies(int firstpass, AtomVec *ptr)
         while (nword < ninteger) {
           eof = fgets(&buffer[m],MAXLINE,fp);
           if (eof == NULL) error->one(FLERR,"Unexpected end of data file");
-          ncount = utils::count_words(&buffer[m]);
+          ncount = utils::trim_and_count_words(&buffer[m]);
           if (ncount == 0)
             error->one(FLERR,"Too few values in body lines in data file");
           nword += ncount;
@@ -1748,7 +1748,7 @@ void ReadData::bodies(int firstpass, AtomVec *ptr)
         while (nword < ndouble) {
           eof = fgets(&buffer[m],MAXLINE,fp);
           if (eof == NULL) error->one(FLERR,"Unexpected end of data file");
-          ncount = utils::count_words(&buffer[m]);
+          ncount = utils::trim_and_count_words(&buffer[m]);
           if (ncount == 0)
             error->one(FLERR,"Too few values in body lines in data file");
           nword += ncount;
diff --git a/src/reader_native.cpp b/src/reader_native.cpp
index 6cb8096d9c..9f7caa01da 100644
--- a/src/reader_native.cpp
+++ b/src/reader_native.cpp
@@ -162,7 +162,7 @@ bigint ReaderNative::read_header(double box[3][3], int &boxinfo, int &triclinic,
 
   char *labelline = &line[strlen("ITEM: ATOMS ")];
 
-  nwords = utils::count_words(labelline);
+  nwords = utils::trim_and_count_words(labelline);
   char **labels = new char*[nwords];
   labels[0] = strtok(labelline," \t\n\r\f");
   if (labels[0] == NULL) {
diff --git a/src/text_file_reader.cpp b/src/text_file_reader.cpp
index c5913d8c51..7d9bfa1d03 100644
--- a/src/text_file_reader.cpp
+++ b/src/text_file_reader.cpp
@@ -29,7 +29,7 @@
 using namespace LAMMPS_NS;
 
 TextFileReader::TextFileReader(const std::string &filename, const std::string &filetype)
-  : filename(filename), filetype(filetype)
+  : filename(filename), filetype(filetype), ignore_comments(true)
 {
   fp = fopen(filename.c_str(), "r");
 
diff --git a/src/thermo.cpp b/src/thermo.cpp
index 73c915da4a..fcc61be503 100644
--- a/src/thermo.cpp
+++ b/src/thermo.cpp
@@ -160,7 +160,7 @@ Thermo::Thermo(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
   // allocate per-field memory
   // process line of keywords
 
-  nfield_initial = utils::count_words(line);
+  nfield_initial = utils::trim_and_count_words(line);
   allocate();
   parse_fields(line);
 
diff --git a/src/utils.cpp b/src/utils.cpp
index acdec2b77b..a423586366 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -355,14 +355,22 @@ std::string utils::trim_comment(const std::string & line) {
 }
 
 /* ----------------------------------------------------------------------
-   Trim comment from string and return number of words
+   Return number of words
 ------------------------------------------------------------------------- */
 
 size_t utils::count_words(const std::string & text, const std::string & seperators) {
-  Tokenizer words(utils::trim_comment(text), seperators);
+  ValueTokenizer words(text, seperators);
   return words.count();
 }
 
+/* ----------------------------------------------------------------------
+   Trim comment from string and return number of words
+------------------------------------------------------------------------- */
+
+size_t utils::trim_and_count_words(const std::string & text, const std::string & seperators) {
+  return utils::count_words(utils::trim_comment(text), seperators);
+}
+
 /* ----------------------------------------------------------------------
    Return whether string is a valid integer number
 ------------------------------------------------------------------------- */
diff --git a/src/utils.h b/src/utils.h
index 601ec1a031..977d63a02c 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -142,13 +142,21 @@ namespace LAMMPS_NS {
      */
     std::string trim_comment(const std::string & line);
 
+    /**
+     * \brief Count words in string
+     * \param text string that should be searched
+     * \param seperators string containing characters that will be treated as whitespace
+     * \return number of words found
+     */
+    size_t count_words(const std::string & text, const std::string & seperators = " \t\r\n\f");
+
     /**
      * \brief Count words in a single line, trim anything from '#' onward
      * \param text string that should be trimmed and searched
      * \param seperators string containing characters that will be treated as whitespace
      * \return number of words found
      */
-    size_t count_words(const std::string & text, const std::string & seperators = " \t\r\n\f");
+    size_t trim_and_count_words(const std::string & text, const std::string & seperators = " \t\r\n\f");
 
     /**
      * \brief Check if string can be converted to valid integer
diff --git a/unittest/utils/test_utils.cpp b/unittest/utils/test_utils.cpp
index 3b83649465..e1c458a173 100644
--- a/unittest/utils/test_utils.cpp
+++ b/unittest/utils/test_utils.cpp
@@ -25,7 +25,11 @@ TEST(Utils, trim_comment) {
 }
 
 TEST(Utils, count_words) {
-    ASSERT_EQ(utils::count_words("some text # comment"), 2);
+    ASSERT_EQ(utils::count_words("some text # comment"), 4);
+}
+
+TEST(Utils, trim_and_count_words) {
+    ASSERT_EQ(utils::trim_and_count_words("some text # comment"), 2);
 }
 
 TEST(Utils, valid_integer1) {