From db46521d64d7b4dc1e9780000e7e62703f845b11 Mon Sep 17 00:00:00 2001
From: Richard Berger <richard.berger@temple.edu>
Date: Fri, 15 May 2020 15:46:38 -0400
Subject: [PATCH] Add utils::count_words and utils::trim_comment

---
 src/utils.cpp                 | 22 ++++++++++++++++++++++
 src/utils.h                   | 16 ++++++++++++++++
 unittest/utils/CMakeLists.txt |  4 ++++
 unittest/utils/test_utils.cpp | 16 ++++++++++++++++
 4 files changed, 58 insertions(+)
 create mode 100644 unittest/utils/test_utils.cpp
diff --git a/src/utils.cpp b/src/utils.cpp
index 4375b5d9c9..367f1a01db 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -16,6 +16,7 @@
 #include <cstdlib>
 #include "lammps.h"
 #include "error.h"
+#include "tokenizer.h"
 
 #if defined(__linux__)
 #include <unistd.h>  // for readlink
@@ -326,6 +327,26 @@ tagint utils::tnumeric(const char *file, int line, const char *str,
   return ATOTAGINT(str);
 }
 
+/* ----------------------------------------------------------------------
+   Return string without trailing # comment
+------------------------------------------------------------------------- */
+
+std::string utils::trim_comment(const std::string & line) {
+  // position of the first '#', or npos when the line holds no comment
+  const auto hash_pos = line.find('#');
+
+  // substr clamps its count, so npos yields a copy of the whole line
+  return line.substr(0, hash_pos);
+}
+
+/* ----------------------------------------------------------------------
+   Trim comment from string and return number of words
+------------------------------------------------------------------------- */
+
+size_t utils::count_words(const std::string & text, const std::string & seperators) {
+  // strip the trailing '#' comment first so that its words are not counted
+  return Tokenizer(utils::trim_comment(text), seperators).count();
+}
 
 /* ------------------------------------------------------------------ */
 
@@ -668,4 +689,5 @@ extern "C" {
 
     return 0;
   }
+
 }
diff --git a/src/utils.h b/src/utils.h
index 45fb95aaac..4698035130 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -126,6 +126,22 @@ namespace LAMMPS_NS {
      */
     tagint tnumeric(const char *file, int line, const char *str,
                     bool do_abort, LAMMPS *lmp);
+
+
+    /**
+     * \brief Return a copy of a line with everything from the first '#' onward removed
+     * \param line string that may contain a trailing '#' comment
+     * \return copy of line up to (not including) the first '#', or all of it if there is none
+     */
+    std::string trim_comment(const std::string & line);
+
+    /**
+     * \brief Count the words in a single line, ignoring anything from the first '#' onward
+     * \param text string whose trailing comment is trimmed before its words are counted
+     * \param seperators characters treated as word separators (default: space, tab, CR, LF, FF)
+     * \return number of words found
+     */
+    size_t count_words(const std::string & text, const std::string & seperators = " \t\r\n\f");
   }
 }
 
diff --git a/unittest/utils/CMakeLists.txt b/unittest/utils/CMakeLists.txt
index 1185a36345..f89878bfe1 100644
--- a/unittest/utils/CMakeLists.txt
+++ b/unittest/utils/CMakeLists.txt
@@ -1,3 +1,7 @@
 add_executable(test_tokenizer test_tokenizer.cpp)
 target_link_libraries(test_tokenizer PRIVATE lammps GTest::GMockMain GTest::GMock GTest::GTest)
 add_test(Tokenizer test_tokenizer)
+
+add_executable(test_utils test_utils.cpp)
+target_link_libraries(test_utils PRIVATE lammps GTest::GMockMain GTest::GMock GTest::GTest)
+add_test(Utils test_utils)
diff --git a/unittest/utils/test_utils.cpp b/unittest/utils/test_utils.cpp
new file mode 100644
index 0000000000..761a823ba7
--- /dev/null
+++ b/unittest/utils/test_utils.cpp
@@ -0,0 +1,16 @@
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+#include "utils.h"
+#include <string>
+
+using namespace LAMMPS_NS;
+using ::testing::Eq;
+
+TEST(Utils, trim_comment) {
+    // everything from '#' onward is removed, the blank before it is kept
+    ASSERT_THAT(utils::trim_comment("some text # comment"), Eq("some text "));
+}
+
+TEST(Utils, count_words) {
+    ASSERT_EQ(utils::count_words("some text # comment"), 2u); // words after '#' are ignored; 2u matches size_t
+}