From be813764262053400b71dafe07c497341cb2303e Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 25 Feb 2021 14:45:17 -0500 Subject: [PATCH 1/4] use crc32 checksum instead of pointer addresses as unique id for citations --- src/citeme.cpp | 35 ++++++++++++++++++++++++++++++----- src/citeme.h | 6 +++--- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/src/citeme.cpp b/src/citeme.cpp index 41ac87f5bb..1a654bad6f 100644 --- a/src/citeme.cpp +++ b/src/citeme.cpp @@ -15,6 +15,8 @@ #include "comm.h" #include "universe.h" +#include <cstdint> + using namespace LAMMPS_NS; static const char cite_separator[] = @@ -26,6 +28,24 @@ static const char cite_nagline[] = static const char cite_file[] = "The {} {} lists these citations in " "BibTeX format.\n\n"; +// for crc32 checksums +static uint32_t crc32_table[0x100]; +static uint32_t crc32_for_byte(uint32_t r) +{ + for(int j = 0; j < 8; ++j) + r = (r & 1? 0: (uint32_t)0xEDB88320L) ^ r >> 1; + return r ^ (uint32_t)0xFF000000L; +} + +// compute crc32 for string +static unsigned int get_crc32(const std::string &text) +{ + uint32_t crc = 0; + for (auto c : text) + crc = crc32_table[(uint8_t)crc ^ (uint8_t)c] ^ crc >> 8; + return crc; +} + /* ---------------------------------------------------------------------- */ CiteMe::CiteMe(LAMMPS *lmp, int _screen, int _logfile, const char *_file) @@ -34,6 +54,10 @@ CiteMe::CiteMe(LAMMPS *lmp, int _screen, int _logfile, const char *_file) fp = nullptr; cs = new citeset(); + // fill crc32 table + for(size_t i = 0; i < 0x100; ++i) + crc32_table[i] = crc32_for_byte(i); + screen_flag = _screen; scrbuffer.clear(); logfile_flag = _logfile; @@ -68,14 +92,16 @@ CiteMe::~CiteMe() process an added citation so it will be shown only once and as requested ------------------------------------------------------------------------- */ -void CiteMe::add(const char *ref) +void CiteMe::add(const std::string &reference) { if (comm->me != 0) return; - if (cs->find(ref) != cs->end()) return; - 
cs->insert(ref); + + unsigned int crc = get_crc32(reference); + if (cs->find(crc) != cs->end()) return; + cs->insert(crc); if (fp) { - fputs(ref,fp); + fputs(reference.c_str(),fp); fflush(fp); } @@ -93,7 +119,6 @@ void CiteMe::add(const char *ref) if (logfile_flag == VERBOSE) logbuffer += "\n"; } - std::string reference = ref; std::size_t found = reference.find_first_of("\n"); std::string header = reference.substr(0,found+1); if (screen_flag == VERBOSE) scrbuffer += "- " + reference; diff --git a/src/citeme.h b/src/citeme.h index dd54268a3b..10fa055b2c 100644 --- a/src/citeme.h +++ b/src/citeme.h @@ -23,8 +23,8 @@ class CiteMe : protected Pointers { public: CiteMe(class LAMMPS *, int, int, const char *); virtual ~CiteMe(); - void add(const char *); // register publication for output - void flush(); // flush buffers to screen and logfile + void add(const std::string &); // register publication for output + void flush(); // flush buffers to screen and logfile enum {VERBOSE, TERSE}; private: @@ -34,7 +34,7 @@ class CiteMe : protected Pointers { int logfile_flag; // determine whether verbose or terse output std::string scrbuffer; // output buffer for screen std::string logbuffer; // output buffer for logfile - typedef std::set<const char *> citeset; + typedef std::set<unsigned int> citeset; citeset *cs; // registered set of publications }; } From 69245cb294e4450bac451e3406a8b6a5b405c32b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 25 Feb 2021 15:52:38 -0500 Subject: [PATCH 2/4] add unit tests for citeme class crc32 checks of unique citations --- unittest/commands/test_simple_commands.cpp | 46 ++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/unittest/commands/test_simple_commands.cpp b/unittest/commands/test_simple_commands.cpp index 644f7071ad..4fce58c668 100644 --- a/unittest/commands/test_simple_commands.cpp +++ b/unittest/commands/test_simple_commands.cpp @@ -13,6 +13,7 @@ #include "lammps.h" +#include "citeme.h" #include "force.h" #include "info.h" #include 
"input.h" @@ -368,6 +369,51 @@ TEST_F(SimpleCommandsTest, Shell) ASSERT_THAT(other_var, StrEq("2")); } +TEST_F(SimpleCommandsTest, CiteMe) +{ + ASSERT_EQ(lmp->citeme, nullptr); + + lmp->citeme = new LAMMPS_NS::CiteMe(lmp, CiteMe::TERSE, CiteMe::TERSE, nullptr); + + ::testing::internal::CaptureStdout(); + lmp->citeme->add("test citation one:\n 1\n"); + lmp->citeme->add("test citation two:\n 2\n"); + lmp->citeme->add("test citation one:\n 1\n"); + lmp->citeme->flush(); + std::string text = ::testing::internal::GetCapturedStdout(); + if (verbose) std::cout << text; + + // find the two unique citations, but not the third + ASSERT_THAT(text, MatchesRegex(".*one.*two.*")); + ASSERT_THAT(text, Not(MatchesRegex(".*one.*two.*one.*"))); + + ::testing::internal::CaptureStdout(); + lmp->citeme->add("test citation one:\n 0\n"); + lmp->citeme->add("test citation two:\n 2\n"); + lmp->citeme->add("test citation three:\n 3\n"); + lmp->citeme->flush(); + + text = ::testing::internal::GetCapturedStdout(); + if (verbose) std::cout << text; + + // find the fourth (only differs in long citation) and sixth added citation + ASSERT_THAT(text, MatchesRegex(".*one.*three.*")); + ASSERT_THAT(text, Not(MatchesRegex(".*two.*"))); + + ::testing::internal::CaptureStdout(); + lmp->citeme->add("test citation one:\n 1\n"); + lmp->citeme->add("test citation two:\n 2\n"); + lmp->citeme->add("test citation one:\n 0\n"); + lmp->citeme->add("test citation two:\n 2\n"); + lmp->citeme->add("test citation three:\n 3\n"); + lmp->citeme->flush(); + + text = ::testing::internal::GetCapturedStdout(); + if (verbose) std::cout << text; + + // no new citation. no CITE-CITE-CITE- lines + ASSERT_THAT(text, Not(MatchesRegex(".*CITE-CITE-CITE-CITE.*"))); +} } // namespace LAMMPS_NS int main(int argc, char **argv) From 37086c391fa9572a987615e8c70938f56f946791 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 25 Feb 2021 18:15:09 -0500 Subject: [PATCH 3/4] use std::hash instead of a local crc32 implementation. 
--- src/citeme.cpp | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/src/citeme.cpp b/src/citeme.cpp index 1a654bad6f..0ffdf02ddb 100644 --- a/src/citeme.cpp +++ b/src/citeme.cpp @@ -15,7 +15,7 @@ #include "comm.h" #include "universe.h" -#include <cstdint> +#include <functional> using namespace LAMMPS_NS; @@ -28,23 +28,8 @@ static const char cite_nagline[] = static const char cite_file[] = "The {} {} lists these citations in " "BibTeX format.\n\n"; -// for crc32 checksums -static uint32_t crc32_table[0x100]; -static uint32_t crc32_for_byte(uint32_t r) -{ - for(int j = 0; j < 8; ++j) - r = (r & 1? 0: (uint32_t)0xEDB88320L) ^ r >> 1; - return r ^ (uint32_t)0xFF000000L; -} - -// compute crc32 for string -static unsigned int get_crc32(const std::string &text) -{ - uint32_t crc = 0; - for (auto c : text) - crc = crc32_table[(uint8_t)crc ^ (uint8_t)c] ^ crc >> 8; - return crc; -} +// define hash function +static std::hash<std::string> get_hash; /* ---------------------------------------------------------------------- */ @@ -54,10 +39,6 @@ CiteMe::CiteMe(LAMMPS *lmp, int _screen, int _logfile, const char *_file) fp = nullptr; cs = new citeset(); - // fill crc32 table - for(size_t i = 0; i < 0x100; ++i) - crc32_table[i] = crc32_for_byte(i); - screen_flag = _screen; scrbuffer.clear(); logfile_flag = _logfile; @@ -96,7 +77,7 @@ void CiteMe::add(const std::string &reference) { if (comm->me != 0) return; - unsigned int crc = get_crc32(reference); + unsigned int crc = get_hash(reference); if (cs->find(crc) != cs->end()) return; cs->insert(crc); From e7c829e31e70f582fc469057f474b60b2deb5668 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 26 Feb 2021 09:49:06 -0500 Subject: [PATCH 4/4] use correct data type for storing hashes --- src/citeme.cpp | 2 +- src/citeme.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/citeme.cpp b/src/citeme.cpp index 0ffdf02ddb..b8a1d656d9 100644 --- a/src/citeme.cpp +++ b/src/citeme.cpp @@ -77,7 +77,7 @@ void 
CiteMe::add(const std::string &reference) { if (comm->me != 0) return; - unsigned int crc = get_hash(reference); + std::size_t crc = get_hash(reference); if (cs->find(crc) != cs->end()) return; cs->insert(crc); diff --git a/src/citeme.h b/src/citeme.h index 10fa055b2c..df87f1f9e5 100644 --- a/src/citeme.h +++ b/src/citeme.h @@ -34,7 +34,7 @@ class CiteMe : protected Pointers { int logfile_flag; // determine whether verbose or terse output std::string scrbuffer; // output buffer for screen std::string logbuffer; // output buffer for logfile - typedef std::set<unsigned int> citeset; + typedef std::set<std::size_t> citeset; citeset *cs; // registered set of publications }; }