ENH: optionally eliminate duplicates on hashedWordList construction (issue #375)

- makes it easier to use as a wordHashSet replacement for situations
  where we want to avoid duplicates but retain the input order.

- support construction from HashTable, which means it works like the
  HashTable::sortedToc but with its own hashing for these keys.

- expose rehash() method for the user. There is normally no need for
  using it directly, but also no reason to lock it away as private.
This commit is contained in:
Mark Olesen
2017-01-10 12:42:40 +01:00
parent 3eb706a380
commit 934764d700
5 changed files with 247 additions and 139 deletions

View File

@ -42,7 +42,7 @@ int main(int argc, char *argv[])
"def",
"ghi"
};
words = { "def", "ghi", "xy", "all", "begin", "all" };
words = { "def", "ghi", "xy", "all", "end", "all" };
wordHashSet setA
{
@ -84,6 +84,26 @@ int main(int argc, char *argv[])
Info<< "hashedWordList: " << words << nl
<< "with lookup: " << words.lookup() << endl;
{
List<word> input = { "def", "ghi", "xy", "all", "end", "all", "def" };
hashedWordList words1(input, true);
Info<< "input word list: " << input << nl
<< "without dup: " << words1 << endl;
Info<< "from wordHashSet: " << hashedWordList(setA) << endl;
Info<< "from HashTable: " << hashedWordList(tableA) << endl;
Info<< "from HashTable: " << hashedWordList(tableB) << endl;
// even this works
Info<< "from hashSet: "
<< hashedWordList
(
wordHashSet(setA)
| wordHashSet(tableA) | wordHashSet(tableB)
) << endl;
}
Info<< "wordHashSet: " << setA << endl;
Info<< "Table-HashSet: " << tableA << endl;
Info<< "Map<label>: " << mapA << endl;