ENH: added HashTable count, filter and generalized toc methods

- Generalized means of filtering table entries based on their keys,
  values, or both.  Either filter (retain), or optionally prune elements
  that satisfy the specified predicate.

  - filterKeys and filterValues:
    Take a unary predicate with the signature

        bool operator()(const Key& k);   // filterKeys
        bool operator()(const T& v);     // filterValues

  - filterEntries:
    Takes a binary predicate with the signature

        bool operator()(const Key& k, const T& v);

==

  The predicates can be normal class methods, or provided on-the-fly
  using a C++ lambda. For example,

      wordRes goodFields = ...;
      allFieldNames.filterKeys
      (
          [&goodFields](const word& k){ return goodFields.match(k); }
      );

  Note that all classes that can match a string (eg, regExp, keyType,
  wordRe, wordRes) or that are derived from a Foam::string (eg, fileName,
  word) are provided with a corresponding

      bool operator()(const std::string&)

  that either performs a regular expression or a literal match.
  This allows such objects to be used directly as a unary predicate
  when filtering any string hash keys.

  Note that HashSet and hashedWordList both have the proper
  operator() methods that also allow them to be used as a unary
  predicate.

- Similar predicate selection with the following:
    * tocKeys, tocValues, tocEntries
    * countKeys, countValues, countEntries

  except that instead of pruning, there is a simple logic inversion.
This commit is contained in:
Mark Olesen
2017-05-17 10:18:14 +02:00
parent 8d018e7950
commit cf889306d0
8 changed files with 525 additions and 42 deletions

View File

@ -234,6 +234,97 @@ int main()
Info<<"\ntable1: " << table1 << endl;
// Start again
HashTable<scalar> table1start
{
{"aaa", 1.0},
{"aba", 2.0},
{"a_ca", 3.0},
{"ada", 4.0},
{"aeq_", 5.0},
{"aaw", 6.0},
{"abs", 7.0},
{"a_cr", 8.0},
{"adx", 9.0},
{"ae_c", 10.0}
};
table1 = table1start;
Info<< "\ntable has keys: "
<< flatOutput(table1.sortedToc()) << nl;
wordRe matcher(".*_.*", wordRe::REGEX);
table1.filterKeys
(
[&matcher](const word& k){ return matcher.match(k); }
);
Info<< "retain things matching " << matcher << " => "
<< flatOutput(table1.sortedToc()) << nl;
table1 = table1start;
table1.filterKeys
(
[&matcher](const word& k){ return matcher.match(k); },
true
);
Info<< "prune things matching " << matcher << " => "
<< flatOutput(table1.sortedToc()) << nl;
// Same, without a lambda
table1 = table1start;
table1.filterKeys(matcher, true);
Info<< "prune things matching " << matcher << " => "
<< flatOutput(table1.sortedToc()) << nl;
// Same idea, but inverted logic inside the lambda
table1 = table1start;
table1.filterKeys
(
[&matcher](const word& k){ return !matcher.match(k); },
true
);
Info<< "prune things matching " << matcher << " => "
<< flatOutput(table1.sortedToc()) << nl;
table1 = table1start;
Info<< "\ntable:" << table1 << nl;
table1.filterValues
(
[](const scalar& v){ return (v >= 5); }
);
Info<< "\ntable with values >= 5:" << table1 << nl;
table1 = table1start;
Info<< "\ntable:" << table1 << nl;
table1.filterEntries
(
[&matcher](const word& k, const scalar& v)
{
return matcher(k) && (v >= 5);
}
);
Info<< "\ntable with values >= 5 and matching " << matcher
<< table1 << nl;
table1 = table1start;
Info<< "\ntable:" << table1 << nl;
Info<< "has "
<< table1.countValues([](const scalar& v) { return v >= 7; })
<< " values >= 7 with these keys: "
<< table1.tocValues([](const scalar& v) { return v >= 7; })
<< nl;
Info<< "\nDone\n";
return 0;

View File

@ -158,6 +158,20 @@ Foam::label Foam::HashSet<Key, Hash>::insert(std::initializer_list<Key> lst)
// * * * * * * * * * * * * * * * Member Operators * * * * * * * * * * * * * //
// Function-call form of found(): returns true when the key is in the set.
// Having this operator lets a HashSet be passed directly as a unary
// predicate on keys.
template<class Key, class Hash>
inline bool Foam::HashSet<Key, Hash>::operator()(const Key& key) const
{
return this->found(key);
}
// Subscript form of found(): returns true when the key is in the set.
// Read-only; it never inserts the key.
template<class Key, class Hash>
inline bool Foam::HashSet<Key, Hash>::operator[](const Key& key) const
{
return this->found(key);
}
template<class Key, class Hash>
void Foam::HashSet<Key, Hash>::operator=(const UList<Key>& lst)
{
@ -180,12 +194,6 @@ void Foam::HashSet<Key, Hash>::operator=(std::initializer_list<Key> lst)
}
template<class Key, class Hash>
inline bool Foam::HashSet<Key, Hash>::operator[](const Key& key) const
{
return this->found(key);
}
template<class Key, class Hash>
bool Foam::HashSet<Key, Hash>::operator==(const HashSet<Key, Hash>& rhs) const

View File

@ -156,22 +156,24 @@ public:
// Edit
//- Insert a new entry
// \return True if the entry was inserted, which means that it did
// not previously exist in the set.
bool insert(const Key& key)
{
// Forwarded to the parent table with nil() as the content for the key
return this->parent_type::insert(key, nil());
}
//- Insert keys from the list of Key
// Return the number of new elements inserted
// \return The number of new elements inserted
label insert(const UList<Key>& lst);
//- Insert keys from the list of Key
// Return the number of new elements inserted
// \return The number of new elements inserted
template<unsigned Size>
label insert(const FixedList<Key, Size>& lst);
//- Insert keys from an initializer list of Key
// Return the number of new elements inserted
// \return The number of new elements inserted
label insert(std::initializer_list<Key> lst);
//- Same as insert (cannot overwrite nil content)
@ -200,18 +202,21 @@ public:
}
//- Unset the specified key - same as erase
// \return True if the entry existed and was removed
bool unset(const Key& key)
{
return this->parent_type::erase(key);
}
//- Unset the listed keys - same as erase
// \return The number of items removed
label unset(const UList<Key>& lst)
{
return this->parent_type::erase(lst);
}
//- Unset the listed keys - same as erase
// \return The number of items removed
template<unsigned Size>
label unset(const FixedList<Key, Size>& lst)
{
@ -219,11 +224,36 @@ public:
}
//- Unset the listed keys - same as erase
// \return The number of items removed
label unset(std::initializer_list<Key> lst)
{
return this->parent_type::erase(lst);
}
//- Not applicable for HashSet
template<class UnaryPredicate>
List<Key> tocValues(const UnaryPredicate&, const bool) = delete;
//- Not applicable for HashSet
template<class BinaryPredicate>
List<Key> tocEntries(const BinaryPredicate&, const bool) = delete;
//- Not applicable for HashSet
template<class UnaryPredicate>
label countValues(const UnaryPredicate&, const bool) = delete;
//- Not applicable for HashSet
template<class BinaryPredicate>
label countEntries(const BinaryPredicate&, const bool) = delete;
//- Not applicable for HashSet
template<class UnaryPredicate>
label filterValues(const UnaryPredicate&, const bool) = delete;
//- Not applicable for HashSet
template<class BinaryPredicate>
label filterEntries(const BinaryPredicate&, const bool) = delete;
// STL iterators
@ -248,12 +278,15 @@ public:
// Member Operators
//- This operation doesn't make much sense for a hash-set
void operator()(const Key& key) = delete;
//- Return true if the entry exists, same as found()
inline bool operator()(const Key& key) const;
//- Return true if the entry exists, same as found().
inline bool operator[](const Key& key) const;
// Comparison
//- Equality. Two hashset are equal when they have the same keys.
// Independent of table size or order.
bool operator==(const this_type& rhs) const;
@ -262,6 +295,8 @@ public:
bool operator!=(const this_type& rhs) const;
// Assignment
//- Assignment from a UList of keys
void operator=(const UList<Key>& lst);
@ -273,6 +308,8 @@ public:
void operator=(std::initializer_list<Key> lst);
// Logical operations
//- Combine entries from HashSets
void operator|=(const HashSet<Key, Hash>& rhs);

View File

@ -231,25 +231,169 @@ Foam::HashTable<T, Key, Hash>::find
template<class T, class Key, class Hash>
Foam::List<Key> Foam::HashTable<T, Key, Hash>::toc() const
{
List<Key> keys(nElmts_);
label keyI = 0;
List<Key> keyLst(nElmts_);
label count = 0;
for (const_iterator iter = cbegin(); iter != cend(); ++iter)
{
keys[keyI++] = iter.key();
keyLst[count++] = iter.key();
}
return keys;
return keyLst;
}
template<class T, class Key, class Hash>
Foam::List<Key> Foam::HashTable<T, Key, Hash>::sortedToc() const
{
List<Key> sortedLst = this->toc();
sort(sortedLst);
List<Key> keyLst = this->toc();
Foam::sort(keyLst);
return sortedLst;
return keyLst;
}
// Sorted list of the keys selected by a unary predicate on the key.
// A key is selected when the predicate holds (invert = false) or when it
// does not hold (invert = true).
template<class T, class Key, class Hash>
template<class UnaryPredicate>
Foam::List<Key> Foam::HashTable<T, Key, Hash>::tocKeys
(
    const UnaryPredicate& pred,
    const bool invert
) const
{
    // Sized for the case where every entry is selected; trimmed afterwards
    List<Key> selected(nElmts_);
    label nSelected = 0;

    const_iterator iter = cbegin();
    while (iter != cend())
    {
        const bool wanted = (pred(iter.key()) ? !invert : invert);

        if (wanted)
        {
            selected[nSelected] = iter.key();
            ++nSelected;
        }

        ++iter;
    }

    selected.setSize(nSelected);
    Foam::sort(selected);

    return selected;
}
// Sorted list of the keys whose values are selected by a unary predicate.
// An entry is selected when the predicate holds for its value
// (invert = false) or when it does not hold (invert = true).
template<class T, class Key, class Hash>
template<class UnaryPredicate>
Foam::List<Key> Foam::HashTable<T, Key, Hash>::tocValues
(
    const UnaryPredicate& pred,
    const bool invert
) const
{
    // Sized for the case where every entry is selected; trimmed afterwards
    List<Key> selected(nElmts_);
    label nSelected = 0;

    const_iterator iter = cbegin();
    while (iter != cend())
    {
        const bool wanted = (pred(iter.object()) ? !invert : invert);

        if (wanted)
        {
            selected[nSelected] = iter.key();
            ++nSelected;
        }

        ++iter;
    }

    selected.setSize(nSelected);
    Foam::sort(selected);

    return selected;
}
// Sorted list of the keys of the entries selected by a binary predicate
// on (key, value). An entry is selected when the predicate holds
// (invert = false) or when it does not hold (invert = true).
template<class T, class Key, class Hash>
template<class BinaryPredicate>
Foam::List<Key> Foam::HashTable<T, Key, Hash>::tocEntries
(
    const BinaryPredicate& pred,
    const bool invert
) const
{
    // Sized for the case where every entry is selected; trimmed afterwards
    List<Key> selected(nElmts_);
    label nSelected = 0;

    const_iterator iter = cbegin();
    while (iter != cend())
    {
        const bool wanted =
            (pred(iter.key(), iter.object()) ? !invert : invert);

        if (wanted)
        {
            selected[nSelected] = iter.key();
            ++nSelected;
        }

        ++iter;
    }

    selected.setSize(nSelected);
    Foam::sort(selected);

    return selected;
}
// Number of entries whose key is selected by the unary predicate.
// A key is selected when the predicate holds (invert = false) or when it
// does not hold (invert = true).
template<class T, class Key, class Hash>
template<class UnaryPredicate>
Foam::label Foam::HashTable<T, Key, Hash>::countKeys
(
    const UnaryPredicate& pred,
    const bool invert
) const
{
    label nSelected = 0;

    const_iterator iter = cbegin();
    while (iter != cend())
    {
        const bool wanted = (pred(iter.key()) ? !invert : invert);

        if (wanted)
        {
            ++nSelected;
        }

        ++iter;
    }

    return nSelected;
}
// Number of entries whose value is selected by the unary predicate.
// A value is selected when the predicate holds (invert = false) or when
// it does not hold (invert = true).
template<class T, class Key, class Hash>
template<class UnaryPredicate>
Foam::label Foam::HashTable<T, Key, Hash>::countValues
(
    const UnaryPredicate& pred,
    const bool invert
) const
{
    label nSelected = 0;

    const_iterator iter = cbegin();
    while (iter != cend())
    {
        const bool wanted = (pred(iter.object()) ? !invert : invert);

        if (wanted)
        {
            ++nSelected;
        }

        ++iter;
    }

    return nSelected;
}
// Number of entries selected by the binary predicate on (key, value).
// An entry is selected when the predicate holds (invert = false) or when
// it does not hold (invert = true).
template<class T, class Key, class Hash>
template<class BinaryPredicate>
Foam::label Foam::HashTable<T, Key, Hash>::countEntries
(
    const BinaryPredicate& pred,
    const bool invert
) const
{
    label nSelected = 0;

    const_iterator iter = cbegin();
    while (iter != cend())
    {
        const bool wanted =
            (pred(iter.key(), iter.object()) ? !invert : invert);

        if (wanted)
        {
            ++nSelected;
        }

        ++iter;
    }

    return nSelected;
}
@ -617,6 +761,87 @@ void Foam::HashTable<T, Key, Hash>::transfer(HashTable<T, Key, Hash>& ht)
}
// Filter the table in place using a unary predicate on the key.
// With pruning = false the entries satisfying the predicate are kept and
// all others erased; with pruning = true the satisfying entries are erased.
// Returns the number of entries that were erased.
template<class T, class Key, class Hash>
template<class UnaryPredicate>
Foam::label Foam::HashTable<T, Key, Hash>::filterKeys
(
    const UnaryPredicate& pred,
    const bool pruning
)
{
    label nErased = 0;

    iterator iter = begin();
    while (iter != end())
    {
        // A match is discarded when pruning, a non-match when retaining
        const bool discard = (pred(iter.key()) ? pruning : !pruning);

        if (discard)
        {
            // Only count the entries that erase() reports as removed
            if (erase(iter))
            {
                ++nErased;
            }
        }

        ++iter;
    }

    return nErased;
}
// Filter the table in place using a unary predicate on the value.
// With pruning = false the entries satisfying the predicate are kept and
// all others erased; with pruning = true the satisfying entries are erased.
// Returns the number of entries that were erased.
template<class T, class Key, class Hash>
template<class UnaryPredicate>
Foam::label Foam::HashTable<T, Key, Hash>::filterValues
(
    const UnaryPredicate& pred,
    const bool pruning
)
{
    label nErased = 0;

    iterator iter = begin();
    while (iter != end())
    {
        // A match is discarded when pruning, a non-match when retaining
        const bool discard = (pred(iter.object()) ? pruning : !pruning);

        if (discard)
        {
            // Only count the entries that erase() reports as removed
            if (erase(iter))
            {
                ++nErased;
            }
        }

        ++iter;
    }

    return nErased;
}
// Filter the table in place using a binary predicate on (key, value).
// With pruning = false the entries satisfying the predicate are kept and
// all others erased; with pruning = true the satisfying entries are erased.
// Returns the number of entries that were erased.
template<class T, class Key, class Hash>
template<class BinaryPredicate>
Foam::label Foam::HashTable<T, Key, Hash>::filterEntries
(
    const BinaryPredicate& pred,
    const bool pruning
)
{
    label nErased = 0;

    iterator iter = begin();
    while (iter != end())
    {
        // A match is discarded when pruning, a non-match when retaining
        const bool discard =
            (pred(iter.key(), iter.object()) ? pruning : !pruning);

        if (discard)
        {
            // Only count the entries that erase() reports as removed
            if (erase(iter))
            {
                ++nErased;
            }
        }

        ++iter;
    }

    return nErased;
}
// * * * * * * * * * * * * * * * Member Operators * * * * * * * * * * * * * //
template<class T, class Key, class Hash>

View File

@ -255,7 +255,7 @@ private:
inline label hashKeyIndex(const Key& key) const;
//- Assign a new hash-entry to a possibly already existing key.
// Return true if the new entry was set.
// \return True if the new entry was set.
bool set(const Key& key, const T& obj, const bool protect);
@ -330,22 +330,83 @@ public:
//- Return hashed entry if it exists, or return the given default
inline const T& lookup(const Key& key, const T& deflt) const;
// Table of contents
//- Return the table of contents
List<Key> toc() const;
//- Return the table of contents as a sorted list
List<Key> sortedToc() const;
//- Return the sorted table of contents with keys that satisfy
// the unary predicate, optionally with inverted logic.
//
// \param pred unary predicate, invoked on each key
// \param invert if true, select the keys for which the predicate
// does not hold
//
// \return The sorted list of selected keys
template<class UnaryPredicate>
List<Key> tocKeys
(
const UnaryPredicate& pred,
const bool invert = false
) const;
//- Return the sorted table of contents with values that satisfy
// the unary predicate, optionally with inverted logic.
//
// \param pred unary predicate, invoked on each value
// \param invert if true, select the entries whose value does not
// satisfy the predicate
//
// \return The sorted list of keys (not values) of the selected entries
template<class UnaryPredicate>
List<Key> tocValues
(
const UnaryPredicate& pred,
const bool invert = false
) const;
//- Return the sorted table of contents with keys/values that satisfy
// the binary predicate, optionally with inverted logic.
//
// \param pred binary predicate, invoked on each (key, value) pair
// \param invert if true, select the entries for which the predicate
// does not hold
//
// \return The sorted list of keys of the selected entries
template<class BinaryPredicate>
List<Key> tocEntries
(
const BinaryPredicate& pred,
const bool invert = false
) const;
// Counting
//- Count the number of keys that satisfy the unary predicate,
// optionally with inverted logic.
//
// \param pred unary predicate, invoked on each key
// \param invert if true, count the keys for which the predicate
// does not hold
//
// \return The number of keys counted
template<class UnaryPredicate>
label countKeys
(
const UnaryPredicate& pred,
const bool invert = false
) const;
//- Count the number of values that satisfy the unary predicate,
// optionally with inverted logic.
//
// \param pred unary predicate, invoked on each value
// \param invert if true, count the values for which the predicate
// does not hold
//
// \return The number of values counted
template<class UnaryPredicate>
label countValues
(
const UnaryPredicate& pred,
const bool invert = false
) const;
//- Count the number of entries that satisfy the binary predicate,
// optionally with inverted logic.
//
// \param pred binary predicate, invoked on each (key, value) pair
// \param invert if true, count the entries for which the predicate
// does not hold
//
// \return The number of entries counted
template<class BinaryPredicate>
label countEntries
(
const BinaryPredicate& pred,
const bool invert = false
) const;
// Edit
//- Insert a new entry
// Return true if the entry inserted, which means that it did
// \return True if the entry inserted, which means that it did
// not previously exist in the table.
inline bool insert(const Key& key, const T& obj);
//- Assign a new entry, overwriting existing entries.
// Returns true.
//
// \return True, since it always overwrites any entries.
inline bool set(const Key& key, const T& obj);
//- Erase an entry specified by given iterator
@ -357,30 +418,34 @@ public:
// auto iter = table.find(unknownKey);
// table.erase(iter);
// \endcode
// which is what \code table.erase(unknownKey) \endcode does anyhow
// which is what \code table.erase(unknownKey) \endcode does anyhow.
//
// \return True if the corresponding entry existed and was removed
bool erase(const iterator& iter);
//- Erase an entry specified by the given key
// \return True if the entry existed and was removed
bool erase(const Key& key);
//- Remove table entries given by the listed keys
// Return the number of elements removed
// \return The number of items removed
label erase(const UList<Key>& keys);
//- Remove table entries given by the listed keys
// Return the number of elements removed
// \return The number of items removed
template<unsigned Size>
label erase(const FixedList<Key, Size>& keys);
//- Remove table entries given by the listed keys
// Return the number of elements removed
// \return The number of items removed
label erase(std::initializer_list<Key> keys);
//- Remove table entries given by keys of the other hash-table.
// Return the number of elements removed.
//
// The other hash-table must have the same type of key, but the
// type of values held and the hashing function are arbitrary.
//
// \return The number of items removed
template<class AnyType, class AnyHash>
label erase(const HashTable<AnyType, Key, AnyHash>& other);
@ -388,9 +453,66 @@ public:
//
// The other hash-table must have the same type of key, but the
// type of values held and the hashing function are arbitrary.
//
// \return The number of items changed (removed)
template<class AnyType, class AnyHash>
label retain(const HashTable<AnyType, Key, AnyHash>& other);
//- Generalized means to filter table entries based on their keys.
// Keep (or optionally prune) entries with keys that satisfy
// the unary predicate, which has the following signature:
// \code
// bool operator()(const Key& k);
// \endcode
//
// For example,
// \code
// wordRes goodFields = ...;
// allFieldNames.filterKeys
// (
// [&goodFields](const word& k){ return goodFields.match(k); }
// );
// \endcode
//
// \param pred unary predicate, invoked on each key
// \param pruning if true, remove the entries that satisfy the
// predicate instead of keeping them
//
// \return The number of items changed (removed)
template<class UnaryPredicate>
label filterKeys
(
const UnaryPredicate& pred,
const bool pruning = false
);
//- Generalized means to filter table entries based on their values.
// Keep (or optionally prune) entries with values that satisfy
// the unary predicate, which has the following signature:
// \code
// bool operator()(const T& v);
// \endcode
//
// \param pred unary predicate, invoked on each value
// \param pruning if true, remove the entries that satisfy the
// predicate instead of keeping them
//
// \return The number of items changed (removed)
template<class UnaryPredicate>
label filterValues
(
const UnaryPredicate& pred,
const bool pruning = false
);
//- Generalized means to filter table entries based on their key/value.
// Keep (or optionally prune) entries with keys/values that satisfy
// the binary predicate, which has the following signature:
// \code
// bool operator()(const Key& k, const T& v);
// \endcode
//
// \param pred binary predicate, invoked on each (key, value) pair
// \param pruning if true, remove the entries that satisfy the
// predicate instead of keeping them
//
// \return The number of items changed (removed)
template<class BinaryPredicate>
label filterEntries
(
const BinaryPredicate& pred,
const bool pruning = false
);
//- Resize the hash table for efficiency
void resize(const label sz);

View File

@ -46,26 +46,26 @@ Foam::label Foam::HashTableCore::canonicalSize(const label requested_size)
{
return 0;
}
else if (requested_size >= maxTableSize)
{
return maxTableSize;
}
// Enforce power of two - makes for a vey fast modulus etc.
// Enforce power of two - makes for a very fast modulus.
// Use unsigned for these calculations.
//
// - The lower limit (8) is somewhat arbitrary, but if the hash table
// is too small, there will be many direct table collisions.
// - The uper limit (approx. labelMax/4) must be a power of two,
// - The upper limit (approx. labelMax/4) must be a power of two,
// need not be extremely large for hashing.
uLabel powerOfTwo = 8; // lower-limit
const uLabel size = requested_size;
if (size < powerOfTwo)
if (size <= powerOfTwo)
{
return powerOfTwo;
}
else if (requested_size >= maxTableSize)
{
return maxTableSize;
}
else if (size & (size-1)) // <- Modulus of i^2
{
// Determine power-of-two. Brute-force is fast enough.

View File

@ -46,26 +46,26 @@ Foam::label Foam::StaticHashTableCore::canonicalSize(const label requested_size)
{
return 0;
}
else if (requested_size >= maxTableSize)
{
return maxTableSize;
}
// Enforce power of two - makes for a vey fast modulus etc.
// Enforce power of two - makes for a very fast modulus.
// Use unsigned for these calculations.
//
// - The lower limit (8) is somewhat arbitrary, but if the hash table
// is too small, there will be many direct table collisions.
// - The uper limit (approx. labelMax/4) must be a power of two,
// - The upper limit (approx. labelMax/4) must be a power of two,
// need not be extremely large for hashing.
uLabel powerOfTwo = 8; // lower-limit
const uLabel size = requested_size;
if (size < powerOfTwo)
if (size <= powerOfTwo)
{
return powerOfTwo;
}
else if (requested_size >= maxTableSize)
{
return maxTableSize;
}
else if (size & (size-1)) // <- Modulus of i^2
{
// Determine power-of-two. Brute-force is fast enough.

View File

@ -69,7 +69,7 @@ public:
// Constructors
//- Construct from a list of wordRe
inline wordRes(const UList<wordRe>& lst);
inline wordRes(const UList<wordRe>& list);
// Static Constructors, Helpers