From 3c5852ebfc46fb67395768d1e8e5f6fb1871c0c4 Mon Sep 17 00:00:00 2001 From: Mark Olesen Date: Mon, 5 Jan 2009 17:02:58 +0100 Subject: [PATCH] reworked regExp + wordRe a bit, minor change to keyType regExp: - added optional ignoreCase for constructor. - the compile() method is now exposed as the set(...) method with an optional ignoreCase argument. Not currently much use for the other regex compile flags though. The set() method can be used directly instead of the operator=() assignment. keyType + wordRe: - it's not clear that any particular characters are valid/invalid (compared to string or word), so just drop the valid(char) method for now wordRe: - a bool doesn't suffice, added enum compOption (compile-option) - most constructors now have a compOption. In *all* cases it defaults to LITERAL - ie, the same behaviour for std::string and Foam::string - added set(...) methods that do much the same as operator=(...), but the compOption can be specified. In all cases, it defaults to DETECT. In Summary By default the constructors will generally preserve the argument as string literal and the assignment operators will use the wordRe::DETECT compOption to scan the string for regular expression meta characters and/or invalid word characters and react accordingly. The exceptions are when constructing/assigning from another Foam::wordRe (preserve the same type) or from a Foam::word (always literal). 
--- applications/test/wordRe/testRegexps | 3 +- applications/test/wordRe/wordReTest.C | 53 ++++++--- src/OSspecific/Unix/regExp.C | 69 ++++++----- src/OSspecific/Unix/regExp.H | 38 ++++-- .../primitives/strings/keyType/keyType.H | 3 - .../primitives/strings/keyType/keyTypeI.H | 8 +- .../primitives/strings/wordRe/wordRe.H | 97 +++++++++------ .../primitives/strings/wordRe/wordReI.H | 110 +++++++++--------- .../primitives/strings/wordRe/wordReIO.C | 11 +- 9 files changed, 218 insertions(+), 174 deletions(-) diff --git a/applications/test/wordRe/testRegexps b/applications/test/wordRe/testRegexps index f27cc60b87..c18cac8028 100644 --- a/applications/test/wordRe/testRegexps +++ b/applications/test/wordRe/testRegexps @@ -12,7 +12,8 @@ ( ( "a.*" "abc" ) ( "a.*" "bac" ) - ( "a.*" "abcd" ) + ( "A.*" "abcd" ) + ( "a.*" "ABCD" ) ( "a.*" "def" ) ( "d(.*)f" "def" ) ( "plain" "def" ) diff --git a/applications/test/wordRe/wordReTest.C b/applications/test/wordRe/wordReTest.C index 54dcf1703c..a63b4aec2e 100644 --- a/applications/test/wordRe/wordReTest.C +++ b/applications/test/wordRe/wordReTest.C @@ -45,36 +45,42 @@ int main(int argc, char *argv[]) Foam::string s2("this .* file"); const char * s3 = "this .* file"; - Info<< wordRe(s1).info() << endl; - Info<< wordRe(s2, false).info() << endl; - Info<< wordRe(s2).info() << endl; - Info<< wordRe(s3, true).info() << endl; + wordRe(s1, wordRe::DETECT).info(Info) << endl; + wordRe(s2).info(Info) << endl; + wordRe(s2, wordRe::DETECT).info(Info) << endl; + wordRe(s3, wordRe::REGEXP).info(Info) << endl; wre = "this .* file"; - Info<< wre.info() << endl; + wre.info(Info) << endl; wre = s1; - Info<< wre.info() << endl; + wre.info(Info) << endl; wre.uncompile(); - Info<< wre.info() << " uncompiled" << endl; + wre.info(Info) << endl; - wre = "something"; - Info<< wre.info() << " before" << endl; + wre.info(Info) << " before" << endl; wre.uncompile(); - Info<< wre.info() << " uncompiled" << endl; - wre.compile(true); - Info<< wre.info() << " 
after auto-detect" << endl; + wre.info(Info) << " uncompiled" << endl; + wre.compile(wordRe::DETECT); + wre.info(Info) << " after DETECT" << endl; + wre.compile(wordRe::NOCASE); + wre.info(Info) << " after NOCASE" << endl; + wre.compile(wordRe::DETECT_NOCASE); + wre.info(Info) << " after DETECT_NOCASE" << endl; wre = "something .* value"; - Info<< wre.info() << " before" << endl; + wre.info(Info) << " before" << endl; wre.uncompile(); - Info<< wre.info() << " uncompiled" << endl; - wre.compile(true); - Info<< wre.info() << " after auto-detect" << endl; + wre.info(Info) << " uncompiled" << endl; + wre.compile(wordRe::DETECT); + wre.info(Info) << " after DETECT" << endl; wre.uncompile(); - Info<< wre.info() << " uncompiled" << endl; + wre.info(Info) << " uncompiled" << endl; wre.recompile(); - Info<< wre.info() << " recompiled" << endl; + wre.info(Info) << " recompiled" << endl; + + wre.set("something .* value", wordRe::LITERAL); + wre.info(Info) << " set as LITERAL" << endl; IOobject::writeDivider(Info); @@ -88,12 +94,21 @@ int main(int argc, char *argv[]) const wordRe& wre = rawList[elemI].first(); const string& str = rawList[elemI].second(); - Info<< wre.info() + wre.info(Info) << " equals:" << (wre == str) << "(" << wre.match(str, true) << ")" << " match:" << wre.match(str) << " str=" << str << endl; + + wordRe wre2; + wre2.set(wre, wordRe::NOCASE); + + wre2.info(Info) + << " match:" << wre2.match(str) + << " str=" << str + << endl; + } Info<< endl; diff --git a/src/OSspecific/Unix/regExp.C b/src/OSspecific/Unix/regExp.C index e6481bd1f7..44b68eb13d 100644 --- a/src/OSspecific/Unix/regExp.C +++ b/src/OSspecific/Unix/regExp.C @@ -32,30 +32,6 @@ License #include "List.H" #include "IOstreams.H" - -// * * * * * * * * * * * * Private Member Functions * * * * * * * * * * * * // - -void Foam::regExp::compile(const char* pattern) const -{ - clear(); - - // avoid NULL pointer and zero-length patterns - if (pattern && *pattern) - { - preg_ = new regex_t; - - if 
(regcomp(preg_, pattern, REG_EXTENDED) != 0) - { - FatalErrorIn - ( - "regExp::compile(const char*)" - ) << "Failed to compile regular expression '" << pattern << "'" - << exit(FatalError); - } - } -} - - // * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * // Foam::regExp::regExp() @@ -64,19 +40,19 @@ Foam::regExp::regExp() {} -Foam::regExp::regExp(const char* pattern) +Foam::regExp::regExp(const char* pattern, const bool ignoreCase) : preg_(0) { - compile(pattern); + set(pattern, ignoreCase); } -Foam::regExp::regExp(const std::string& pattern) +Foam::regExp::regExp(const std::string& pattern, const bool ignoreCase) : preg_(0) { - compile(pattern.c_str()); + set(pattern.c_str(), ignoreCase); } @@ -90,6 +66,39 @@ Foam::regExp::~regExp() // * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * * // +void Foam::regExp::set(const char* pattern, const bool ignoreCase) const +{ + clear(); + + // avoid NULL pointer and zero-length patterns + if (pattern && *pattern) + { + preg_ = new regex_t; + + int cflags = REG_EXTENDED; + if (ignoreCase) + { + cflags |= REG_ICASE; + } + + if (regcomp(preg_, pattern, cflags) != 0) + { + FatalErrorIn + ( + "regExp::set(const char*)" + ) << "Failed to compile regular expression '" << pattern << "'" + << exit(FatalError); + } + } +} + + +void Foam::regExp::set(const std::string& pattern, const bool ignoreCase) const +{ + return set(pattern.c_str(), ignoreCase); +} + + bool Foam::regExp::clear() const { if (preg_) @@ -194,13 +203,13 @@ bool Foam::regExp::match(const string& str, List& groups) const void Foam::regExp::operator=(const char* pat) { - compile(pat); + set(pat); } void Foam::regExp::operator=(const std::string& pat) { - compile(pat.c_str()); + set(pat); } diff --git a/src/OSspecific/Unix/regExp.H b/src/OSspecific/Unix/regExp.H index 1cf9601c76..3688c45bf8 100644 --- a/src/OSspecific/Unix/regExp.H +++ b/src/OSspecific/Unix/regExp.H @@ -65,9 +65,6 @@ class regExp // Private member functions 
- //- Compile into a regular expression - void compile(const char*) const; - //- Disallow default bitwise copy construct regExp(const regExp&); @@ -100,11 +97,11 @@ public: //- Construct null regExp(); - //- Construct from character array - regExp(const char*); + //- Construct from character array, optionally ignoring case + regExp(const char*, const bool ignoreCase=false); - //- Construct from std::string (or string) - regExp(const std::string&); + //- Construct from std::string (or string), optionally ignoring case + regExp(const std::string&, const bool ignoreCase=false); // Destructor @@ -113,7 +110,9 @@ public: // Member functions - //- Is the precompiled expression set? + //- Access + + //- Does a precompiled expression exist? inline bool exists() const { return preg_ ? true : false; @@ -125,10 +124,23 @@ public: return preg_ ? preg_->re_nsub : 0; } + + //- Editing + + //- Compile pattern into a regular expression, optionally ignoring case + void set(const char*, const bool ignoreCase=false) const; + + //- Compile pattern into a regular expression, optionally ignoring case + void set(const std::string&, const bool ignoreCase=false) const; + + //- Release precompiled expression. // Returns true if precompiled expression existed before clear bool clear() const; + + //- Searching + //- Find position within string. 
// Returns the index where it begins or string::npos if not found std::string::size_type find(const std::string& str) const; @@ -150,12 +162,14 @@ public: // Member Operators - //- Assign from a string and compile regular expression - void operator=(const std::string&); - - //- Assign from a character array and compile regular expression + //- Assign and compile pattern from a character array + // Always case sensitive void operator=(const char*); + //- Assign and compile pattern from string + // Always case sensitive + void operator=(const std::string&); + }; diff --git a/src/OpenFOAM/primitives/strings/keyType/keyType.H b/src/OpenFOAM/primitives/strings/keyType/keyType.H index 00c3cef732..29f851c738 100644 --- a/src/OpenFOAM/primitives/strings/keyType/keyType.H +++ b/src/OpenFOAM/primitives/strings/keyType/keyType.H @@ -99,9 +99,6 @@ public: // Member functions - //- Is this character valid for a keyType - inline static bool valid(char c); - //- Should be treated as a match rather than a literal string inline bool isPattern() const; diff --git a/src/OpenFOAM/primitives/strings/keyType/keyTypeI.H b/src/OpenFOAM/primitives/strings/keyType/keyTypeI.H index c19c0312e0..391f666aff 100644 --- a/src/OpenFOAM/primitives/strings/keyType/keyTypeI.H +++ b/src/OpenFOAM/primitives/strings/keyType/keyTypeI.H @@ -81,13 +81,7 @@ inline Foam::keyType::keyType // * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * // -inline bool Foam::keyType::valid(char c) -{ - return c != '"'; -} - - -bool Foam::keyType::isPattern() const +inline bool Foam::keyType::isPattern() const { return isPattern_; } diff --git a/src/OpenFOAM/primitives/strings/wordRe/wordRe.H b/src/OpenFOAM/primitives/strings/wordRe/wordRe.H index 7433ce8c84..3eec24b2b2 100644 --- a/src/OpenFOAM/primitives/strings/wordRe/wordRe.H +++ b/src/OpenFOAM/primitives/strings/wordRe/wordRe.H @@ -29,13 +29,20 @@ Description A wordRe is a word, but can also have a regular expression for matching words. 
+ By default the constructors will generally preserve the argument as + string literal and the assignment operators will use the wordRe::DETECT + compOption to scan the string for regular expression meta characters + and/or invalid word characters and react accordingly. + + The exceptions are when constructing/assigning from another + Foam::wordRe (preserve the same type) or from a Foam::word (always + literal). + Note If the string contents are changed - eg, by the operator+=() or by string::replace(), etc - it will be necessary to use compile() or recompile() to sychronize the regular expression. - THIS IS STILL A DRAFT -- NOT YET RELEASED FOR GENERAL USE - SourceFiles wordRe.C wordReIO.C @@ -47,7 +54,6 @@ SourceFiles #include "word.H" #include "regExp.H" -#include "InfoProxy.H" // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // @@ -79,12 +85,24 @@ class wordRe public: + // Public data types + + //- Enumeration with compile options + // Note that 'REGEXP' is implicit if 'NOCASE' is specified alone. + enum compOption + { + LITERAL = 0, /*!< treat as a string literal */ + DETECT = 1, /*!< detect if the string contains meta-characters */ + REGEXP = 2, /*!< treat as regular expression */ + NOCASE = 4, /*!< ignore case in regular expression */ + DETECT_NOCASE = DETECT | NOCASE, + REGEXP_NOCASE = REGEXP | NOCASE + }; + + //- Is this a meta character? static inline bool meta(char); - //- Is this character valid for a wordRe - inline static bool valid(char); - //- Test string for regular expression meta characters static inline bool isPattern(const string&); @@ -100,64 +118,67 @@ public: inline wordRe(const word&); //- Construct as copy of character array - // Treat as regular expression specified explicitly. - inline wordRe(const char*, const bool asPattern=false); + // Optionally specify how it should be treated. + inline wordRe(const char*, const compOption=LITERAL); //- Construct as copy of string. 
- // Treat as regular expression specified explicitly. - inline wordRe(const string&, const bool asPattern); - - //- Construct as copy of string. - // Auto-test for regular expression - inline wordRe(const string&); + // Optionally specify how it should be treated. + inline wordRe(const string&, const compOption=LITERAL); //- Construct as copy of std::string - // Treat as regular expression specified explicitly. - inline wordRe(const std::string&, const bool asPattern); - - //- Construct as copy of std::string - // Auto-test for regular expression - inline wordRe(const std::string&); + // Optionally specify how it should be treated. + inline wordRe(const std::string&, const compOption=LITERAL); //- Construct from Istream + // Words are treated as literals, strings with an auto-test wordRe(Istream&); - // Destructor - - ~wordRe(); - - // Member functions + //- Access + //- Should be treated as a match rather than a literal string? inline bool isPattern() const; - //- Create and compile the regular expression - // Optionally detect if it appears to be a regular expression - inline bool compile(const bool detect=false) const; + //- Infrastructure + + //- Compile the regular expression + inline bool compile() const; + + //- Possibly compile the regular expression, with greater control + inline bool compile(const compOption) const; //- Recompile an existing regular expression inline bool recompile() const; - //- Frees precompiled regular expression and makes is a literal string. + //- Frees precompiled regular expression, making wordRe a literal. 
// Optionally strips invalid word characters inline void uncompile(const bool doStripInvalid=false) const; + //- Editing + + //- Copy string, auto-test for regular expression or other options + inline void set(const std::string&, const compOption=DETECT); + + //- Copy string, auto-test for regular expression or other options + inline void set(const char*, const compOption=DETECT); + //- Clear string and precompiled regular expression inline void clear(); + //- Searching + //- Smart match as regular expression or as a string // Optionally specify a literal match only inline bool match(const string&, bool literalMatch=false) const; + //- Miscellaneous + //- Return a string with quoted meta-characters inline string quotemeta() const; - //- Return info proxy. - InfoProxy info() const - { - return *this; - } + //- Output some basic info + Ostream& info(Ostream&) const; // Member operators @@ -165,18 +186,22 @@ public: // Assignment //- Assign copy + // Always case sensitive inline void operator=(const wordRe&); //- Copy word, never a regular expression inline void operator=(const word&); //- Copy string, auto-test for regular expression + // Always case sensitive inline void operator=(const string&); //- Copy string, auto-test for regular expression + // Always case sensitive inline void operator=(const std::string&); //- Copy string, auto-test for regular expression + // Always case sensitive inline void operator=(const char*); @@ -187,10 +212,6 @@ public: }; -template<> -Ostream& operator<<(Ostream&, const InfoProxy&); - - // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // } // End namespace Foam diff --git a/src/OpenFOAM/primitives/strings/wordRe/wordReI.H b/src/OpenFOAM/primitives/strings/wordRe/wordReI.H index 79e2925c91..b84694ba82 100644 --- a/src/OpenFOAM/primitives/strings/wordRe/wordReI.H +++ b/src/OpenFOAM/primitives/strings/wordRe/wordReI.H @@ -32,16 +32,6 @@ inline bool Foam::wordRe::meta(char c) } -inline bool 
Foam::wordRe::valid(char c) -{ - return - ( - !isspace(c) - && c != '"' - && c != '/' - ); -} - inline bool Foam::wordRe::isPattern(const string& str) { return string::meta(str); @@ -76,66 +66,33 @@ inline Foam::wordRe::wordRe(const word& str) {} -inline Foam::wordRe::wordRe(const char* str, const bool asPattern) +inline Foam::wordRe::wordRe(const char* str, const compOption opt) : word(str, false), re_() { - if (asPattern) - { - compile(); - } + compile(opt); } -inline Foam::wordRe::wordRe(const string& str, const bool asPattern) +inline Foam::wordRe::wordRe(const string& str, const compOption opt) : word(str, false), re_() { - if (asPattern) - { - compile(); - } + compile(opt); } -inline Foam::wordRe::wordRe(const string& str) +inline Foam::wordRe::wordRe(const std::string& str, const compOption opt) : word(str, false), re_() { - compile(true); // auto-detect regex + compile(opt); } -inline Foam::wordRe::wordRe(const std::string& str, const bool asPattern) -: - word(str, false), - re_() -{ - if (asPattern) - { - compile(); - } -} - - -inline Foam::wordRe::wordRe(const std::string& str) -: - word(str, false), - re_() -{ - compile(true); // auto-detect regex -} - - -// * * * * * * * * * * * * * * * * Destructor * * * * * * * * * * * * * * * // - -Foam::wordRe::~wordRe() -{} - - // * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * // inline bool Foam::wordRe::isPattern() const @@ -144,22 +101,47 @@ inline bool Foam::wordRe::isPattern() const } -inline bool Foam::wordRe::compile(const bool detect) const +inline bool Foam::wordRe::compile(const compOption opt) const { - // appears to be a plain word and not a regex - if (detect && string::valid(*this) && !string::meta(*this)) + bool doCompile = false; + + if (opt & wordRe::REGEXP) { - re_.clear(); + doCompile = true; + } + else if (opt & wordRe::DETECT) + { + if (string::meta(*this) || !string::valid(*this)) + { + doCompile = true; + } + } + else if (opt & wordRe::NOCASE) + { + doCompile = true; 
+ } + + + if (doCompile) + { + re_.set(*this, (opt & wordRe::NOCASE)); } else { - re_ = *this; + re_.clear(); } return re_.exists(); } +inline bool Foam::wordRe::compile() const +{ + re_ = *this; + return re_.exists(); +} + + inline bool Foam::wordRe::recompile() const { if (re_.exists()) @@ -215,6 +197,20 @@ inline Foam::string Foam::wordRe::quotemeta() const } +inline void Foam::wordRe::set(const std::string& str, const compOption opt) +{ + string::operator=(str); + compile(opt); +} + + +inline void Foam::wordRe::set(const char* str, const compOption opt) +{ + string::operator=(str); + compile(opt); +} + + // * * * * * * * * * * * * * * * Member Operators * * * * * * * * * * * * * // inline void Foam::wordRe::operator=(const wordRe& str) @@ -242,21 +238,21 @@ inline void Foam::wordRe::operator=(const word& str) inline void Foam::wordRe::operator=(const string& str) { string::operator=(str); - compile(true); // auto-detect regex + compile(DETECT); // auto-detect regex } inline void Foam::wordRe::operator=(const std::string& str) { string::operator=(str); - compile(true); // auto-detect regex + compile(DETECT); // auto-detect regex } inline void Foam::wordRe::operator=(const char* str) { string::operator=(str); - compile(true); // auto-detect regex + compile(DETECT); // auto-detect regex } diff --git a/src/OpenFOAM/primitives/strings/wordRe/wordReIO.C b/src/OpenFOAM/primitives/strings/wordRe/wordReIO.C index 9e7c1f2b82..49bfd17ddf 100644 --- a/src/OpenFOAM/primitives/strings/wordRe/wordReIO.C +++ b/src/OpenFOAM/primitives/strings/wordRe/wordReIO.C @@ -91,18 +91,15 @@ Foam::Ostream& Foam::operator<<(Ostream& os, const wordRe& w) } -template<> -Foam::Ostream& Foam::operator<<(Ostream& os, const InfoProxy& ip) +Foam::Ostream& Foam::wordRe::info(Ostream& os) const { - const wordRe& wre = ip.t_; - - if (wre.isPattern()) + if (isPattern()) { - os << "wordRe(regex) " << wre; + os << "wordRe(regex) " << *this; } else { - os << "wordRe(plain) '" << wre << "'"; + os << 
"wordRe(plain) '" << *this << "'"; } os.flush();