mirror of
https://develop.openfoam.com/Development/openfoam.git
synced 2025-11-28 03:28:01 +00:00
ENH: support negated regular expressions (#2283)
- extends the prefix syntax to handle '!' (negation) values. For example, "(?!).*processor.*" or "(?!i)inlet.*"
This commit is contained in:
@ -252,12 +252,16 @@ void testExpressions(const UList<regexTest>& tests)
|
||||
}
|
||||
else if (re.search(str))
|
||||
{
|
||||
Info<< "partial match";
|
||||
Info<< "partial";
|
||||
}
|
||||
else
|
||||
{
|
||||
Info<< "false";
|
||||
}
|
||||
if (re.negated())
|
||||
{
|
||||
Info<< " (negated)";
|
||||
}
|
||||
Info<< endl;
|
||||
}
|
||||
catch (const Foam::error& err)
|
||||
@ -329,6 +333,15 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
#endif
|
||||
|
||||
Info<< "sizeof std::regex: " << sizeof(std::regex) << nl;
|
||||
Info<< "sizeof regex C++11: " << sizeof(regExpCxx) << nl;
|
||||
#ifndef _WIN32
|
||||
Info<< "sizeof regex POSIX: " << sizeof(regExpPosix) << nl;
|
||||
#endif
|
||||
Info<< "sizeof word: " << sizeof(Foam::word) << nl;
|
||||
Info<< "sizeof wordRe: " << sizeof(Foam::wordRe) << nl;
|
||||
Info<< "sizeof keyType: " << sizeof(Foam::keyType) << nl;
|
||||
|
||||
if (!args.count({"cxx", "posix"}))
|
||||
{
|
||||
args.setOption("cxx");
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
/*--------------------------------*- C++ -*----------------------------------*\
|
||||
| ========= | |
|
||||
| \\ / F ield | OpenFOAM: The Open Source CFD Toolbox |
|
||||
| \\ / O peration | Version: v2106 |
|
||||
| \\ / O peration | Version: v2112 |
|
||||
| \\ / A nd | Website: www.openfoam.com |
|
||||
| \\/ M anipulation | |
|
||||
\*---------------------------------------------------------------------------*/
|
||||
@ -12,6 +12,8 @@
|
||||
(
|
||||
( true "(U|k|epsilon)" "U" )
|
||||
( false "(U|k|epsilon)" "alpha" )
|
||||
( true "(?!)(U|k|epsilon)" "alpha" )
|
||||
( true "(?! *&)(U|k|epsilon)" "alpha" ) // Ignore unknown content
|
||||
( true "ab.*" "abc" )
|
||||
( true ".*" "abc" )
|
||||
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
Copyright (C) 2011-2016 OpenFOAM Foundation
|
||||
Copyright (C) 2018 OpenCFD Ltd.
|
||||
Copyright (C) 2018-2021 OpenCFD Ltd.
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
@ -49,16 +49,98 @@ static inline bool fullMatch(const regmatch_t& m, const regoff_t len)
|
||||
} // End anonymous namespace
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * Private Member Functions * * * * * * * * * * * //
|
||||
|
||||
bool Foam::regExpPosix::set_pattern
|
||||
(
|
||||
const char* pattern,
|
||||
size_t len,
|
||||
bool ignoreCase
|
||||
)
|
||||
{
|
||||
clear(); // Also sets ctrl_ = 0
|
||||
|
||||
const char* pat = pattern;
|
||||
bool doNegate = false;
|
||||
|
||||
// Handle known embedded prefixes
|
||||
if (len > 2 && pat[0] == '(' && pat[1] == '?')
|
||||
{
|
||||
pat += 2;
|
||||
len -= 2;
|
||||
|
||||
for (bool done = false; !done && len; ++pat, --len)
|
||||
{
|
||||
switch (*pat)
|
||||
{
|
||||
case '!':
|
||||
{
|
||||
// Negated (inverted) match
|
||||
doNegate = true;
|
||||
break;
|
||||
}
|
||||
case 'i':
|
||||
{
|
||||
// Ignore-case
|
||||
ignoreCase = true;
|
||||
break;
|
||||
}
|
||||
case ')':
|
||||
{
|
||||
// End of prefix parsing
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Avoid zero-length patterns
|
||||
if (len)
|
||||
{
|
||||
int flags = REG_EXTENDED;
|
||||
if (ignoreCase)
|
||||
{
|
||||
flags |= REG_ICASE;
|
||||
}
|
||||
|
||||
{
|
||||
preg_ = new regex_t;
|
||||
int err = regcomp(preg_, pat, flags);
|
||||
|
||||
if (err == 0)
|
||||
{
|
||||
ctrl_ = (doNegate ? ctrlType::NEGATED : ctrlType::NORMAL);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
char errbuf[200];
|
||||
regerror(err, preg_, errbuf, sizeof(errbuf));
|
||||
|
||||
FatalErrorInFunction
|
||||
<< "Failed to compile regular expression '"
|
||||
<< pattern << "'\n" << errbuf
|
||||
<< exit(FatalError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * * //
|
||||
|
||||
bool Foam::regExpPosix::clear()
|
||||
{
|
||||
ctrl_ = 0;
|
||||
|
||||
if (preg_)
|
||||
{
|
||||
regfree(preg_);
|
||||
delete preg_;
|
||||
preg_ = nullptr;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -66,71 +148,40 @@ bool Foam::regExpPosix::clear()
|
||||
}
|
||||
|
||||
|
||||
bool Foam::regExpPosix::set(const char* pattern, bool ignoreCase)
|
||||
{
|
||||
clear();
|
||||
|
||||
// Avoid nullptr and zero-length patterns
|
||||
if (pattern && *pattern)
|
||||
{
|
||||
int cflags = REG_EXTENDED;
|
||||
if (ignoreCase)
|
||||
{
|
||||
cflags |= REG_ICASE;
|
||||
}
|
||||
|
||||
const char* pat = pattern;
|
||||
|
||||
// Check for embedded prefix for ignore-case
|
||||
// this is the only embedded prefix we support
|
||||
// - a simple check is sufficient
|
||||
if (!strncmp(pattern, "(?i)", 4))
|
||||
{
|
||||
cflags |= REG_ICASE;
|
||||
pat += 4;
|
||||
|
||||
// avoid zero-length patterns
|
||||
if (!*pat)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
preg_ = new regex_t;
|
||||
int err = regcomp(preg_, pat, cflags);
|
||||
|
||||
if (err != 0)
|
||||
{
|
||||
char errbuf[200];
|
||||
regerror(err, preg_, errbuf, sizeof(errbuf));
|
||||
|
||||
FatalErrorInFunction
|
||||
<< "Failed to compile regular expression '" << pattern << "'"
|
||||
<< nl << errbuf
|
||||
<< exit(FatalError);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false; // Was cleared and nothing was set
|
||||
}
|
||||
|
||||
|
||||
bool Foam::regExpPosix::set(const std::string& pattern, bool ignoreCase)
|
||||
{
|
||||
return set(pattern.c_str(), ignoreCase);
|
||||
}
|
||||
|
||||
|
||||
std::string::size_type Foam::regExpPosix::find(const std::string& text) const
|
||||
{
|
||||
if (preg_ && !text.empty())
|
||||
// Find with negated is probably not very reliable...
|
||||
if (!preg_ || !ctrl_)
|
||||
{
|
||||
// Undefined: never matches
|
||||
return std::string::npos;
|
||||
}
|
||||
else if (text.empty())
|
||||
{
|
||||
if (ctrl_ == ctrlType::NEGATED)
|
||||
{
|
||||
return 0; // No match - pretend it starts at position 0
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::string::npos;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const size_t nmatch = 1;
|
||||
regmatch_t pmatch[1];
|
||||
|
||||
if (regexec(preg_, text.c_str(), nmatch, pmatch, 0) == 0)
|
||||
const bool ok = (regexec(preg_, text.c_str(), nmatch, pmatch, 0) == 0);
|
||||
|
||||
if (ctrl_ == ctrlType::NEGATED)
|
||||
{
|
||||
if (!ok)
|
||||
{
|
||||
return 0; // No match - claim that is starts at position 0
|
||||
}
|
||||
}
|
||||
else if (ok)
|
||||
{
|
||||
return pmatch[0].rm_so;
|
||||
}
|
||||
@ -142,23 +193,31 @@ std::string::size_type Foam::regExpPosix::find(const std::string& text) const
|
||||
|
||||
bool Foam::regExpPosix::match(const std::string& text) const
|
||||
{
|
||||
const auto len = text.size();
|
||||
bool ok = false;
|
||||
|
||||
if (preg_ && len)
|
||||
if (!preg_ || !ctrl_)
|
||||
{
|
||||
// Undefined: never matches
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto len = text.length();
|
||||
|
||||
if (len)
|
||||
{
|
||||
const size_t nmatch = 1;
|
||||
regmatch_t pmatch[1];
|
||||
|
||||
// Verify that the entire string was matched
|
||||
// - [0] is the entire match result
|
||||
return
|
||||
ok =
|
||||
(
|
||||
regexec(preg_, text.c_str(), nmatch, pmatch, 0) == 0
|
||||
&& fullMatch(pmatch[0], len)
|
||||
);
|
||||
}
|
||||
|
||||
return false;
|
||||
return (ctrl_ == ctrlType::NEGATED ? !ok : ok);
|
||||
}
|
||||
|
||||
|
||||
@ -170,6 +229,12 @@ bool Foam::regExpPosix::match
|
||||
{
|
||||
matches.clear();
|
||||
|
||||
// Probably does not make sense for negated pattern...
|
||||
if (negated())
|
||||
{
|
||||
return match(text);
|
||||
}
|
||||
|
||||
const auto len = text.size();
|
||||
if (preg_ && len)
|
||||
{
|
||||
|
||||
@ -28,10 +28,20 @@ Class
|
||||
Foam::regExpPosix
|
||||
|
||||
Description
|
||||
Wrapper around POSIX extended regular expressions.
|
||||
Wrapper around POSIX extended regular expressions
|
||||
with some additional prefix-handling. The prefix-handling is
|
||||
loosely oriented on PCRE regular expressions and provides a
|
||||
simple means of tuning the expressions.
|
||||
|
||||
The PCRE '(?i)' extension is provided to compile the regular expression
|
||||
as being case-insensitive.
|
||||
The prefixes are detected as \c (?...) at the beginning of
|
||||
the regular expression. Any unknown/unsupported prefixes are silently
|
||||
ignored.
|
||||
|
||||
- "(?!i)" :
|
||||
one or more embedded pattern-match modifiers for the entire pattern.
|
||||
- the \c 'i' indicates ignore-case
|
||||
- the \c '!' (exclamation) indicates negated (inverted) matching
|
||||
.
|
||||
|
||||
SeeAlso
|
||||
The manpage regex(7) for more information about POSIX regular expressions.
|
||||
@ -49,8 +59,8 @@ Warning
|
||||
for regular expressions continues to improve.
|
||||
|
||||
SourceFiles
|
||||
regExpPosixI.H
|
||||
regExpPosix.C
|
||||
regExpPosixI.H
|
||||
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
@ -74,11 +84,27 @@ template<class StringType> class SubStrings;
|
||||
|
||||
class regExpPosix
|
||||
{
|
||||
// Data Types
|
||||
|
||||
//- Simple control types
|
||||
enum ctrlType { EMPTY = 0, NORMAL = 1, NEGATED = 2 };
|
||||
|
||||
|
||||
// Private Data
|
||||
|
||||
//- Compiled regular expression
|
||||
regex_t* preg_;
|
||||
|
||||
//- Track if input pattern is non-empty, negated etc.
|
||||
unsigned char ctrl_;
|
||||
|
||||
|
||||
// Private Member Functions
|
||||
|
||||
//- Assign pattern
|
||||
bool set_pattern(const char* pattern, size_t len, bool ignoreCase);
|
||||
|
||||
|
||||
public:
|
||||
|
||||
// Public Types
|
||||
@ -158,7 +184,14 @@ public:
|
||||
//- Return true if a precompiled expression exists
|
||||
inline bool exists() const noexcept;
|
||||
|
||||
//- The number of capture groups for a non-empty expression
|
||||
//- True if pattern matching is negated
|
||||
inline bool negated() const noexcept;
|
||||
|
||||
//- Change pattern negation, return previous value
|
||||
inline bool negate(bool on) noexcept;
|
||||
|
||||
//- The number of capture groups for a non-empty,
|
||||
//- non-negated expressions
|
||||
inline unsigned ngroups() const;
|
||||
|
||||
|
||||
@ -173,17 +206,19 @@ public:
|
||||
|
||||
//- Compile pattern into a regular expression, optionally ignore case.
|
||||
// \return True if the pattern was compiled
|
||||
bool set(const char* pattern, bool ignoreCase=false);
|
||||
inline bool set(const char* pattern, bool ignoreCase=false);
|
||||
|
||||
//- Compile pattern into a regular expression, optionally ignore case.
|
||||
// \return True if the pattern was compiled
|
||||
bool set(const std::string& pattern, bool ignoreCase=false);
|
||||
inline bool set(const std::string& pattern, bool ignoreCase=false);
|
||||
|
||||
|
||||
// Matching/Searching
|
||||
|
||||
//- Find position within the text.
|
||||
// \return The index where it begins or string::npos if not found
|
||||
//
|
||||
// \note does not properly work with negated regex!
|
||||
std::string::size_type find(const std::string& text) const;
|
||||
|
||||
//- True if the regex matches the entire text.
|
||||
@ -193,6 +228,8 @@ public:
|
||||
//- True if the regex matches the text, set the matches.
|
||||
// The first group starts at index 1 (0 is the entire match).
|
||||
// The begin-of-line (^) and end-of-line ($) anchors are implicit
|
||||
//
|
||||
// \note does not properly work with negated regex!
|
||||
bool match(const std::string& text, results_type& matches) const;
|
||||
|
||||
//- Return true if the regex was found within the text
|
||||
|
||||
@ -31,15 +31,18 @@ License
|
||||
|
||||
inline Foam::regExpPosix::regExpPosix() noexcept
|
||||
:
|
||||
preg_(nullptr)
|
||||
preg_(nullptr),
|
||||
ctrl_(0)
|
||||
{}
|
||||
|
||||
|
||||
inline Foam::regExpPosix::regExpPosix(regExpPosix&& rgx) noexcept
|
||||
:
|
||||
preg_(rgx.preg_)
|
||||
preg_(rgx.preg_),
|
||||
ctrl_(rgx.ctrl_)
|
||||
{
|
||||
rgx.preg_ = nullptr;
|
||||
rgx.ctrl_ = 0;
|
||||
}
|
||||
|
||||
|
||||
@ -49,7 +52,8 @@ inline Foam::regExpPosix::regExpPosix
|
||||
const bool ignoreCase
|
||||
)
|
||||
:
|
||||
preg_(nullptr)
|
||||
preg_(nullptr),
|
||||
ctrl_(0)
|
||||
{
|
||||
set(pattern, ignoreCase);
|
||||
}
|
||||
@ -61,7 +65,8 @@ inline Foam::regExpPosix::regExpPosix
|
||||
const bool ignoreCase
|
||||
)
|
||||
:
|
||||
preg_(nullptr)
|
||||
preg_(nullptr),
|
||||
ctrl_(0)
|
||||
{
|
||||
set(pattern, ignoreCase);
|
||||
}
|
||||
@ -89,14 +94,72 @@ inline bool Foam::regExpPosix::exists() const noexcept
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpPosix::negated() const noexcept
|
||||
{
|
||||
return (ctrl_ == ctrlType::NEGATED);
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpPosix::negate(bool on) noexcept
|
||||
{
|
||||
bool old(ctrl_ == ctrlType::NEGATED);
|
||||
|
||||
if (on)
|
||||
{
|
||||
if (ctrl_)
|
||||
{
|
||||
ctrl_ = ctrlType::NEGATED;
|
||||
}
|
||||
}
|
||||
else if (old)
|
||||
{
|
||||
ctrl_ = ctrlType::NORMAL;
|
||||
}
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
|
||||
inline unsigned Foam::regExpPosix::ngroups() const
|
||||
{
|
||||
return preg_ ? preg_->re_nsub : 0;
|
||||
return (preg_ && ctrl_ == ctrlType::NORMAL) ? preg_->re_nsub : 0;
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpPosix::set(const char* pattern, bool ignoreCase)
|
||||
{
|
||||
// Silently handle nullptr
|
||||
return set_pattern
|
||||
(
|
||||
pattern,
|
||||
(pattern ? std::char_traits<char>::length(pattern) : 0),
|
||||
ignoreCase
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpPosix::set(const std::string& pattern, bool ignoreCase)
|
||||
{
|
||||
return set_pattern
|
||||
(
|
||||
pattern.data(),
|
||||
pattern.length(),
|
||||
ignoreCase
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpPosix::search(const std::string& text) const
|
||||
{
|
||||
if (!ctrl_)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
else if (text.empty())
|
||||
{
|
||||
return (ctrl_ == ctrlType::NEGATED);
|
||||
}
|
||||
|
||||
return std::string::npos != find(text);
|
||||
}
|
||||
|
||||
@ -107,6 +170,7 @@ inline void Foam::regExpPosix::swap(regExpPosix& rgx)
|
||||
{
|
||||
// Self-swap is a no-op
|
||||
std::swap(preg_, rgx.preg_);
|
||||
std::swap(ctrl_, rgx.ctrl_);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
\\ / A nd | www.openfoam.com
|
||||
\\/ M anipulation |
|
||||
-------------------------------------------------------------------------------
|
||||
Copyright (C) 2017-2019 OpenCFD Ltd.
|
||||
Copyright (C) 2017-2021 OpenCFD Ltd.
|
||||
-------------------------------------------------------------------------------
|
||||
License
|
||||
This file is part of OpenFOAM.
|
||||
@ -106,42 +106,66 @@ static std::string error_string(const std::regex_error& err)
|
||||
} // End anonymous namespace
|
||||
|
||||
|
||||
// * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * * //
|
||||
// * * * * * * * * * * * * * Private Member Functions * * * * * * * * * * * //
|
||||
|
||||
bool Foam::regExpCxx::set(const char* pattern, bool ignoreCase)
|
||||
bool Foam::regExpCxx::set_pattern
|
||||
(
|
||||
const char* pattern,
|
||||
size_t len,
|
||||
bool ignoreCase
|
||||
)
|
||||
{
|
||||
clear(); // Also sets ok_ = false
|
||||
|
||||
size_t len = (pattern ? strlen(pattern) : 0);
|
||||
|
||||
// Avoid nullptr and zero-length patterns
|
||||
if (!len)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
std::regex::flag_type flags = syntax();
|
||||
if (ignoreCase)
|
||||
{
|
||||
flags |= std::regex::icase;
|
||||
}
|
||||
clear(); // Also sets ctrl_ = 0
|
||||
|
||||
const char* pat = pattern;
|
||||
bool doNegate = false;
|
||||
|
||||
// Has embedded ignore-case prefix?
|
||||
if (len >= 4 && !strncmp(pattern, "(?i)", 4))
|
||||
// Handle known embedded prefixes
|
||||
if (len > 2 && pat[0] == '(' && pat[1] == '?')
|
||||
{
|
||||
flags |= std::regex::icase;
|
||||
pat += 4;
|
||||
len -= 4;
|
||||
pat += 2;
|
||||
len -= 2;
|
||||
|
||||
for (bool done = false; !done && len; ++pat, --len)
|
||||
{
|
||||
switch (*pat)
|
||||
{
|
||||
case '!':
|
||||
{
|
||||
// Negated (inverted) match
|
||||
doNegate = true;
|
||||
break;
|
||||
}
|
||||
case 'i':
|
||||
{
|
||||
// Ignore-case
|
||||
ignoreCase = true;
|
||||
break;
|
||||
}
|
||||
case ')':
|
||||
{
|
||||
// End of prefix parsing
|
||||
done = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Avoid zero-length patterns
|
||||
if (len)
|
||||
{
|
||||
std::regex::flag_type flags = syntax();
|
||||
if (ignoreCase)
|
||||
{
|
||||
flags |= std::regex::icase;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
re_.assign(pat, flags);
|
||||
ok_ = true;
|
||||
re_.assign(pat, len, flags);
|
||||
ctrl_ = (doNegate ? ctrlType::NEGATED : ctrlType::NORMAL);
|
||||
return true;
|
||||
}
|
||||
catch (const std::regex_error& err)
|
||||
{
|
||||
@ -153,56 +177,7 @@ bool Foam::regExpCxx::set(const char* pattern, bool ignoreCase)
|
||||
}
|
||||
}
|
||||
|
||||
return ok_;
|
||||
}
|
||||
|
||||
|
||||
bool Foam::regExpCxx::set(const std::string& pattern, bool ignoreCase)
|
||||
{
|
||||
clear(); // Also sets ok_ = false
|
||||
|
||||
auto len = pattern.size();
|
||||
|
||||
// Avoid zero-length patterns
|
||||
if (!len)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
std::regex::flag_type flags = syntax();
|
||||
if (ignoreCase)
|
||||
{
|
||||
flags |= std::regex::icase;
|
||||
}
|
||||
|
||||
auto pat = pattern.begin();
|
||||
|
||||
// Has embedded ignore-case prefix?
|
||||
if (len >= 4 && !pattern.compare(0, 4, "(?i)"))
|
||||
{
|
||||
flags |= std::regex::icase;
|
||||
pat += 4;
|
||||
len -= 4;
|
||||
}
|
||||
|
||||
if (len)
|
||||
{
|
||||
try
|
||||
{
|
||||
re_.assign(pat, pattern.end(), flags);
|
||||
ok_ = true;
|
||||
}
|
||||
catch (const std::regex_error& err)
|
||||
{
|
||||
FatalErrorInFunction
|
||||
<< "Failed to compile regular expression '"
|
||||
<< pattern.c_str() << "'" << nl
|
||||
<< err.what() << ": " << error_string(err).c_str() << nl
|
||||
<< exit(FatalError);
|
||||
}
|
||||
}
|
||||
|
||||
return ok_;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -27,9 +27,23 @@ Class
|
||||
Foam::regExpCxx
|
||||
|
||||
Description
|
||||
Wrapper around C++11 regular expressions.
|
||||
Wrapper around C++11 regular expressions
|
||||
with some additional prefix-handling. The prefix-handling is loosely
|
||||
oriented on PCRE regular expressions and provides a simple means of
|
||||
tuning the expressions.
|
||||
|
||||
Using either POSIX extended regular expressions or
|
||||
The prefixes are detected as \c (?...) at the beginning of
|
||||
the regular expression. Any unknown/unsupported prefixes are silently
|
||||
ignored.
|
||||
|
||||
- "(?!i)" :
|
||||
one or more embedded pattern-match modifiers for the entire pattern.
|
||||
- the \c 'i' indicates ignore-case
|
||||
- the \c '!' (exclamation) indicates negated (inverted) matching
|
||||
.
|
||||
|
||||
Note
|
||||
Uses either POSIX extended regular expressions or
|
||||
<a href=
|
||||
"http://www.cplusplus.com/reference/regex/ECMAScript"
|
||||
>modified ECMAScript regular expression grammar</a>
|
||||
@ -37,10 +51,6 @@ Description
|
||||
Since ECMAScript grammar may not work correctly on all installations,
|
||||
the current default is to use extended regular expressions.
|
||||
|
||||
The JAVA/PCRE '(?i)' extension is supported as a prefix to compile the
|
||||
regular expression as being case-insensitive.
|
||||
|
||||
Note
|
||||
The C++11 regular expressions may be broken on some compilers.
|
||||
For example, gcc 4.8 is known to fail.
|
||||
For these systems the POSIX implementation or alternative must be used.
|
||||
@ -50,8 +60,8 @@ Warning
|
||||
Use the Foam::regExp typedef instead.
|
||||
|
||||
SourceFiles
|
||||
regExpCxxI.H
|
||||
regExpCxx.C
|
||||
regExpCxxI.H
|
||||
|
||||
\*---------------------------------------------------------------------------*/
|
||||
|
||||
@ -72,13 +82,19 @@ namespace Foam
|
||||
|
||||
class regExpCxx
|
||||
{
|
||||
// Data Types
|
||||
|
||||
//- Simple control types
|
||||
enum ctrlType { EMPTY = 0, NORMAL = 1, NEGATED = 2 };
|
||||
|
||||
|
||||
// Private Data
|
||||
|
||||
//- Regular expression (using char type)
|
||||
std::regex re_;
|
||||
|
||||
//- Track if input pattern was OK - ie, has a length
|
||||
bool ok_;
|
||||
//- Track if input pattern is non-empty, negated etc.
|
||||
unsigned char ctrl_;
|
||||
|
||||
|
||||
// Private Member Functions
|
||||
@ -87,6 +103,10 @@ class regExpCxx
|
||||
// 0 = extended, 1 = ECMAScript
|
||||
static inline std::regex::flag_type syntax();
|
||||
|
||||
//- Assign pattern
|
||||
bool set_pattern(const char* pattern, size_t len, bool ignoreCase);
|
||||
|
||||
|
||||
public:
|
||||
|
||||
// Public Types
|
||||
@ -174,13 +194,20 @@ public:
|
||||
|
||||
// Access
|
||||
|
||||
//- Return true if expression is empty
|
||||
//- True if expression is empty
|
||||
inline bool empty() const noexcept;
|
||||
|
||||
//- Return true if expression is non-empty
|
||||
//- True if expression is non-empty
|
||||
inline bool exists() const noexcept;
|
||||
|
||||
//- The number of capture groups for a non-empty expression
|
||||
//- True if pattern matching is negated
|
||||
inline bool negated() const noexcept;
|
||||
|
||||
//- Change pattern negation, return previous value
|
||||
inline bool negate(bool on) noexcept;
|
||||
|
||||
//- The number of capture groups for a non-empty,
|
||||
//- non-negated expressions
|
||||
inline unsigned ngroups() const;
|
||||
|
||||
// \return True if the pattern was set with ignore-case.
|
||||
@ -198,17 +225,19 @@ public:
|
||||
|
||||
//- Compile pattern into a regular expression, optionally ignore case.
|
||||
// \return True if the pattern was compiled
|
||||
bool set(const char* pattern, bool ignoreCase=false);
|
||||
inline bool set(const char* pattern, bool ignoreCase=false);
|
||||
|
||||
//- Compile pattern into a regular expression, optionally ignore case.
|
||||
// \return True if the pattern was compiled
|
||||
bool set(const std::string& pattern, bool ignoreCase=false);
|
||||
inline bool set(const std::string& pattern, bool ignoreCase=false);
|
||||
|
||||
|
||||
// Matching/Searching
|
||||
|
||||
//- Find position within the text.
|
||||
// \return The index where it begins or string::npos if not found
|
||||
//
|
||||
// \note does not properly work with negated regex!
|
||||
inline std::string::size_type find(const std::string& text) const;
|
||||
|
||||
//- True if the regex matches the entire text.
|
||||
@ -218,6 +247,8 @@ public:
|
||||
//- True if the regex matches the text, set the matches.
|
||||
// The first group starts at index 1 (0 is the entire match).
|
||||
// The begin-of-line (^) and end-of-line ($) anchors are implicit
|
||||
//
|
||||
// \note does not properly work with negated regex!
|
||||
inline bool match(const std::string& text, results_type& matches) const;
|
||||
|
||||
//- Return true if the regex was found within the text
|
||||
|
||||
@ -82,23 +82,23 @@ inline bool Foam::regExpCxx::is_meta
|
||||
inline Foam::regExpCxx::regExpCxx()
|
||||
:
|
||||
re_(),
|
||||
ok_(false)
|
||||
ctrl_(0)
|
||||
{}
|
||||
|
||||
|
||||
inline Foam::regExpCxx::regExpCxx(const regExpCxx& rgx)
|
||||
:
|
||||
re_(rgx.re_),
|
||||
ok_(rgx.ok_)
|
||||
ctrl_(rgx.ctrl_)
|
||||
{}
|
||||
|
||||
|
||||
inline Foam::regExpCxx::regExpCxx(regExpCxx&& rgx) noexcept
|
||||
:
|
||||
re_(std::move(rgx.re_)),
|
||||
ok_(rgx.ok_)
|
||||
ctrl_(rgx.ctrl_)
|
||||
{
|
||||
rgx.ok_ = false;
|
||||
rgx.ctrl_ = 0;
|
||||
}
|
||||
|
||||
|
||||
@ -109,7 +109,7 @@ inline Foam::regExpCxx::regExpCxx
|
||||
)
|
||||
:
|
||||
re_(),
|
||||
ok_(false)
|
||||
ctrl_(0)
|
||||
{
|
||||
set(pattern, ignoreCase);
|
||||
}
|
||||
@ -122,7 +122,7 @@ inline Foam::regExpCxx::regExpCxx
|
||||
)
|
||||
:
|
||||
re_(),
|
||||
ok_(false)
|
||||
ctrl_(0)
|
||||
{
|
||||
set(pattern, ignoreCase);
|
||||
}
|
||||
@ -132,34 +132,61 @@ inline Foam::regExpCxx::regExpCxx
|
||||
|
||||
inline bool Foam::regExpCxx::empty() const noexcept
|
||||
{
|
||||
return !ok_;
|
||||
return !ctrl_;
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpCxx::exists() const noexcept
|
||||
{
|
||||
return ok_;
|
||||
return ctrl_;
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpCxx::negated() const noexcept
|
||||
{
|
||||
return (ctrl_ == ctrlType::NEGATED);
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpCxx::negate(bool on) noexcept
|
||||
{
|
||||
bool old(ctrl_ == ctrlType::NEGATED);
|
||||
|
||||
if (on)
|
||||
{
|
||||
if (ctrl_)
|
||||
{
|
||||
ctrl_ = ctrlType::NEGATED;
|
||||
}
|
||||
}
|
||||
else if (old)
|
||||
{
|
||||
ctrl_ = ctrlType::NORMAL;
|
||||
}
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
|
||||
inline unsigned Foam::regExpCxx::ngroups() const
|
||||
{
|
||||
return ok_ ? re_.mark_count() : 0;
|
||||
// Groups only make sense for regular (not negated) matching
|
||||
return ctrl_ == ctrlType::NORMAL ? re_.mark_count() : 0;
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpCxx::nocase() const
|
||||
{
|
||||
return ok_ && ((re_.flags() & std::regex::icase) == std::regex::icase);
|
||||
return ctrl_ && ((re_.flags() & std::regex::icase) == std::regex::icase);
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpCxx::clear()
|
||||
{
|
||||
if (ok_)
|
||||
if (ctrl_)
|
||||
{
|
||||
re_.assign("");
|
||||
ok_ = false;
|
||||
ctrl_ = 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -174,33 +201,102 @@ inline void Foam::regExpCxx::swap(regExpCxx& rgx)
|
||||
{
|
||||
// Self-swap is a no-op
|
||||
re_.swap(rgx.re_);
|
||||
std::swap(ok_, rgx.ok_);
|
||||
std::swap(ctrl_, rgx.ctrl_);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpCxx::set(const char* pattern, bool ignoreCase)
|
||||
{
|
||||
// Silently handle nullptr
|
||||
return set_pattern
|
||||
(
|
||||
pattern,
|
||||
(pattern ? std::char_traits<char>::length(pattern) : 0),
|
||||
ignoreCase
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpCxx::set(const std::string& pattern, bool ignoreCase)
|
||||
{
|
||||
return set_pattern
|
||||
(
|
||||
pattern.data(),
|
||||
pattern.length(),
|
||||
ignoreCase
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
inline std::string::size_type
|
||||
Foam::regExpCxx::find(const std::string& text) const
|
||||
{
|
||||
std::smatch mat;
|
||||
if (!text.empty() && std::regex_search(text, mat, re_))
|
||||
// Find with negated is probably not very reliable...
|
||||
if (!ctrl_)
|
||||
{
|
||||
return mat.position(0);
|
||||
// Undefined: never matches
|
||||
return std::string::npos;
|
||||
}
|
||||
else if (text.empty())
|
||||
{
|
||||
if (ctrl_ == ctrlType::NEGATED)
|
||||
{
|
||||
return 0; // No match - pretend it starts at position 0
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::string::npos;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::smatch mat;
|
||||
|
||||
const bool ok = std::regex_search(text, mat, re_);
|
||||
|
||||
if (ctrl_ == ctrlType::NEGATED)
|
||||
{
|
||||
if (!ok)
|
||||
{
|
||||
return 0; // No match - claim that is starts at position 0
|
||||
}
|
||||
}
|
||||
else if (ok)
|
||||
{
|
||||
return mat.position(0);
|
||||
}
|
||||
}
|
||||
|
||||
return std::string::npos;
|
||||
return std::string::npos; // False
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpCxx::search(const std::string& text) const
|
||||
{
|
||||
return (ok_ && !text.empty() && std::regex_search(text, re_));
|
||||
if (!ctrl_)
|
||||
{
|
||||
// Undefined: never matches
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool ok = (!text.empty() && std::regex_search(text, re_));
|
||||
|
||||
return (ctrl_ == ctrlType::NEGATED) ? !ok : ok;
|
||||
}
|
||||
|
||||
|
||||
inline bool Foam::regExpCxx::match(const std::string& text) const
|
||||
{
|
||||
return (ok_ && !text.empty() && std::regex_match(text, re_));
|
||||
if (!ctrl_)
|
||||
{
|
||||
// Undefined: never matches
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool ok = (!text.empty() && std::regex_match(text, re_));
|
||||
|
||||
return (ctrl_ == ctrlType::NEGATED) ? !ok : ok;
|
||||
}
|
||||
|
||||
|
||||
@ -210,6 +306,13 @@ inline bool Foam::regExpCxx::match
|
||||
std::smatch& matches
|
||||
) const
|
||||
{
|
||||
// Probably does not make sense for negated pattern...
|
||||
if (negated())
|
||||
{
|
||||
// clear: matches = std::smatch();
|
||||
return match(text);
|
||||
}
|
||||
|
||||
return std::regex_match(text, matches, re_);
|
||||
}
|
||||
|
||||
@ -228,7 +331,7 @@ inline void Foam::regExpCxx::operator=(const regExpCxx& rgx)
|
||||
{
|
||||
// Self-assignment is a no-op
|
||||
re_ = rgx.re_;
|
||||
ok_ = rgx.ok_;
|
||||
ctrl_ = rgx.ctrl_;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user