regExp: Add support for case-insensitive patterns

From https://github.com/OpenFOAM/OpenFOAM-2.2.x/pull/1
This commit is contained in:
Henry
2015-01-28 16:35:36 +00:00
parent f97e276039
commit 33b1bf4c87
4 changed files with 199 additions and 90 deletions

View File

@ -2,7 +2,7 @@
========= | ========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration | \\ / O peration |
\\ / A nd | Copyright (C) 2011 OpenFOAM Foundation \\ / A nd | Copyright (C) 2011-2015 OpenFOAM Foundation
\\/ M anipulation | \\/ M anipulation |
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
License License
@ -22,6 +22,7 @@ License
along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>. along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>.
Description Description
Tests for regular expressions
\*---------------------------------------------------------------------------*/ \*---------------------------------------------------------------------------*/
@ -39,14 +40,13 @@ using namespace Foam;
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
List<Tuple2<string, string> > rawList(IFstream("testRegexps")()); List<Tuple2<string, string> > rawList(IFstream("testRegexps")());
Info<< "input list:" << rawList << endl; Info<< "Test expressions:" << rawList << endl;
IOobject::writeDivider(Info) << endl; IOobject::writeDivider(Info) << endl;
List<string> groups; List<string> groups;
// report matches: // Report matches:
forAll(rawList, elemI) forAll(rawList, elemI)
{ {
const string& pat = rawList[elemI].first(); const string& pat = rawList[elemI].first();
@ -60,50 +60,87 @@ int main(int argc, char *argv[])
Info<< "true"; Info<< "true";
if (re.ngroups()) if (re.ngroups())
{ {
Info<< " groups:" << groups; Info<< nl << "groups: " << groups;
} }
} }
else else
{ {
Info<< "false";
if (re.search(str)) if (re.search(str))
{ {
Info<< " partial match"; Info<< " partial match";
} }
else
{
Info<< "false";
}
} }
Info<< endl; Info<< endl;
} }
Info<<"test regExp(const char*) ..." << endl; Info<< nl << "test regExp(const char*) ..." << endl;
string me("Mark"); string me("Mark");
if (regExp("[Mm]ar[ck]").match(me)) // Handling of null strings
{
Info<< "matched: " << me << endl;
}
else
{
Info<< "no match" << endl;
}
if (regExp("").match(me))
{
Info<< "matched: " << me << endl;
}
else
{
Info<< "no match" << endl;
}
if (regExp(NULL).match(me)) if (regExp(NULL).match(me))
{ {
Info<< "matched: " << me << endl; Info<< "fail - matched: " << me << endl;
}
else
{
Info<< "pass - null pointer is no expression" << endl;
}
// Normal match
if (regExp("[Mm]ar[ck]").match(me))
{
Info<< "pass - matched: " << me << endl;
} }
else else
{ {
Info<< "no match" << endl; Info<< "no match" << endl;
} }
// Match ignore case
if (regExp("mar[ck]", true).match(me))
{
Info<< "pass - matched: " << me << endl;
}
else
{
Info<< "no match" << endl;
}
// Embedded prefix for match ignore case
if (regExp("(?i)mar[ck]").match(me))
{
Info<< "pass - matched: " << me << endl;
}
else
{
Info<< "no match" << endl;
}
// Handling of empty expression
if (regExp("").match(me))
{
Info<< "fail - matched: " << me << endl;
}
else
{
Info<< "pass - no match on empty expression" << endl;
}
// Embedded prefix - but expression is empty
if (regExp("(?i)").match(me))
{
Info<< "fail - matched: " << me << endl;
}
else
{
Info<< "pass - no match on empty expression" << endl;
}
Info<< endl; Info<< endl;
return 0; return 0;

View File

@ -8,14 +8,30 @@
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
// pattern, string // Pattern, String
( (
( "a.*" "abc" ) ( "a.*" "abc" ) // true
( "a.*" "bac" ) ( "a.*" "bac" ) // false - partial match only
( "a.*" "abcd" ) ( "a.*" "abcd" ) // true
( "a.*" "def" ) ( "a.*" "def" ) // false
( "d(.*)f" "def" ) ( ".*a.*" "Abc" ) // false
( " *([A-Za-z]+) *= *([^ /]+) *(//.*)?" " keyword = value // settings" ) ( "(?i).*a.*" "Abc" ) // true
( "d(.*)f" "def" ) // true
(
" *([A-Za-z]+) *= *([^ /]+) *(//.*)?"
" keyword = value // comment"
) // true
(
"[[:digit:]]"
"contains 1 or more digits"
) // false - partial match only
(
"[[:digit:]]+-[[:digit:]]+-[[:digit:]]+-[[:digit:]]+"
"1-905-123-2234"
) // true
) )
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //

View File

@ -2,7 +2,7 @@
========= | ========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration | \\ / O peration |
\\ / A nd | Copyright (C) 2011-2012 OpenFOAM Foundation \\ / A nd | Copyright (C) 2011-2015 OpenFOAM Foundation
\\/ M anipulation | \\/ M anipulation |
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
License License
@ -23,13 +23,61 @@ License
\*---------------------------------------------------------------------------*/ \*---------------------------------------------------------------------------*/
#include <sys/types.h>
#include "regExp.H" #include "regExp.H"
#include "label.H"
#include "string.H" #include "string.H"
#include "List.H" #include "List.H"
#include "IOstreams.H"
// * * * * * * * * * * * * * Private Member Functions * * * * * * * * * * * //
template<class StringType>
bool Foam::regExp::matchGrouping
(
const std::string& str,
List<StringType>& groups
) const
{
if (preg_ && str.size())
{
size_t nmatch = ngroups() + 1;
regmatch_t pmatch[nmatch];
// Also verify that the entire string was matched.
// pmatch[0] is the entire match
// pmatch[1..] are the (...) sub-groups
if
(
regexec(preg_, str.c_str(), nmatch, pmatch, 0) == 0
&& (pmatch[0].rm_so == 0 && pmatch[0].rm_eo == label(str.size()))
)
{
groups.setSize(ngroups());
label groupI = 0;
for (size_t matchI = 1; matchI < nmatch; matchI++)
{
if (pmatch[matchI].rm_so != -1 && pmatch[matchI].rm_eo != -1)
{
groups[groupI] = str.substr
(
pmatch[matchI].rm_so,
pmatch[matchI].rm_eo - pmatch[matchI].rm_so
);
}
else
{
groups[groupI].clear();
}
groupI++;
}
return true;
}
}
groups.clear();
return false;
}
// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * // // * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //
@ -69,18 +117,34 @@ void Foam::regExp::set(const char* pattern, const bool ignoreCase) const
{ {
clear(); clear();
// avoid NULL pointer and zero-length patterns // Avoid NULL pointer and zero-length patterns
if (pattern && *pattern) if (pattern && *pattern)
{ {
preg_ = new regex_t;
int cflags = REG_EXTENDED; int cflags = REG_EXTENDED;
if (ignoreCase) if (ignoreCase)
{ {
cflags |= REG_ICASE; cflags |= REG_ICASE;
} }
int err = regcomp(preg_, pattern, cflags); const char* pat = pattern;
// Check for embedded prefix for ignore-case
// this is the only embedded prefix we support
// - a simple check is sufficient
if (!strncmp(pattern, "(?i)", 4))
{
cflags |= REG_ICASE;
pat += 4;
// avoid zero-length patterns
if (!*pat)
{
return;
}
}
preg_ = new regex_t;
int err = regcomp(preg_, pat, cflags);
if (err != 0) if (err != 0)
{ {
@ -89,7 +153,7 @@ void Foam::regExp::set(const char* pattern, const bool ignoreCase) const
FatalErrorIn FatalErrorIn
( (
"regExp::set(const char*)" "regExp::set(const char*, const bool ignoreCase)"
) << "Failed to compile regular expression '" << pattern << "'" ) << "Failed to compile regular expression '" << pattern << "'"
<< nl << errbuf << nl << errbuf
<< exit(FatalError); << exit(FatalError);
@ -143,7 +207,7 @@ bool Foam::regExp::match(const std::string& str) const
size_t nmatch = 1; size_t nmatch = 1;
regmatch_t pmatch[1]; regmatch_t pmatch[1];
// also verify that the entire string was matched // Also verify that the entire string was matched
// pmatch[0] is the entire match // pmatch[0] is the entire match
if if
( (
@ -159,48 +223,23 @@ bool Foam::regExp::match(const std::string& str) const
} }
bool Foam::regExp::match(const string& str, List<string>& groups) const bool Foam::regExp::match
(
const std::string& str,
List<std::string>& groups
) const
{ {
if (preg_ && str.size()) return matchGrouping(str, groups);
{ }
size_t nmatch = ngroups() + 1;
regmatch_t pmatch[nmatch];
// also verify that the entire string was matched
// pmatch[0] is the entire match
// pmatch[1..] are the (...) sub-groups
if
(
regexec(preg_, str.c_str(), nmatch, pmatch, 0) == 0
&& (pmatch[0].rm_so == 0 && pmatch[0].rm_eo == label(str.size()))
)
{
groups.setSize(ngroups());
label groupI = 0;
for (size_t matchI = 1; matchI < nmatch; matchI++) bool Foam::regExp::match
{ (
if (pmatch[matchI].rm_so != -1 && pmatch[matchI].rm_eo != -1) const std::string& str,
{ List<Foam::string>& groups
groups[groupI] = str.substr ) const
( {
pmatch[matchI].rm_so, return matchGrouping(str, groups);
pmatch[matchI].rm_eo - pmatch[matchI].rm_so
);
}
else
{
groups[groupI].clear();
}
groupI++;
}
return true;
}
}
groups.clear();
return false;
} }

View File

@ -2,7 +2,7 @@
========= | ========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration | \\ / O peration |
\\ / A nd | Copyright (C) 2011 OpenFOAM Foundation \\ / A nd | Copyright (C) 2011-2015 OpenFOAM Foundation
\\/ M anipulation | \\/ M anipulation |
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
License License
@ -27,6 +27,9 @@ Class
Description Description
Wrapper around POSIX extended regular expressions. Wrapper around POSIX extended regular expressions.
The PCRE-style '(?i)' prefix is also recognised: a pattern that starts
with '(?i)' is compiled as case-insensitive. It is the only embedded
option that is supported.
SeeAlso SeeAlso
The manpage regex(7) for more information about POSIX regular expressions. The manpage regex(7) for more information about POSIX regular expressions.
These differ somewhat from \c Perl and \c sed regular expressions. These differ somewhat from \c Perl and \c sed regular expressions.
@ -51,6 +54,7 @@ namespace Foam
class string; class string;
template<class T> class List; template<class T> class List;
/*---------------------------------------------------------------------------*\ /*---------------------------------------------------------------------------*\
Class regExp Declaration Class regExp Declaration
\*---------------------------------------------------------------------------*/ \*---------------------------------------------------------------------------*/
@ -71,6 +75,16 @@ class regExp
//- Disallow default bitwise assignment //- Disallow default bitwise assignment
void operator=(const regExp&); void operator=(const regExp&);
//- Return true if the entire string matches the compiled expression
//  and set \c groups to the text of each (...) sub-group.
//  A sub-group without a recorded match is stored as an empty string.
//  If there is no match (or no compiled expression, or the string is
//  empty) \c groups is cleared and false is returned.
//  Templated to support both std::string and Foam::string
template<class StringType>
bool matchGrouping
(
const std::string&,
List<StringType>& groups
) const;
public: public:
// Static Member Functions // Static Member Functions
@ -135,15 +149,14 @@ public:
// Editing // Editing
//- Compile pattern into a regular expression, optionally ignoring //- Compile pattern into a regular expression,
// case // optionally ignoring case
void set(const char*, const bool ignoreCase=false) const; void set(const char*, const bool ignoreCase=false) const;
//- Compile pattern into a regular expression, optionally ignoring //- Compile pattern into a regular expression,
// case // optionally ignoring case
void set(const std::string&, const bool ignoreCase=false) const; void set(const std::string&, const bool ignoreCase=false) const;
//- Release precompiled expression. //- Release precompiled expression.
// Returns true if precompiled expression existed before clear // Returns true if precompiled expression existed before clear
bool clear() const; bool clear() const;
@ -161,7 +174,11 @@ public:
//- Return true if it matches and sets the sub-groups matched //- Return true if it matches and sets the sub-groups matched
// The begin-of-line (^) and end-of-line ($) anchors are implicit // The begin-of-line (^) and end-of-line ($) anchors are implicit
bool match(const string&, List<string>& groups) const; bool match(const std::string&, List<std::string>& groups) const;
//- Return true if it matches and sets the sub-groups matched.
//  Overload that stores the sub-groups in a List of Foam::string.
//  The begin-of-line (^) and end-of-line ($) anchors are implicit
bool match(const std::string&, List<string>& groups) const;
//- Return true if the regex was found within string //- Return true if the regex was found within string
bool search(const std::string& str) const bool search(const std::string& str) const