Files
openfoam/src/OpenFOAM/primitives/strings/stringOps/stringOpsSort.C
2020-05-04 09:15:21 +02:00

327 lines
10 KiB
C

/*---------------------------------------------------------------------------*\
========= |
\\ / F ield | OpenFOAM: The Open Source CFD Toolbox
\\ / O peration |
\\ / A nd | www.openfoam.com
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2017 OpenCFD Ltd.
-------------------------------------------------------------------------------
Changes for OpenFOAM
Code cleanup, reduction, elimination of redundant code.
\*---------------------------------------------------------------------------*/
//========================================================================
// Copyright (c) 1998-2010,2011 Free Software Foundation, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, distribute with modifications, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE ABOVE COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
// THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Except as contained in this notice, the name(s) of the above copyright
// holders shall not be used in advertising or otherwise to promote the
// sale, use or other dealings in this Software without prior written
// authorization.
//========================================================================
//========================================================================
// Original Author: Jan-Marten Spit <jmspit@euronet.nl>
//========================================================================
#include "stringOpsSort.H"
#include <cctype>
#include <cstring>
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
// Local tweaks/preferences:
//
// [DIGITS_ALWAYS_FIRST] : as per original code
// This results in "file123.txt" sorting before "file.txt", which is
// inconsistent with what 'ls -v' produces
// - normally do not want this (Mark Olesen: Oct-2017)
#undef DIGITS_ALWAYS_FIRST
// [MANUAL_NUMCOMPARE] : handwritten code instead of strncmp
// The original code has a mix of strncmp for equality but handwritten code
// for greater-than/less-than.
//
// -> this seems to be unneeded, rely on strncmp() return values
// (Mark Olesen: Oct-2017)
#undef MANUAL_NUMCOMPARE
// [DEBUG_NATSTRCMP] : debug info to std::cerr (for development purposes only)
#undef DEBUG_NATSTRCMP
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
#ifdef DEBUG_NATSTRCMP
#include <iostream>
// Development aid: write a label followed by two items, each wrapped in
// angle brackets, as a single line on std::cerr.
//   text  : label printed at the start of the line
//   item1 : first value (anything streamable)
//   item2 : second value (same type as item1)
template<class T>
static inline void debugPrint(const char* text, T item1, T item2)
{
    std::ostream& err = std::cerr;

    err << text << ": <" << item1;
    err << "> <" << item2 << ">\n";
}
// Development aid: write two character sequences to std::cerr, each wrapped
// in angle brackets, on a single line.
// The end pointers are _inclusive_: the character at e1 / e2 is printed too.
//   b1, e1 : first and last character of the first sequence
//   b2, e2 : first and last character of the second sequence
static inline void debugPrint
(
    const char* b1, const char* e1,
    const char* b2, const char* e2
)
{
    // Emit "<", the characters in [first, last), the character at last
    // and finally the closing text.
    const auto emit =
        [](const char* first, const char* last, const char* closing)
        {
            std::cerr << "<";
            for (const char* iter = first; iter < last; ++iter)
            {
                std::cerr << *iter;
            }
            std::cerr << *last << closing;
        };

    emit(b1, e1, "> ");
    emit(b2, e2, ">\n");
}
#endif
#ifdef MANUAL_NUMCOMPARE
// Manual comparison of identical length (digit) sequences.
//
// Walks both strings in lock-step and returns the difference of the first
// pair of characters that differ.
//   s1, s2 : start of the two sequences (NUL-terminated storage)
//   return : negative if s1 orders first, positive if s2 orders first,
//            zero if no difference was found before either string ended
//
// The scan is bounded only by the terminating NUL, not by the end of the
// digit run - the caller is expected to have checked the lengths already.
static inline int manual_numcompare(const char* s1, const char* s2)
{
    while (*s1 && *s2)
    {
        const int cmp = (*s1 - *s2);

        // Stop at the first mismatch (a zero difference means 'keep going')
        if (cmp) return cmp;

        ++s1; ++s2;
    }

    return 0; // Length check done before in caller.
}
#endif
// ------------------------------------------------------------------------- //
// Natural-order comparison of two NUL-terminated strings.
//
// Runs of digits that occur at the same position in both strings are compared
// as numbers (leading zeros are skipped), everything else is compared
// character by character.
//
//   s1, s2 : the strings to compare (dereferenced unconditionally)
//   return : negative if s1 orders before s2, positive if s1 orders after s2,
//            zero if they are considered equal
//
// If the common part compares equal, the tie is broken first by the recorded
// number of leading zeros (fewer zeros first) and then by length (shorter
// string first).
int Foam::stringOps::natstrcmp(const char* s1, const char* s2)
{
    #ifdef DEBUG_NATSTRCMP
    debugPrint("natstrcmp", s1, s2);
    #endif

    // isdigit() is only defined for values representable as unsigned char
    // (or EOF). Plain char may be negative (eg, UTF-8 bytes), so convert
    // before classifying.
    const auto isDigit = [](const char c) -> bool
    {
        return std::isdigit(static_cast<unsigned char>(c)) != 0;
    };

    // States for state engine
    enum stateType { SCAN, ALPHA, NUMERIC };

    // Number of leading zeroes
    unsigned zeros1 = 0;
    unsigned zeros2 = 0;

    // Pointers to begin/end of integer sequences (without leading zeros)
    // The end pointers are inclusive.
    const char* numbeg1 = nullptr;
    const char* numbeg2 = nullptr;
    const char* numend1 = nullptr;
    const char* numend2 = nullptr;

    stateType state = SCAN;

    const char* p1 = s1;
    const char* p2 = s2;

    while (*p1 && *p2)
    {
        // Bitmask for digits vs alpha
        // - 0: neither are digits
        // - 1: p1 is the only digit
        // - 2: p2 is the only digit
        // - 3: both p1 and p2 are digits
        const unsigned digitMask =
            ((isDigit(*p2) ? 2:0) | (isDigit(*p1) ? 1:0));

        switch (state)
        {
            case SCAN:
            {
                #ifdef DEBUG_NATSTRCMP
                debugPrint("SCAN", *p1, *p2);
                #endif

                switch (digitMask)
                {
                    case 0: // (alpha,alpha)
                    {
                        state = ALPHA;

                        if (*p1 != *p2)
                        {
                            // Lexical compare
                            return (*p1 - *p2);
                        }

                        ++p1; ++p2;
                        break;
                    }

                    #ifdef DIGITS_ALWAYS_FIRST
                    case 0x1: // (digit,alpha) : digit < alpha
                    {
                        return -1;
                    }
                    case 0x2: // (alpha,digit) : alpha > digit
                    {
                        return 1;
                    }
                    #else /* DIGITS_ALWAYS_FIRST */
                    case 0x1: // (digit,alpha)
                    case 0x2: // (alpha,digit)
                    {
                        // Lexical compare for digits/alpha
                        return (*p1 - *p2);
                    }
                    #endif /* DIGITS_ALWAYS_FIRST */

                    default: // (digit,digit)
                    {
                        state = NUMERIC;

                        // State changed from SCAN to NUMERIC, so skip
                        // leading zeros so the numeric comparison is
                        // untainted by them, but capture the first occurrence
                        // of leading zeroes for a final tie-break if needed.

                        if (!zeros1)
                        {
                            // First occurrence of any leading zeroes
                            while (*p1 == '0') { ++p1; ++zeros1; }
                        }
                        else
                        {
                            while (*p1 == '0') { ++p1; }
                        }

                        if (!zeros2)
                        {
                            // First occurrence of any leading zeroes
                            while (*p2 == '0') { ++p2; ++zeros2; }
                        }
                        else
                        {
                            while (*p2 == '0') { ++p2; }
                        }

                        if (zeros1 == zeros2)
                        {
                            // Same number of zeros - so irrelevant
                            zeros1 = zeros2 = 0;
                        }

                        // The number consisted of zeros only: step back
                        // onto the last zero so that the sequence to compare
                        // has at least one digit.
                        if (!isDigit(*p1)) --p1;
                        if (!isDigit(*p2)) --p2;

                        numbeg1 = numend1 = p1;
                        numbeg2 = numend2 = p2;
                        break;
                    }
                }
                break;
            }

            case ALPHA:
            {
                #ifdef DEBUG_NATSTRCMP
                debugPrint("ALPHA", *p1, *p2);
                #endif

                if (digitMask)
                {
                    // At least one digit: let SCAN decide, without
                    // consuming any character here.
                    state = SCAN;
                }
                else if (*p1 == *p2) // (alpha,alpha)
                {
                    ++p1; ++p2;
                }
                else
                {
                    // Lexical compare
                    return (*p1 - *p2);
                }
                break;
            }

            case NUMERIC:
            {
                // Advance to the end of both digit sequences,
                // remembering the last digit seen (inclusive end).
                while (isDigit(*p1)) numend1 = p1++;
                while (isDigit(*p2)) numend2 = p2++;

                #ifdef DEBUG_NATSTRCMP
                debugPrint("NUMERIC", *p1, *p2);
                debugPrint(numbeg1,numend1, numbeg2,numend2);
                #endif

                // Length (minus 1) of each sequence
                const std::size_t len1 = (numend1 - numbeg1);
                const std::size_t len2 = (numend2 - numbeg2);

                // Without leading zeros, more digits means a larger number
                if (len1 < len2)
                {
                    return -1;
                }
                else if (len1 > len2)
                {
                    return 1;
                }

                // Same number of digits, leading zeros have been skipped.
                // - so lexical and numerical compares are equivalent.

                const int cmp = std::strncmp(numbeg1, numbeg2, len1+1);

                if (!cmp)
                {
                    // Identical (digit) sequence - continue
                    state = SCAN;
                }
                else
                {
                    // Same macro name as the one guarding the definition
                    // of manual_numcompare()
                    #ifdef MANUAL_NUMCOMPARE
                    return manual_numcompare(numbeg1, numbeg2);
                    #else
                    return cmp;
                    #endif
                }
                break;
            }
        }
    }

    // Common part is identical: tie-break on leading zeros, then on length

    if (zeros1 < zeros2) return -1;
    if (zeros1 > zeros2) return 1;

    if (!*p1 && *p2) return -1;  // s1 shorter than s2
    if (*p1 && !*p2) return 1;   // s1 longer than s2

    return 0;
}
// ************************************************************************* //