From 05f08c07994a4da1fc9f93a23425183e4801ff92 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 25 Jan 2021 21:44:38 -0500 Subject: [PATCH] avoid false positives in has_utf8() check --- src/utils.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/utils.h b/src/utils.h index ab1d52c57a..0c619d316d 100644 --- a/src/utils.h +++ b/src/utils.h @@ -220,7 +220,9 @@ namespace LAMMPS_NS { * limited to 21-bits. * * For the sake of efficiency this test only checks if a character in the string - * has the highest two bits set and thus is likely an UTF-8 character. It + * has the highest bit set and thus is very likely an UTF-8 character. It will + * not be able to tell whether this is a valid UTF-8 character or whether it is a + * 2-byte, 3-byte, or 4-byte character. * \verbatim embed:rst @@ -234,7 +236,7 @@ namespace LAMMPS_NS { inline bool has_utf8(const std::string &line) { const unsigned char * const in = (const unsigned char *)line.c_str(); - for (int i=0; i < line.size(); ++i) if (in[i] & 0xc0U) return true; + for (int i=0; i < line.size(); ++i) if (in[i] & 0x80U) return true; return false; }