add UTF-8 substitution and detection
This commit is contained in:
59
src/atom.cpp
59
src/atom.cpp
@ -1185,6 +1185,7 @@ void Atom::data_atoms(int n, char *buf, tagint id_offset, tagint mol_offset,
|
||||
coord[1] >= sublo[1] && coord[1] < subhi[1] &&
|
||||
coord[2] >= sublo[2] && coord[2] < subhi[2]) {
|
||||
avec->data_atom(xdata,imagedata,values,typestr);
|
||||
typestr = utils::utf8_subst(typestr);
|
||||
if (id_offset) tag[nlocal-1] += id_offset;
|
||||
if (mol_offset) molecule[nlocal-1] += mol_offset;
|
||||
// clang-format on
|
||||
@ -1291,7 +1292,7 @@ void Atom::data_bonds(int n, char *buf, int *count, tagint id_offset,
|
||||
// Bonds line is: number(ignored), bond type, atomID 1, atomID 2
|
||||
if (nwords > 0) {
|
||||
if (nwords != 4) error->all(FLERR, "Incorrect format in {}: {}", location, utils::trim(buf));
|
||||
typestr = values[1];
|
||||
typestr = utils::utf8_subst(values[1]);
|
||||
atom1 = utils::tnumeric(FLERR, values[2], false, lmp);
|
||||
atom2 = utils::tnumeric(FLERR, values[3], false, lmp);
|
||||
if (id_offset) {
|
||||
@ -1388,7 +1389,7 @@ void Atom::data_angles(int n, char *buf, int *count, tagint id_offset,
|
||||
// Angles line is: number(ignored), angle type, atomID 1, atomID 2, atomID 3
|
||||
if (nwords > 0) {
|
||||
if (nwords != 5) error->all(FLERR, "Incorrect format in {}: {}", location, utils::trim(buf));
|
||||
typestr = values[1];
|
||||
typestr = utils::utf8_subst(values[1]);
|
||||
atom1 = utils::tnumeric(FLERR, values[2], false, lmp);
|
||||
atom2 = utils::tnumeric(FLERR, values[3], false, lmp);
|
||||
atom3 = utils::tnumeric(FLERR, values[4], false, lmp);
|
||||
@ -1501,7 +1502,7 @@ void Atom::data_dihedrals(int n, char *buf, int *count, tagint id_offset,
|
||||
// Dihedrals line is: number(ignored), dihedral type, atomID 1, atomID 2, atomID 3, atomID 4
|
||||
if (nwords > 0) {
|
||||
if (nwords != 6) error->all(FLERR, "Incorrect format in {}: {}", location, utils::trim(buf));
|
||||
typestr = values[1];
|
||||
typestr = utils::utf8_subst(values[1]);
|
||||
atom1 = utils::tnumeric(FLERR, values[2], false, lmp);
|
||||
atom2 = utils::tnumeric(FLERR, values[3], false, lmp);
|
||||
atom3 = utils::tnumeric(FLERR, values[4], false, lmp);
|
||||
@ -1633,7 +1634,7 @@ void Atom::data_impropers(int n, char *buf, int *count, tagint id_offset,
|
||||
// Impropers line is: number(ignored), improper type, atomID 1, atomID 2, atomID 3, atomID 4
|
||||
if (nwords > 0) {
|
||||
if (nwords != 6) error->all(FLERR, "Incorrect format in {}: {}", location, utils::trim(buf));
|
||||
typestr = values[1];
|
||||
typestr = utils::utf8_subst(values[1]);
|
||||
atom1 = utils::tnumeric(FLERR, values[2], false, lmp);
|
||||
atom2 = utils::tnumeric(FLERR, values[3], false, lmp);
|
||||
atom3 = utils::tnumeric(FLERR, values[4], false, lmp);
|
||||
@ -1934,30 +1935,44 @@ void Atom::set_mass(const char *file, int line, int itype, double value)
|
||||
|
||||
void Atom::set_mass(const char *file, int line, int /*narg*/, char **arg)
|
||||
{
|
||||
if (mass == nullptr) error->all(file,line, "Cannot set atom mass for atom style {}", atom_style);
|
||||
if (mass == nullptr)
|
||||
error->all(file,line, "Cannot set per-type atom mass for atom style {}", atom_style);
|
||||
|
||||
std::string typestr = utils::trim(arg[0]);
|
||||
if (!isdigit(typestr[0]) && typestr[0] != '*') {
|
||||
int itype = lmap->find(typestr,Atom::ATOM);
|
||||
if (itype == -1) error->all(file,line,"Invalid type for mass set");
|
||||
mass[itype] = utils::numeric(FLERR,arg[1],false,lmp);
|
||||
mass_setflag[itype] = 1;
|
||||
// clang-format on
|
||||
std::string typestr = utils::utf8_subst(utils::trim(arg[0]));
|
||||
switch (utils::is_type(typestr)) {
|
||||
|
||||
if (mass[itype] <= 0.0) error->all(file,line,"Invalid mass value");
|
||||
} else {
|
||||
int lo,hi;
|
||||
utils::bounds(file,line,arg[0],1,ntypes,lo,hi,error);
|
||||
if ((lo < 1) || (hi > ntypes))
|
||||
error->all(file,line,"Invalid type {} for atom mass {}", arg[1]);
|
||||
case 0: { // numeric
|
||||
int lo, hi;
|
||||
utils::bounds(file, line, typestr.c_str(), 1, ntypes, lo, hi, error);
|
||||
if ((lo < 1) || (hi > ntypes))
|
||||
error->all(file, line, "Invalid atom type {} for atom mass", typestr);
|
||||
|
||||
const double value = utils::numeric(FLERR,arg[1],false,lmp);
|
||||
if (value <= 0.0) error->all(file,line,"Invalid atom mass value {}", value);
|
||||
const double value = utils::numeric(FLERR, arg[1], false, lmp);
|
||||
if (value <= 0.0)
|
||||
error->all(file, line, "Invalid atom mass value {} for type {}", value, typestr);
|
||||
|
||||
for (int itype = lo; itype <= hi; itype++) {
|
||||
mass[itype] = value;
|
||||
mass_setflag[itype] = 1;
|
||||
for (int itype = lo; itype <= hi; itype++) {
|
||||
mass[itype] = value;
|
||||
mass_setflag[itype] = 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 1: { // type label
|
||||
if (!atom->labelmapflag) error->all(FLERR, "Invalid atom type {} for setting mass", typestr);
|
||||
int itype = lmap->find(typestr, Atom::ATOM);
|
||||
if (itype == -1) error->all(file, line, "Invalid type {} for setting mass", typestr);
|
||||
mass[itype] = utils::numeric(FLERR, arg[1], false, lmp);
|
||||
mass_setflag[itype] = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
default: // invalid
|
||||
error->one(FLERR, "Invalid mass setting");
|
||||
break;
|
||||
}
|
||||
// clang-format off
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
||||
@ -1666,7 +1666,8 @@ void AtomVec::data_atom(double *coord, imageint imagetmp, const std::vector<std:
|
||||
initialize other peratom quantities
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVec::data_atom(double *coord, imageint imagetmp, const std::vector<std::string> &values, std::string &extract)
|
||||
void AtomVec::data_atom(double *coord, imageint imagetmp, const std::vector<std::string> &values,
|
||||
std::string &extract)
|
||||
{
|
||||
int m, n, datatype, cols;
|
||||
void *pdata;
|
||||
|
||||
@ -141,7 +141,7 @@ void LabelMap::modify_lmap(int narg, char **arg)
|
||||
int itype = utils::inumeric(FLERR, arg[iarg++], false, lmp);
|
||||
if ((itype < 1) || (itype > ntypes))
|
||||
error->all(FLERR, "Labelmap {} type {} must be within 1-{}", tlabel, itype, ntypes);
|
||||
std::string slabel = utils::trim(arg[iarg++]);
|
||||
std::string slabel = utils::utf8_subst(utils::trim(arg[iarg++]));
|
||||
if (utils::is_type(slabel) != 1)
|
||||
error->all(FLERR, "Type label string {} for {} type {} is invalid", slabel, tlabel, itype);
|
||||
int found = search(slabel, (*labels_map));
|
||||
|
||||
@ -2159,6 +2159,7 @@ void ReadData::typelabels(int mode)
|
||||
}
|
||||
if (nwords != 2)
|
||||
error->all(FLERR, "Invalid format in section: {} Type Labels: {}", labeltypes[mode], buf);
|
||||
values[1] = utils::utf8_subst(values[1]);
|
||||
if (utils::is_type(values[1]) != 1) error->all(FLERR, "Invalid type label {}", values[1]);
|
||||
int itype = utils::inumeric(FLERR, values[0], false, lmp);
|
||||
if ((itype < 1) || (itype > lntypes))
|
||||
|
||||
@ -1162,8 +1162,10 @@ int utils::is_type(const std::string &str)
|
||||
// TODO: the first two checks below are not really needed with this function.
|
||||
// If a type label has at least one character that is not a digit or '*'
|
||||
// it can be identified by this function as type label due to the check above.
|
||||
// Whitespace and multi-byte characters that remain after UTF-8 substitution are not allowed.
|
||||
if (isdigit(str[0]) || (str[0] == '*') || (str[0] == '#')) return -1;
|
||||
if (str.find_first_of(" \t\r\n\f") != std::string::npos) return -1;
|
||||
if (has_utf8(utf8_subst(str))) return -1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
@ -64,18 +64,28 @@ TEST_F(SetTest, NoBoxAtoms)
|
||||
command("create_box 4 box");
|
||||
command("labelmap atom 2 N1");
|
||||
command("labelmap atom 3 O1 4 H1");
|
||||
command("mass * 1.0");
|
||||
command("mass O1 3.0");
|
||||
command("mass N1 2.0");
|
||||
command("mass H1 4.0");
|
||||
END_HIDE_OUTPUT();
|
||||
ASSERT_NE(atom->lmap, nullptr);
|
||||
ASSERT_FALSE(atom->lmap->is_complete(Atom::ATOM));
|
||||
ASSERT_DOUBLE_EQ(atom->mass[1], 1.0);
|
||||
ASSERT_DOUBLE_EQ(atom->mass[2], 2.0);
|
||||
ASSERT_DOUBLE_EQ(atom->mass[3], 3.0);
|
||||
ASSERT_DOUBLE_EQ(atom->mass[4], 4.0);
|
||||
|
||||
BEGIN_HIDE_OUTPUT();
|
||||
command("labelmap atom 1 C1 2 N2 3 ' O#' 1 C1 4 H# 2 N3"); // second '#' starts comment
|
||||
command("mass \"O#\" 10.0");
|
||||
END_HIDE_OUTPUT();
|
||||
ASSERT_TRUE(atom->lmap->is_complete(Atom::ATOM));
|
||||
ASSERT_EQ(atom->lmap->find("C1", Atom::ATOM), 1);
|
||||
ASSERT_EQ(atom->lmap->find("N2", Atom::ATOM), 2);
|
||||
ASSERT_EQ(atom->lmap->find("O#", Atom::ATOM), 3);
|
||||
ASSERT_EQ(atom->lmap->find("H", Atom::ATOM), 4);
|
||||
ASSERT_DOUBLE_EQ(atom->mass[3], 10.0);
|
||||
|
||||
TEST_FAILURE(".*ERROR: Labelmap atom type 0 must be within 1-4.*",
|
||||
command("labelmap atom 0 C1"););
|
||||
|
||||
@ -141,8 +141,8 @@ TEST(Utils, count_words_with_extra_spaces)
|
||||
|
||||
TEST(Utils, join_words)
|
||||
{
|
||||
std::vector<std::string> words = {"one", "two", "three" };
|
||||
auto combined = utils::join_words(words, " ");
|
||||
std::vector<std::string> words = {"one", "two", "three"};
|
||||
auto combined = utils::join_words(words, " ");
|
||||
ASSERT_THAT(combined, StrEq("one two three"));
|
||||
combined = utils::join_words(words, "");
|
||||
ASSERT_THAT(combined, StrEq("onetwothree"));
|
||||
@ -469,6 +469,8 @@ TEST(Utils, valid_label)
|
||||
ASSERT_EQ(utils::is_type("@X2=&X1"), 1);
|
||||
ASSERT_EQ(utils::is_type("|Na|Cl|H2O|"), 1);
|
||||
ASSERT_EQ(utils::is_type("CA(1)/CB(1)"), 1);
|
||||
ASSERT_EQ(utils::is_type("A-B"), 1); // ASCII
|
||||
ASSERT_EQ(utils::is_type("A−B"), 1); // UTF-8
|
||||
}
|
||||
|
||||
TEST(Utils, invalid_label)
|
||||
|
||||
Reference in New Issue
Block a user