ENH: downgrade missing default system/decomposeParDict to warning

- If the default system/decomposeParDict is missing, skip the check for a
  matching number of processor directories. This can make job dispatch
  easier. Does not apply if -decomposeParDict was explicitly specified.

STYLE: adjust naming of host/slaves in argList
This commit is contained in:
Mark Olesen
2021-03-22 10:22:44 +01:00
parent 201f117f5f
commit b99078768e
2 changed files with 126 additions and 96 deletions

View File

@ -54,7 +54,7 @@ InfoSwitches
// Report hosts used (parallel) // Report hosts used (parallel)
// - 0 = none // - 0 = none
// - 1 = per-host-count, but unsorted // - 1 = per-host-count, but unsorted
// - 2 = long output of "slave.pid" ... // - 2 = long output with host.pid ...
writeHosts 1; writeHosts 1;
// Report list of roots used (parallel) // Report list of roots used (parallel)

View File

@ -122,7 +122,7 @@ Foam::argList::initValidTables::initValidTables()
( (
"roots", "roots",
"(dir1 .. dirN)", "(dir1 .. dirN)",
"Slave root directories for distributed running", "Subprocess root directories for distributed running",
true // advanced option true // advanced option
); );
validParOptions.set validParOptions.set
@ -141,7 +141,7 @@ Foam::argList::initValidTables::initValidTables()
( (
"hostRoots", "hostRoots",
"((host1 dir1) .. (hostN dirN))", "((host1 dir1) .. (hostN dirN))",
"Per-host slave root directories for distributed running." "Per-subprocess root directories for distributed running."
" The host specification can be a regex.", " The host specification can be a regex.",
true // advanced option true // advanced option
); );
@ -218,14 +218,14 @@ namespace Foam
// //
// Always include the master too. // Always include the master too.
// This provides a better overview of the subscription // This provides a better overview of the subscription
static void printHostsSubscription(const UList<string>& slaveProcs) static void printHostsSubscription(const UList<string>& hostProcs)
{ {
Info<< "Hosts :" << nl << "(" << nl; Info<< "Hosts :\n(" << nl;
std::string prev = hostName(); std::string prev = Foam::hostName();
int count = 1; int count = 1;
for (const auto& str : slaveProcs) for (const auto& str : hostProcs)
{ {
std::string curr(str.substr(0, str.rfind('.'))); std::string curr(str.substr(0, str.rfind('.')));
@ -234,7 +234,7 @@ static void printHostsSubscription(const UList<string>& slaveProcs)
if (count) if (count)
{ {
// Finish previous // Finish previous
Info<<" (" << prev.c_str() << " " << count << ")" << nl; Info<< " (" << prev.c_str() << ' ' << count << ')' << nl;
count = 0; count = 0;
} }
@ -246,10 +246,10 @@ static void printHostsSubscription(const UList<string>& slaveProcs)
if (count) if (count)
{ {
// Finished last one // Finished last one
Info<<" (" << prev.c_str() << " " << count << ")" << nl; Info<< " (" << prev.c_str() << ' ' << count << ')' << nl;
} }
Info<< ")" << nl; Info<< ')' << nl;
} }
} // End namespace Foam } // End namespace Foam
@ -877,7 +877,7 @@ Foam::argList::argList
{ {
// The '-debug-switch' option: // The '-debug-switch' option:
// change registered debug switch // change registered debug switch
DetailInfo << "DebugSwitch "; DetailInfo << "debug-switch ";
debug::debugObjects() debug::debugObjects()
.setNamedInt(args_[argi], 1, true); .setNamedInt(args_[argi], 1, true);
} }
@ -885,7 +885,7 @@ Foam::argList::argList
{ {
// The '-info-switch' option: // The '-info-switch' option:
// change registered info switch // change registered info switch
DetailInfo << "InfoSwitch "; DetailInfo << "info-switch ";
debug::infoObjects() debug::infoObjects()
.setNamedInt(args_[argi], 1, true); .setNamedInt(args_[argi], 1, true);
} }
@ -893,7 +893,7 @@ Foam::argList::argList
{ {
// The '-opt-switch' option: // The '-opt-switch' option:
// change registered optimisation switch // change registered optimisation switch
DetailInfo << "OptimisationSwitch "; DetailInfo << "opt-switch ";
debug::optimisationObjects() debug::optimisationObjects()
.setNamedInt(args_[argi], 1, true); .setNamedInt(args_[argi], 1, true);
} }
@ -1057,7 +1057,7 @@ void Foam::argList::parse
<< "Exec : " << commandLine_.c_str() << nl << "Exec : " << commandLine_.c_str() << nl
<< "Date : " << dateString.c_str() << nl << "Date : " << dateString.c_str() << nl
<< "Time : " << timeString.c_str() << nl << "Time : " << timeString.c_str() << nl
<< "Host : " << hostName().c_str() << nl << "Host : " << Foam::hostName().c_str() << nl
<< "PID : " << pid() << endl; << "PID : " << pid() << endl;
} }
@ -1109,36 +1109,36 @@ void Foam::argList::parse
} }
stringList slaveProcs; stringList hostMachine;
stringList slaveMachine; stringList hostProcs;
const int writeHostsSwitch = debug::infoSwitch("writeHosts", 1); const int writeHostsSwitch = Foam::debug::infoSwitch("writeHosts", 1);
// Collect slave machine/pid, and check that the build is identical // Collect machine/pid, and check that the build is identical
if (parRunControl_.parRun()) if (parRunControl_.parRun())
{ {
if (Pstream::master()) if (Pstream::master())
{ {
slaveProcs.resize(Pstream::nProcs()-1); hostMachine.resize(Pstream::nProcs()-1);
slaveMachine.resize(Pstream::nProcs()-1); hostProcs.resize(Pstream::nProcs()-1);
label proci = 0; string procBuild;
for (const int slave : Pstream::subProcs()) label procPid;
int proci = 0;
for (const int subproci : Pstream::subProcs())
{ {
IPstream fromSlave(Pstream::commsTypes::scheduled, slave); IPstream fromSubproc(Pstream::commsTypes::scheduled, subproci);
string slaveBuild; fromSubproc >> procBuild >> hostMachine[proci] >> procPid;
label slavePid;
fromSlave >> slaveBuild >> slaveMachine[proci] >> slavePid;
slaveProcs[proci] = slaveMachine[proci] + "." + name(slavePid); hostProcs[proci] = hostMachine[proci] + "." + name(procPid);
++proci; ++proci;
// Verify that all processors are running the same build // Verify that all processors are running the same build
if (slaveBuild != foamVersion::build) if (procBuild != foamVersion::build)
{ {
FatalErrorIn(executable()) FatalErrorIn(executable())
<< "Master is running version " << foamVersion::build << "Running build version " << foamVersion::build
<< "; slave " << proci << " is running version " << " but proc " << subproci << " is running "
<< slaveBuild << procBuild << nl
<< exit(FatalError); << exit(FatalError);
} }
} }
@ -1150,7 +1150,7 @@ void Foam::argList::parse
Pstream::commsTypes::scheduled, Pstream::commsTypes::scheduled,
Pstream::masterNo() Pstream::masterNo()
); );
toMaster << foamVersion::build << hostName() << pid(); toMaster << foamVersion::build << Foam::hostName() << Foam::pid();
} }
} }
@ -1170,14 +1170,13 @@ void Foam::argList::parse
// Establish rootPath_/globalCase_/case_ for master // Establish rootPath_/globalCase_/case_ for master
setCasePaths(); setCasePaths();
// Establish location of decomposeParDict, allow override with // The system/decomposeParDict (or equivalent)
// the -decomposeParDict option. fileName source;
fileName source = rootPath_/globalCase_/"system"/"decomposeParDict";
if (options_.found("decomposeParDict")) if (this->readIfPresent("decomposeParDict", source))
{ {
bool adjustOpt = false; bool adjustOpt = false;
source = options_["decomposeParDict"];
if (isDir(source)) if (isDir(source))
{ {
source /= "decomposeParDict"; source /= "decomposeParDict";
@ -1203,8 +1202,8 @@ void Foam::argList::parse
label dictNProcs = -1; label dictNProcs = -1;
if (this->readListIfPresent("roots", roots)) if (this->readListIfPresent("roots", roots))
{ {
parRunControl_.distributed(true);
source = "-roots"; source = "-roots";
parRunControl_.distributed(true);
if (roots.size() != 1) if (roots.size() != 1)
{ {
dictNProcs = roots.size()+1; dictNProcs = roots.size()+1;
@ -1212,10 +1211,9 @@ void Foam::argList::parse
} }
else if (options_.found("hostRoots")) else if (options_.found("hostRoots"))
{ {
roots.resize(Pstream::nProcs()-1, fileName::null);
source = "-hostRoots"; source = "-hostRoots";
ITstream is(source, options_["hostRoots"]); roots.resize(Pstream::nProcs()-1, fileName::null);
ITstream is(this->lookup("hostRoots"));
List<Tuple2<wordRe, fileName>> hostRoots(is); List<Tuple2<wordRe, fileName>> hostRoots(is);
checkITstream(is, "hostRoots"); checkITstream(is, "hostRoots");
@ -1224,31 +1222,33 @@ void Foam::argList::parse
{ {
labelList matched labelList matched
( (
findStrings(hostRoot.first(), slaveMachine) findStrings(hostRoot.first(), hostMachine)
); );
for (const label slavei : matched) for (const label matchi : matched)
{ {
if (!roots[slavei].empty()) if (!roots[matchi].empty())
{ {
FatalErrorInFunction FatalErrorInFunction
<< "Slave " << slaveMachine[slavei] << "Multiple matching roots for "
<< " has multiple matching roots in " << hostMachine[matchi] << " in "
<< hostRoots << exit(FatalError); << hostRoots << nl
<< exit(FatalError);
} }
roots[slavei] = hostRoot.second(); roots[matchi] = hostRoot.second();
} }
} }
// Check // Check
forAll(roots, slavei) forAll(roots, hosti)
{ {
if (roots[slavei].empty()) if (roots[hosti].empty())
{ {
FatalErrorInFunction FatalErrorInFunction
<< "Slave " << slaveMachine[slavei] << "No matching roots for "
<< " has no matching roots in " << hostMachine[hosti] << " in "
<< hostRoots << exit(FatalError); << hostRoots << nl
<< exit(FatalError);
} }
} }
@ -1259,44 +1259,63 @@ void Foam::argList::parse
} }
else if (checkProcessorDirectories_ && Pstream::nProcs() > 1) else if (checkProcessorDirectories_ && Pstream::nProcs() > 1)
{ {
// Use values from decomposeParDict, the location was already // Check values from decomposeParDict
// established above.
const bool useDefault = source.empty();
if (useDefault)
{
source = rootPath_/globalCase_/"system"/"decomposeParDict";
}
// Disable any parallel comms happening inside the fileHandler // Disable any parallel comms happening inside the fileHandler
// since we are on master. This can happen e.g. inside // since we are on master. This can happen e.g. inside
// the masterUncollated/collated handler. // the masterUncollated/collated handler.
const bool oldParRun = Pstream::parRun(false); const bool oldParRun = Pstream::parRun(false);
autoPtr<ISstream> decompDictStream autoPtr<ISstream> dictStream
( (
fileHandler().NewIFstream(source) fileHandler().NewIFstream(source)
); );
if (!decompDictStream || !decompDictStream->good())
{
FatalError
<< "Cannot read decomposeParDict from "
<< source << exit(FatalError);
}
dictionary decompDict(*decompDictStream);
Pstream::parRun(oldParRun); // Restore parallel state Pstream::parRun(oldParRun); // Restore parallel state
decompDict.readEntry("numberOfSubdomains", dictNProcs); if (dictStream && dictStream->good())
{
dictionary decompDict(*dictStream);
decompDict.readEntry("numberOfSubdomains", dictNProcs);
if (decompDict.getOrDefault("distributed", false))
{
parRunControl_.distributed(true);
decompDict.readEntry("roots", roots);
}
}
else
{
if (useDefault)
{
// Optional if using default location
DetailInfo
<< "Warning: running without decomposeParDict "
<< this->relativePath(source) << nl;
}
else
{
// Mandatory if specified as -decomposeParDict
FatalError
<< "Cannot read decomposeParDict: "
<< this->relativePath(source) << nl
<< exit(FatalError);
}
}
if (Pstream::nProcs() == 1) if (Pstream::nProcs() == 1)
{ {
WarningInFunction Warning
<< "Running parallel on single processor. This only" << "Running parallel on single processor. This only"
<< " makes sense for multi-world simulation" << endl; << " makes sense for multi-world simulation" << endl;
dictNProcs = 1; dictNProcs = 1;
} }
if (decompDict.getOrDefault("distributed", false))
{
parRunControl_.distributed(true);
decompDict.readEntry("roots", roots);
}
} }
// Convenience: // Convenience:
@ -1306,7 +1325,7 @@ void Foam::argList::parse
const fileName rootName(roots[0]); const fileName rootName(roots[0]);
roots.resize(Pstream::nProcs()-1, rootName); roots.resize(Pstream::nProcs()-1, rootName);
// adjust dictNProcs for command-line '-roots' option // Adjust dictNProcs for command-line '-roots' option
if (dictNProcs < 0) if (dictNProcs < 0)
{ {
dictNProcs = roots.size()+1; dictNProcs = roots.size()+1;
@ -1331,14 +1350,13 @@ void Foam::argList::parse
) )
{ {
FatalError FatalError
<< source << this->relativePath(source)
<< " specifies " << dictNProcs << " specifies " << dictNProcs
<< " processors but job was started with " << " processors but job was started with "
<< Pstream::nProcs() << " processors." << Pstream::nProcs() << " processors."
<< exit(FatalError); << exit(FatalError);
} }
// Distributed data // Distributed data
if (roots.size()) if (roots.size())
{ {
@ -1347,7 +1365,7 @@ void Foam::argList::parse
FatalError FatalError
<< "number of entries in roots " << "number of entries in roots "
<< roots.size() << roots.size()
<< " is not equal to the number of slaves " << " is not equal to the number of sub-processes "
<< Pstream::nProcs()-1 << Pstream::nProcs()-1
<< exit(FatalError); << exit(FatalError);
} }
@ -1359,12 +1377,12 @@ void Foam::argList::parse
// Distribute the master's argument list (with new root) // Distribute the master's argument list (with new root)
const bool hadCaseOpt = options_.found("case"); const bool hadCaseOpt = options_.found("case");
for (const int slave : Pstream::subProcs()) for (const int subproci : Pstream::subProcs())
{ {
options_.set("case", roots[slave-1]/globalCase_); options_.set("case", roots[subproci-1]/globalCase_);
OPstream toSlave(Pstream::commsTypes::scheduled, slave); OPstream toSubproc(Pstream::commsTypes::scheduled, subproci);
toSlave << args_ << options_ << roots.size(); toSubproc << args_ << options_ << roots.size();
} }
options_.erase("case"); options_.erase("case");
@ -1382,6 +1400,7 @@ void Foam::argList::parse
( (
checkProcessorDirectories_ checkProcessorDirectories_
&& Pstream::nProcs() > 1 && Pstream::nProcs() > 1
&& dictNProcs >= 1
&& dictNProcs < Pstream::nProcs() && dictNProcs < Pstream::nProcs()
) )
{ {
@ -1408,10 +1427,10 @@ void Foam::argList::parse
} }
// Distribute the master's argument list (unaltered) // Distribute the master's argument list (unaltered)
for (const int slave : Pstream::subProcs()) for (const int subproci : Pstream::subProcs())
{ {
OPstream toSlave(Pstream::commsTypes::scheduled, slave); OPstream toSubproc(Pstream::commsTypes::scheduled, subproci);
toSlave << args_ << options_ << roots.size(); toSubproc << args_ << options_ << roots.size();
} }
} }
} }
@ -1429,7 +1448,7 @@ void Foam::argList::parse
parRunControl_.distributed(nroots); parRunControl_.distributed(nroots);
// Establish rootPath_/globalCase_/case_ for slave // Establish rootPath_/globalCase_/case_ for sub-process
setCasePaths(); setCasePaths();
} }
@ -1459,13 +1478,13 @@ void Foam::argList::parse
} }
} }
// Keep or discard slave and root information for reporting: // Keep/discard sub-process host/root information for reporting:
if (Pstream::master() && parRunControl_.parRun()) if (Pstream::master() && parRunControl_.parRun())
{ {
if (!writeHostsSwitch) if (!writeHostsSwitch)
{ {
// Clear here to ensure it doesn't show in the jobInfo // Clear here to ensure it doesn't show in the jobInfo
slaveProcs.clear(); hostProcs.clear();
} }
if (!debug::infoSwitch("writeRoots", 1)) if (!debug::infoSwitch("writeRoots", 1))
{ {
@ -1480,17 +1499,28 @@ void Foam::argList::parse
if (parRunControl_.parRun()) if (parRunControl_.parRun())
{ {
if (slaveProcs.size()) if (hostProcs.size())
{ {
if (writeHostsSwitch == 1) if (writeHostsSwitch == 1)
{ {
// Compact output (see etc/controlDict) // Compact output (see etc/controlDict)
printHostsSubscription(slaveProcs); printHostsSubscription(hostProcs);
} }
else else if (writeHostsSwitch)
{ {
// Full output of "slave.pid" // Full output of "host.pid"
Info<< "Slaves : " << slaveProcs << nl; Info<< "Hosts :\n(" << nl;
// Include master in the list
Info<< " " << Foam::hostName().c_str() << '.'
<< Foam::pid() << nl;
// Sub-processes
for (const auto& str : hostProcs)
{
Info<< " " << str.c_str() << nl;
}
Info<< ')' << nl;
} }
} }
if (roots.size()) if (roots.size())
@ -1519,9 +1549,9 @@ void Foam::argList::parse
jobInfo.add("root", rootPath_); jobInfo.add("root", rootPath_);
jobInfo.add("case", globalCase_); jobInfo.add("case", globalCase_);
jobInfo.add("nProcs", nProcs); jobInfo.add("nProcs", nProcs);
if (slaveProcs.size()) if (hostProcs.size())
{ {
jobInfo.add("slaves", slaveProcs); jobInfo.add("hosts", hostProcs);
} }
if (roots.size()) if (roots.size())
{ {
@ -1819,8 +1849,8 @@ bool Foam::argList::checkRootCase() const
if (checkProcessorDirectories_ && pathDir.empty() && Pstream::master()) if (checkProcessorDirectories_ && pathDir.empty() && Pstream::master())
{ {
// Allow slaves on non-existing processor directories, created later // Allow non-existent processor directories on sub-processes,
// (e.g. redistributePar) // to be created later (e.g. redistributePar)
FatalError FatalError
<< executable_ << executable_
<< ": cannot open case directory " << path() << ": cannot open case directory " << path()