ENH: broadcast dynamicCode, create missing processor directories

- code is compiled dynamically on the master node.
  In the normal (non-distributed) case, the sub-processes simply poll
  the shared (NFS) filesystem until the compiled library appears.

  For a case with distributed roots, first broadcast it (via MPI)
  to the IO master nodes and then poll afterwards.

- on startup, detect and create missing processorXXX/ subdirectories
  on distributed filesystems
This commit is contained in:
mattijs
2022-11-24 09:00:00 +00:00
committed by Andrew Heather
parent 65523e9d24
commit 030a467a23
3 changed files with 158 additions and 8 deletions

View File

@ -196,7 +196,17 @@ Foam::functionEntries::codeStream::getFunction
// (flag set by regIOobject::read, baseIOdictionary constructor)
if (!masterOnly && returnReduceOr(lib == nullptr))
{
// Broadcast distributed...
// Broadcast to distributed masters
if (fileHandler().distributed())
{
fileHandler().broadcastCopy
(
UPstream::worldComm,
UPstream::master(fileHandler().comm()),
libPath,
libPath
);
}
dynamicCode::waitForFile(libPath, context.dict());
}

View File

@ -254,9 +254,21 @@ void Foam::codedBase::createLibrary
UPstream::barrier(UPstream::worldComm);
}
// Broadcast distributed...
const fileName libPath = dynCode.libPath();
dynamicCode::waitForFile(dynCode.libPath(), context.dict());
// Broadcast to distributed masters
if (fileHandler().distributed())
{
fileHandler().broadcastCopy
(
UPstream::worldComm,
UPstream::master(fileHandler().comm()),
libPath,
libPath
);
}
dynamicCode::waitForFile(libPath, context.dict());
}

View File

@ -1763,16 +1763,144 @@ void Foam::argList::parse
}
// If needed, adjust fileHandler for distributed roots
if (runControl_.distributed())
if (runControl_.distributed() && fileOperation::fileHandlerPtr_)
{
if (fileOperation::fileHandlerPtr_)
fileOperation::fileHandlerPtr_->distributed(true);
const labelList& ranks = fileHandler().ioRanks();
if (runControl_.parRun() && ranks.size())
{
fileOperation::fileHandlerPtr_->distributed(true);
// Detect processor directories both on the local proc and on the
// (world) master proc. If the local proc doesn't have them
// but the master does, attempt to copy them.
// Expected local directory name
const fileName procDir
(
rootPath_
/ globalCase_
/ ("processor" + Foam::name(UPstream::myProcNo()))
);
// Try and find my local directory using the fileHandler. This
// will check the local disk on the IO rank
// (since running distributed)
fileNameList pathDirs(UPstream::nProcs());
auto& pathDir = pathDirs[UPstream::myProcNo()];
pathDir = fileHandler().filePath(procDir, false);
if (returnReduceOr(pathDir.empty()))
{
// There is at least one processor that cannot find
// the processor directory. Look for it on the master.
// E.g. decomposed into 4 processors, two roots:
// processors4_0-1/
// processors4_2-3/
// So:
// - processor1 reads the same disk as processor0
// - processor2 needs the whole directory sent over
// - processor3 reads the same disk as processor2
if (UPstream::master() && bannerEnabled())
{
Info<< "I/O :"
<< " distributed - copying missing directories"
<< nl;
}
// Collect all wanted directories (or empty). Note: could
// just collect missing ones ...
Pstream::gatherList(pathDirs);
fileName masterRootPath(rootPath_);
Pstream::broadcast(masterRootPath);
List<fileNameList> rankToDirs(UPstream::nProcs());
if (UPstream::master())
{
const bool oldParRun = Pstream::parRun(false);
label rank = 0;
for (label proci = 1; proci < pathDirs.size(); ++proci)
{
if (ranks.contains(proci))
{
rank = proci;
}
if (pathDirs[proci].empty())
{
// Synthesise corresponding name on the master
// processor
const fileName procDir
(
rootPath_
/ globalCase_
/ ("processor" + Foam::name(proci))
);
const fileName foundDir
(
fileHandler().filePath(procDir, false)
);
if
(
!foundDir.empty()
&& !rankToDirs[rank].contains(foundDir)
)
{
rankToDirs[rank].push_back(foundDir);
}
}
}
UPstream::parRun(oldParRun);
}
Pstream::broadcast(rankToDirs);
// Copy missing directories on all the IOranks.
// Note: instead of passing 'writeOnProc' flag we could create
// communicator just between master and IOrank, but that is
// also expensive.
forAll(rankToDirs, proci)
{
// Am I the responsible IOrank for this processor
const bool amIO = (UPstream::myProcNo() == proci);
// Construct equivalent directory on proci
for (const auto& srcDir : rankToDirs[proci])
{
const fileName tgtDir
(
rootPath_
/ srcDir.relative(masterRootPath)
);
if (amIO)
{
// I am the IO rank
Pout<< "On rank " << proci << nl
<< " copying : " << srcDir << nl
<< " to : " << tgtDir << endl;
}
fileHandler().broadcastCopy
(
UPstream::worldComm,
amIO,
tgtDir,
tgtDir
);
}
}
}
}
}
// Keep/discard sub-process host/root information for reporting:
if (Pstream::master() && runControl_.parRun())
if (UPstream::master() && runControl_.parRun())
{
if (!writeHostsSwitch)
{
@ -1785,7 +1913,7 @@ void Foam::argList::parse
}
}
if (Pstream::master() && bannerEnabled())
if (UPstream::master() && bannerEnabled())
{
Info<< "Case : " << (rootPath_/globalCase_).c_str() << nl
<< "nProcs : " << nProcs << nl;