FIX: redistributePar problems with lagrangian

- the fileHandler changes included setting cacheLevel(0) to avoid
  blocking with redistributePar. However, this meant that if clouds
  were not uniformly present on all ranks, the fileHandler would follow
  different code paths on different ranks, which led to blocking.

  Now switch to distributed mode for the lagrangian operations within
  redistributePar based on the cacheLevel information.

FIX: avoid triggering a false processor check in argList

- when redistributing to fewer ranks
This commit is contained in:
Mark Olesen
2023-12-20 14:59:36 +01:00
parent 88be9ef5c6
commit de133af526
6 changed files with 133 additions and 32 deletions

View File

@ -29,6 +29,7 @@ License
#include "ListOps.H" #include "ListOps.H"
#include "parLagrangianDistributor.H" #include "parLagrangianDistributor.H"
#include "passivePositionParticleCloud.H" #include "passivePositionParticleCloud.H"
#include "fileOperation.H"
// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * // // * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //
@ -76,9 +77,10 @@ void Foam::parLagrangianDistributor::findClouds
( (
cloud::prefix, cloud::prefix,
mesh.time().timeName(), mesh.time().timeName(),
mesh, mesh.thisDb(),
IOobjectOption::MUST_READ, IOobjectOption::MUST_READ,
IOobjectOption::NO_WRITE IOobjectOption::NO_WRITE,
IOobjectOption::NO_REGISTER
); );
// Using the fileHandler: // Using the fileHandler:
@ -110,9 +112,10 @@ void Foam::parLagrangianDistributor::findClouds
Pstream::combineReduce(cloudNames, ListOps::uniqueEqOp<word>()); Pstream::combineReduce(cloudNames, ListOps::uniqueEqOp<word>());
Foam::sort(cloudNames); // Consistent order Foam::sort(cloudNames); // Consistent order
const label nClouds = cloudNames.size();
// See which of the global cloudNames I have // See which of the global cloudNames I have
haveClouds.resize_nocopy(cloudNames.size()); haveClouds.resize_nocopy(nClouds);
haveClouds = false; haveClouds = false;
for (const fileName& localCloudName : localCloudDirs) for (const fileName& localCloudName : localCloudDirs)
@ -125,17 +128,21 @@ void Foam::parLagrangianDistributor::findClouds
} }
// Collect fields per cloud // Collect fields per cloud
objectNames.resize(cloudNames.size()); objectNames.resize_nocopy(nClouds);
for (const fileName& localCloudName : localCloudDirs) for (label cloudi = 0; cloudi < nClouds; ++cloudi)
{ {
objectNames[cloudi].clear();
if (!haveClouds[cloudi]) continue;
// Do local scan for valid cloud objects // Do local scan for valid cloud objects
const bool oldParRun = UPstream::parRun(false); const bool oldParRun = UPstream::parRun(false);
IOobjectList localObjs IOobjectList localObjs
( (
mesh, mesh,
mesh.time().timeName(), mesh.time().timeName(),
cloud::prefix/localCloudName cloud::prefix/cloudNames[cloudi]
); );
UPstream::parRun(oldParRun); UPstream::parRun(oldParRun);
@ -152,9 +159,6 @@ void Foam::parLagrangianDistributor::findClouds
if (isCloud) if (isCloud)
{ {
// Has coordinates/positions - so must be a valid cloud // Has coordinates/positions - so must be a valid cloud
const label cloudi = cloudNames.find(localCloudName);
objectNames[cloudi] = localObjs.sortedNames(); objectNames[cloudi] = localObjs.sortedNames();
} }
} }
@ -333,9 +337,24 @@ Foam::parLagrangianDistributor::distributeLagrangianPositions
const word& cloudName const word& cloudName
) const ) const
{ {
// Load cloud and send particle // Mixed exists/missing on various ranks?
// Avoid masterRead+broadcast (can cause blocking)
auto& handler = Foam::fileHandler();
const bool oldDistributed =
handler.distributed
(
!fileOperation::cacheLevel() || handler.distributed()
);
// Load cloud
passivePositionParticleCloud lpi(srcMesh_, cloudName, false); passivePositionParticleCloud lpi(srcMesh_, cloudName, false);
// Restore distributed flag
handler.distributed(oldDistributed);
// Distribute particles to other ranks
return distributeLagrangianPositions(lpi); return distributeLagrangianPositions(lpi);
} }

View File

@ -34,6 +34,7 @@ Description
#include "parLagrangianDistributor.H" #include "parLagrangianDistributor.H"
#include "unmappedPassivePositionParticleCloud.H" #include "unmappedPassivePositionParticleCloud.H"
#include "fileOperation.H"
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
@ -59,6 +60,17 @@ readLagrangian
(void)mesh.tetBasePtIs(); (void)mesh.tetBasePtIs();
} }
// Mixed exists/missing on various ranks?
// Avoid masterRead+broadcast (can cause blocking)
auto& handler = Foam::fileHandler();
const bool oldDistributed =
handler.distributed
(
!fileOperation::cacheLevel() || handler.distributed()
);
// Setup clouds // Setup clouds
forAll(cloudNames, i) forAll(cloudNames, i)
{ {
@ -88,6 +100,9 @@ readLagrangian
); );
} }
// Restore distributed flag
handler.distributed(oldDistributed);
return clouds; return clouds;
} }
@ -164,6 +179,16 @@ void reconstructLagrangian
} }
const auto& distributor = *distributorPtr; const auto& distributor = *distributorPtr;
// Mixed exists/missing on various ranks?
// Avoid masterRead+broadcast (can cause blocking)
auto& handler = Foam::fileHandler();
const bool oldDistributed =
handler.distributed
(
!fileOperation::cacheLevel() || handler.distributed()
);
forAll(cloudNames, cloudi) forAll(cloudNames, cloudi)
{ {
const word& cloudName = cloudNames[cloudi]; const word& cloudName = cloudNames[cloudi];
@ -171,6 +196,12 @@ void reconstructLagrangian
Info<< "Reconstructing lagrangian fields for cloud " Info<< "Reconstructing lagrangian fields for cloud "
<< cloudName << nl << endl; << cloudName << nl << endl;
autoPtr<mapDistributeBase> lagrangianMapPtr =
distributor.distributeLagrangianPositions
(
cloudName
);
IOobjectList cloudObjs IOobjectList cloudObjs
( (
mesh, mesh,
@ -178,12 +209,6 @@ void reconstructLagrangian
cloud::prefix/cloudName cloud::prefix/cloudName
); );
autoPtr<mapDistributeBase> lagrangianMapPtr =
distributor.distributeLagrangianPositions
(
cloudName
);
distributor.distributeAllFields distributor.distributeAllFields
( (
lagrangianMapPtr(), lagrangianMapPtr(),
@ -193,6 +218,9 @@ void reconstructLagrangian
selectedFields selectedFields
); );
} }
// Restore distributed flag
handler.distributed(oldDistributed);
} }

View File

@ -1440,7 +1440,7 @@ int main(int argc, char *argv[])
else else
{ {
// Directory does not exist. If this happens on master -> decompose mode // Directory does not exist. If this happens on master -> decompose mode
if (UPstream::master() && !reconstruct) if (UPstream::master() && !reconstruct && !decompose)
{ {
decompose = true; decompose = true;
InfoOrPout InfoOrPout
@ -1454,7 +1454,8 @@ int main(int argc, char *argv[])
{ {
// The UPstream::nProcs is either the source or destination procs // The UPstream::nProcs is either the source or destination procs
fileOperation::nProcsFilter(UPstream::nProcs()); fileOperation::nProcsFilter(UPstream::nProcs());
InfoOrPout<< "Switching to exact matching for " InfoOrPout
<< "Switching to exact matching for "
<< fileOperation::processorsBaseDir + Foam::name(UPstream::nProcs()) << fileOperation::processorsBaseDir + Foam::name(UPstream::nProcs())
<< " processor directories" << " processor directories"
<< nl << endl; << nl << endl;

View File

@ -1711,21 +1711,21 @@ void Foam::argList::parse
} }
if (nProcDirs != Pstream::nProcs()) if (nProcDirs < UPstream::nProcs())
{ {
FatalError FatalError
<< "number of processor directories = " << "number of processor directories = "
<< nProcDirs << nProcDirs
<< " is not equal to the number of processors = " << " is not equal to the number of processors = "
<< Pstream::nProcs() << UPstream::nProcs()
<< exit(FatalError); << exit(FatalError);
} }
} }
// Distribute the master's argument list (unaltered) // Distribute the master's argument list (unaltered)
for (const int subproci : Pstream::subProcs()) for (const int proci : UPstream::subProcs())
{ {
OPstream toProc(Pstream::commsTypes::scheduled, subproci); OPstream toProc(UPstream::commsTypes::scheduled, proci);
toProc toProc
<< args_ << options_ << args_ << options_

View File

@ -3,6 +3,10 @@ cd "${0%/*}" || exit # Run from this directory
. ${WM_PROJECT_DIR:?}/bin/tools/RunFunctions # Tutorial run functions . ${WM_PROJECT_DIR:?}/bin/tools/RunFunctions # Tutorial run functions
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
fileHandler="-fileHandler collated"
unset fileHandler
## decompDict5="-decomposeParDict system/decomposeParDict.5"
# Create mesh # Create mesh
runApplication blockMesh runApplication blockMesh
@ -18,12 +22,29 @@ runApplication createBaffles -overwrite
runApplication $(getApplication) runApplication $(getApplication)
#- RedistributePar to do decomposition #- RedistributePar to do decomposition
runParallel redistributePar -decompose -cellDist runParallel redistributePar -decompose -cellDist $fileHandler
#- Continue running for a bit more #- Continue running for a bit more
runParallel -s parallel $(getApplication) runParallel -s parallel $(getApplication) $fileHandler
if :
then
#- Reconstruct all times
runParallel -s reconstruct \
redistributePar -reconstruct $fileHandler
else
# Not yet entirely working...
#- Send to more ranks
runParallel -s more-ranks $decompDict5 redistributePar $fileHandler
#- Continue running for a bit more
runParallel -s more-ranks $decompDict5 $(getApplication) $fileHandler
#- Reconstruct all times #- Reconstruct all times
runParallel -s 1 redistributePar -reconstruct runParallel -s reconstruct $decompDict5 \
redistributePar -reconstruct $fileHandler -latestTime
fi
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------

View File

@ -0,0 +1,32 @@
/*--------------------------------*- C++ -*----------------------------------*\
| ========= | |
| \\ / F ield | OpenFOAM: The Open Source CFD Toolbox |
| \\ / O peration | Version: v2312 |
| \\ / A nd | Website: www.openfoam.com |
| \\/ M anipulation | |
\*---------------------------------------------------------------------------*/
FoamFile
{
version 2.0;
format ascii;
class dictionary;
object decomposeParDict;
}
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
// Number of subdomains requested by this dictionary
numberOfSubdomains 5;
// Decomposition method selected for this case
method scotch;
// Additional constraints applied to the decomposition
constraints
{
//- Keep owner and neighbour on same processor for faces in zones:
faces
{
// Constraint type, followed by the face zones it is applied to
type preserveFaceZones;
zones (cycLeft cycRight);
}
}
// ************************************************************************* //