From 41705e9eca18548407ee7ac1c7b7d69ffe8ec415 Mon Sep 17 00:00:00 2001 From: Henry Weller Date: Thu, 16 May 2024 13:46:20 +0100 Subject: [PATCH] Lagrangian: Added support for automatic run-time load-balancing Optional CPU load caching can be switched on for Lagrangian cloud tracking and/or chemistry integration using the new cpuLoad switch in the cloudProperties or chemistryProperties dictionary files, respectively, and used for multi-constraint load-balancing by the fvMeshDistributorsLoadBalancer specified in the dynamicMeshDict file: distributor { type loadBalancer; libs ("libfvMeshDistributors.so"); multiConstraint true; redistributionInterval 10; } which uses the distributor specified in the decomposeParDict file, e.g. numberOfSubdomains 12; decomposer simple; distributor zoltan; libs ("libzoltanDecomp.so"); simpleCoeffs { n (2 2 3); } zoltanCoeffs { lb_method rcb; } The incompressibleDenseParticleFluid/cyclone case has been updated to demonstrate this new functionality and shows a speedup of ~50% using the Zoltan RCB multi-constraint distributor. The multicomponentFluid/counterFlowFlame2D_GRI case has also been updated to use the new cpuLoad switch. 
--- .../fvMeshDistributors/cpuLoad/cpuLoad.C | 20 ++- .../fvMeshDistributors/cpuLoad/cpuLoad.H | 13 +- .../fvMeshDistributorsLoadBalancer.C | 162 ++++++++++++------ src/lagrangian/basic/Cloud/Cloud.C | 20 ++- src/lagrangian/basic/Cloud/Cloud.H | 9 +- .../Templates/MomentumCloud/MomentumCloud.C | 3 + .../Templates/MomentumCloud/MomentumCloud.H | 9 + .../LocalInteraction/LocalInteraction.C | 7 +- .../chemistryModel/chemistryModel.C | 14 +- .../chemistryModel/chemistryModel.H | 6 +- .../cyclone/0/U.air | 2 + .../cyclone/0/k.air | 2 + .../cyclone/0/nut.air | 2 + .../cyclone/0/p | 2 + .../cyclone/constant/cloudProperties | 2 + .../cyclone/constant/dynamicMeshDict | 27 +++ .../cyclone/system/blockMeshDict | 6 + .../cyclone/system/controlDict | 2 +- .../cyclone/system/decomposeParDict | 18 +- .../constant/chemistryProperties | 2 +- .../system/blockMeshDict | 2 +- 21 files changed, 252 insertions(+), 78 deletions(-) create mode 100644 tutorials/incompressibleDenseParticleFluid/cyclone/constant/dynamicMeshDict diff --git a/src/finiteVolume/fvMesh/fvMeshDistributors/cpuLoad/cpuLoad.C b/src/finiteVolume/fvMesh/fvMeshDistributors/cpuLoad/cpuLoad.C index 249ddc5fe3..3c761d37d4 100644 --- a/src/finiteVolume/fvMesh/fvMeshDistributors/cpuLoad/cpuLoad.C +++ b/src/finiteVolume/fvMesh/fvMeshDistributors/cpuLoad/cpuLoad.C @@ -2,7 +2,7 @@ ========= | \\ / F ield | OpenFOAM: The Open Source CFD Toolbox \\ / O peration | Website: https://openfoam.org - \\ / A nd | Copyright (C) 2022 OpenFOAM Foundation + \\ / A nd | Copyright (C) 2022-2024 OpenFOAM Foundation \\/ M anipulation | ------------------------------------------------------------------------------- License @@ -107,6 +107,24 @@ Foam::optionalCpuLoad& Foam::optionalCpuLoad::New } +Foam::optionalCpuLoad& Foam::optionalCpuLoad::New +( + const polyMesh& mesh, + const word& name, + const bool loadBalancing +) +{ + if (loadBalancing && isA(mesh)) + { + return New(refCast(mesh), name, loadBalancing); + } + else + { + return 
optionalCpuLoad::optionalCpuLoad_; + } +} + + // * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * // void Foam::cpuLoad::reset() diff --git a/src/finiteVolume/fvMesh/fvMeshDistributors/cpuLoad/cpuLoad.H b/src/finiteVolume/fvMesh/fvMeshDistributors/cpuLoad/cpuLoad.H index fcc815db46..140e5791a0 100644 --- a/src/finiteVolume/fvMesh/fvMeshDistributors/cpuLoad/cpuLoad.H +++ b/src/finiteVolume/fvMesh/fvMeshDistributors/cpuLoad/cpuLoad.H @@ -2,7 +2,7 @@ ========= | \\ / F ield | OpenFOAM: The Open Source CFD Toolbox \\ / O peration | Website: https://openfoam.org - \\ / A nd | Copyright (C) 2022 OpenFOAM Foundation + \\ / A nd | Copyright (C) 2022-2024 OpenFOAM Foundation \\/ M anipulation | ------------------------------------------------------------------------------- License @@ -77,6 +77,8 @@ public: // Selectors + //- Construct from fvMesh if loadBalancing is true + // otherwise return the dummy optionalCpuLoad static optionalCpuLoad& New ( const fvMesh& mesh, @@ -84,6 +86,15 @@ public: const bool loadBalancing ); + //- Construct from polyMesh if it is an fvMesh and loadBalancing is true + // otherwise return the dummy optionalCpuLoad + static optionalCpuLoad& New + ( + const polyMesh& mesh, + const word& name, + const bool loadBalancing + ); + //- Destructor virtual ~optionalCpuLoad() diff --git a/src/fvMeshDistributors/loadBalancer/fvMeshDistributorsLoadBalancer.C b/src/fvMeshDistributors/loadBalancer/fvMeshDistributorsLoadBalancer.C index 0d07d6ce87..01680982a8 100644 --- a/src/fvMeshDistributors/loadBalancer/fvMeshDistributorsLoadBalancer.C +++ b/src/fvMeshDistributors/loadBalancer/fvMeshDistributorsLoadBalancer.C @@ -85,12 +85,13 @@ bool Foam::fvMeshDistributors::loadBalancer::update() if ( Pstream::nProcs() > 1 - && mesh.time().timeIndex() > 1 + && mesh.time().timeIndex() - mesh.time().startTimeIndex() > 1 && timeIndex_ != mesh.time().timeIndex() ) { timeIndex_ = mesh.time().timeIndex(); + // Get the CPU time for this processor which 
includes waiting time const scalar timeStepCpuTime = cpuTime_.cpuTimeIncrement(); // CPU loads per cell @@ -107,80 +108,125 @@ bool Foam::fvMeshDistributors::loadBalancer::update() { timeIndex_ = mesh.time().timeIndex(); - scalar sumCpuLoad = 0; + scalarList procCpuLoads(cpuLoads.size()); + label l = 0; forAllConstIter(HashTable, cpuLoads, iter) { - sumCpuLoad += sum(iter()->primitiveField()); + procCpuLoads[l++] = sum(iter()->primitiveField()); } - const scalar cellCFDCpuTime = returnReduce - ( - (timeStepCpuTime - sumCpuLoad)/mesh.nCells(), - minOp() - ); + List allProcCpuLoads(Pstream::nProcs()); + allProcCpuLoads[Pstream::myProcNo()] = procCpuLoads; + Pstream::gatherList(allProcCpuLoads); + Pstream::scatterList(allProcCpuLoads); - // Total CPU time for this processor - const scalar processorCpuTime = - mesh.nCells()*cellCFDCpuTime + sumCpuLoad; + scalarList sumProcCpuLoads(procCpuLoads.size(), scalar(0)); + scalarList maxProcCpuLoads(procCpuLoads.size(), scalar(0)); + forAll(maxProcCpuLoads, l) + { + forAll(allProcCpuLoads, proci) + { + sumProcCpuLoads[l] += allProcCpuLoads[proci][l]; + + maxProcCpuLoads[l] = + max(maxProcCpuLoads[l], allProcCpuLoads[proci][l]); + } + } + + // Sum over loads of the maximum load CPU time per processor + const scalar sumMaxProcCpuLoad(sum(maxProcCpuLoads)); + + // Maximum number of cells per processor + const label maxNcells = returnReduce(mesh.nCells(), maxOp