ENH: GAMGProcAgglomeration: use non-blocking transfers

Author: mattijs
Date:   2013-05-10 13:13:33 +01:00
Parent: ee145db19b
Commit: 2f1b6d9e96

7 changed files with 266 additions and 180 deletions
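For orientation, a minimal sketch of the master-side gather pattern this commit switches to: instead of one Pstream::scheduled transfer per processor, all transfers are posted with Pstream::nonBlocking and completed together with Pstream::waitRequests. It uses only calls that appear in the hunks below and assumes a contiguous Type, since the non-blocking path rejects non-contiguous data.

    // Sketch only: non-blocking gather onto the master (procIDs[0]).
    // off, procIDs, allFld, tag and comm are as in the globalIndex hunks below.
    label startOfRequests = Pstream::nRequests();

    for (label i = 1; i < procIDs.size(); i++)
    {
        // Slot of the master field that receives processor procIDs[i]'s data
        SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]);

        IPstream::read          // posts the receive and returns immediately
        (
            Pstream::nonBlocking,
            procIDs[i],
            reinterpret_cast<char*>(procSlot.begin()),
            procSlot.byteSize(),
            tag,
            comm
        );
    }

    Pstream::waitRequests(startOfRequests);   // complete all outstanding receives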

View File

@@ -488,7 +488,10 @@ void Foam::GAMGAgglomeration::procAgglomerateRestrictAddressing
comm,
procIDs,
restrictAddressing_[levelIndex],
procRestrictAddressing
procRestrictAddressing,
UPstream::msgType(),
Pstream::nonBlocking //Pstream::scheduled
);

View File

@@ -126,7 +126,15 @@ void Foam::GAMGAgglomeration::restrictField
const List<int>& procIDs = agglomProcIDs(coarseLevelIndex);
const labelList& offsets = cellOffsets(coarseLevelIndex);
globalIndex::gather(offsets, fineComm, procIDs, cf);
globalIndex::gather
(
offsets,
fineComm,
procIDs,
cf,
UPstream::msgType(),
Pstream::nonBlocking //Pstream::scheduled
);
}
}
@@ -194,7 +202,16 @@ void Foam::GAMGAgglomeration::prolongField
label localSize = nCells_[levelIndex];
Field<Type> allCf(localSize);
globalIndex::scatter(offsets, coarseComm, procIDs, cf, allCf);
globalIndex::scatter
(
offsets,
coarseComm,
procIDs,
cf,
allCf,
UPstream::msgType(),
Pstream::nonBlocking //Pstream::scheduled
);
forAll(fineToCoarse, i)
{

View File

@@ -109,7 +109,7 @@ bool Foam::manualGAMGProcAgglomeration::agglomerate()
// My processor id
const label myProcID = Pstream::myProcNo(levelMesh.comm());
const List<clusterAndMaster>& clusters =
const List<labelList>& clusters =
procAgglomMaps_[i].second();
// Coarse to fine master processor
@@ -125,8 +125,8 @@ bool Foam::manualGAMGProcAgglomeration::agglomerate()
forAll(clusters, coarseI)
{
const labelList& cluster = clusters[coarseI].first();
coarseToMaster[coarseI] = clusters[coarseI].second();
const labelList& cluster = clusters[coarseI];
coarseToMaster[coarseI] = cluster[0];
forAll(cluster, i)
{

View File

@@ -30,19 +30,23 @@ Description
In the GAMG control dictionary:
processorAgglomerator manual;
// List of level+procagglomeration where
// procagglomeration is a set of labelLists. Each labelList is
// a cluster of processor which gets combined onto the first element
// in the list.
processorAgglomeration
(
(
3 //at level 3
(
((0 1) 0) //coarse 0 from 0,1 (and moved onto 0)
((2 3) 3) //coarse 1 from 2,3 (and moved onto 3)
(0 1) //coarse 0 from 0,1 (and moved onto 0)
(3 2) //coarse 1 from 2,3 (and moved onto 3)
)
)
(
6 //at level6
(
((0 1) 0) //coarse 0 from 0,1 (and moved onto 0)
(0 1) //coarse 0 from 0,1 (and moved onto 0)
)
)
);
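A rough sketch of how one entry of this simplified format is consumed, mirroring the loop in the manualGAMGProcAgglomeration::agglomerate() hunk above; procAgglomMap, nProcs and the loop variable names are illustrative, not taken from the commit.

    // Illustrative only: interpreting one (level, clusters) pair.
    // Each labelList is one coarse processor; its first element is the master.
    const List<labelList>& clusters = procAgglomMaps_[i].second();

    labelList procAgglomMap(nProcs, -1);        // fine processor -> coarse processor
    labelList coarseToMaster(clusters.size());  // coarse processor -> master processor

    forAll(clusters, coarseI)
    {
        const labelList& cluster = clusters[coarseI];
        coarseToMaster[coarseI] = cluster[0];   // cluster moved onto its first element

        forAll(cluster, i)
        {
            procAgglomMap[cluster[i]] = coarseI;
        }
    }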
@@ -76,10 +80,8 @@ class manualGAMGProcAgglomeration
{
// Private data
typedef Tuple2<labelList, label> clusterAndMaster;
//- Per level the agglomeration map
const List<Tuple2<label, List<clusterAndMaster> > > procAgglomMaps_;
const List<Tuple2<label, List<labelList> > > procAgglomMaps_;
//- Any allocated communicators
DynamicList<label> comms_;

View File

@@ -114,13 +114,6 @@ Foam::solverPerformance Foam::GAMGSolver::solve
cmpt
);
//Pout<< "finestCorrection:" << finestCorrection << endl;
//Pout<< "finestResidual:" << finestResidual << endl;
//Pout<< "psi:" << psi << endl;
//Pout<< "Apsi:" << Apsi << endl;
// Calculate finest level residual field
matrix_.Amul(Apsi, psi, interfaceBouCoeffs_, interfaces_, cmpt);
finestResidual = source;
@@ -205,7 +198,6 @@ void Foam::GAMGSolver::Vcycle
scratch1,
coarseCorrFields[leveli].size()
);
//scalarField ACf(coarseCorrFields[leveli].size(), VGREAT);
// Scale coarse-grid correction field
// but not on the coarsest level because it evaluates to 1
@@ -218,8 +210,6 @@ void Foam::GAMGSolver::Vcycle
(
ACf.operator const scalarField&()
),
//ACf,
matrixLevels_[leveli],
interfaceLevelsBouCoeffs_[leveli],
interfaceLevels_[leveli],
@@ -235,7 +225,6 @@ void Foam::GAMGSolver::Vcycle
(
ACf.operator const scalarField&()
),
//ACf,
coarseCorrFields[leveli],
interfaceLevelsBouCoeffs_[leveli],
interfaceLevels_[leveli],
@@ -294,11 +283,6 @@ void Foam::GAMGSolver::Vcycle
scratch2,
coarseCorrFields[leveli].size()
);
//scalarField preSmoothedCoarseCorrField
//(
// coarseCorrFields[leveli].size(),
// VGREAT
//);
// Only store the preSmoothedCoarseCorrField if pre-smoothing is
// used
@@ -328,12 +312,6 @@ void Foam::GAMGSolver::Vcycle
);
scalarField& ACfRef =
const_cast<scalarField&>(ACf.operator const scalarField&());
//scalarField ACfRef
//(
// coarseCorrFields[leveli].size(),
// VGREAT
//);
if (interpolateCorrection_)
{

View File

@@ -160,7 +160,8 @@ public:
const labelList& procIDs,
const UList<Type>& fld,
List<Type>& allFld,
const int tag = UPstream::msgType()
const int tag = UPstream::msgType(),
const Pstream::commsTypes commsType=Pstream::nonBlocking
);
//- Collect data in processor order on master (== procIDs[0]).
@@ -172,10 +173,11 @@ public:
const labelList& procIDs,
const UList<Type>& fld,
List<Type>& allFld,
const int tag = UPstream::msgType()
const int tag = UPstream::msgType(),
const Pstream::commsTypes commsType=Pstream::nonBlocking
) const
{
gather(offsets_, comm, procIDs, fld, allFld, tag);
gather(offsets_, comm, procIDs, fld, allFld, tag, commsType);
}
//- Inplace collect data in processor order on master
@@ -187,7 +189,8 @@ public:
const label comm,
const labelList& procIDs,
List<Type>& fld,
const int tag = UPstream::msgType()
const int tag = UPstream::msgType(),
const Pstream::commsTypes commsType=Pstream::nonBlocking
);
//- Inplace collect data in processor order on master
@@ -198,10 +201,11 @@ public:
const label comm,
const labelList& procIDs,
List<Type>& fld,
const int tag = UPstream::msgType()
const int tag = UPstream::msgType(),
const Pstream::commsTypes commsType=Pstream::nonBlocking
) const
{
gather(offsets_, comm, procIDs, fld, tag);
gather(offsets_, comm, procIDs, fld, tag, commsType);
}
//- Distribute data in processor order. Requires fld to be sized!
@@ -213,7 +217,8 @@ public:
const labelList& procIDs,
const UList<Type>& allFld,
UList<Type>& fld,
const int tag = UPstream::msgType()
const int tag = UPstream::msgType(),
const Pstream::commsTypes commsType=Pstream::nonBlocking
);
//- Distribute data in processor order. Requires fld to be sized!
@@ -224,10 +229,11 @@ public:
const labelList& procIDs,
const UList<Type>& allFld,
UList<Type>& fld,
const int tag = UPstream::msgType()
const int tag = UPstream::msgType(),
const Pstream::commsTypes commsType=Pstream::nonBlocking
) const
{
scatter(offsets_, comm, procIDs, allFld, fld, tag);
scatter(offsets_, comm, procIDs, allFld, fld, tag, commsType);
}
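A hedged usage sketch of the extended interface declared above, written as a plain call from user code: the offsets and processor list built here are example values (in the GAMG code they come from cellOffsets() and agglomProcIDs()); only the gather/scatter signatures and the new default commsType are taken from the header.

    // Illustrative only: gather a scalarField onto procIDs[0], then scatter back.
    const label comm = UPstream::worldComm;
    const label localSize = 10;

    labelList procIDs(Pstream::nProcs(comm));
    forAll(procIDs, i)
    {
        procIDs[i] = i;                         // procIDs[0] acts as the master
    }

    // offsets[i]..offsets[i+1] is processor procIDs[i]'s slot in the gathered field
    labelList offsets(procIDs.size() + 1);
    offsets[0] = 0;
    for (label i = 0; i < procIDs.size(); i++)
    {
        offsets[i+1] = offsets[i] + localSize;  // assume equal localSize everywhere
    }

    scalarField fld(localSize, scalar(Pstream::myProcNo(comm)));
    List<scalar> allFld;                        // sized by gather on the master

    // Non-blocking transfers are now the default ...
    globalIndex::gather(offsets, comm, procIDs, fld, allFld);

    // ... and the old behaviour is still available explicitly
    globalIndex::gather
    (
        offsets, comm, procIDs, fld, allFld,
        UPstream::msgType(), Pstream::scheduled
    );

    // Distribute the master's data back; fld must already be sized
    globalIndex::scatter(offsets, comm, procIDs, allFld, fld);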

View File

@@ -30,30 +30,75 @@ License
template<class Type>
void Foam::globalIndex::gather
(
const labelUList& offsets,
const labelUList& off,
const label comm,
const labelList& procIDs,
const UList<Type>& fld,
List<Type>& allFld,
const int tag
const int tag,
const Pstream::commsTypes commsType
)
{
if (Pstream::myProcNo(comm) == procIDs[0])
{
allFld.setSize(offsets.last());
allFld.setSize(off.last());
// Assign my local data
SubList<Type>(allFld, fld.size(), 0).assign(fld);
for (label i = 1; i < procIDs.size(); i++)
if (commsType == Pstream::scheduled || commsType == Pstream::blocking)
{
SubList<Type> procSlot(allFld, offsets[i+1]-offsets[i], offsets[i]);
if (contiguous<Type>())
for (label i = 1; i < procIDs.size(); i++)
{
SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]);
if (contiguous<Type>())
{
IPstream::read
(
commsType,
procIDs[i],
reinterpret_cast<char*>(procSlot.begin()),
procSlot.byteSize(),
tag,
comm
);
}
else
{
IPstream fromSlave
(
commsType,
procIDs[i],
0,
tag,
comm
);
fromSlave >> procSlot;
}
}
}
else
{
// nonBlocking
if (!contiguous<Type>())
{
FatalErrorIn("globalIndex::gather(..)")
<< "nonBlocking not supported for non-contiguous data"
<< exit(FatalError);
}
label startOfRequests = Pstream::nRequests();
// Set up reads
for (label i = 1; i < procIDs.size(); i++)
{
SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]);
IPstream::read
(
Pstream::scheduled,
commsType,
procIDs[i],
reinterpret_cast<char*>(procSlot.begin()),
procSlot.byteSize(),
@@ -61,45 +106,66 @@ void Foam::globalIndex::gather
comm
);
}
else
{
IPstream fromSlave
(
Pstream::scheduled,
procIDs[i],
0,
tag,
comm
);
fromSlave >> procSlot;
}
// Wait for all to finish
Pstream::waitRequests(startOfRequests);
}
}
else
{
if (contiguous<Type>())
if (commsType == Pstream::scheduled || commsType == Pstream::blocking)
{
if (contiguous<Type>())
{
OPstream::write
(
commsType,
procIDs[0],
reinterpret_cast<const char*>(fld.begin()),
fld.byteSize(),
tag,
comm
);
}
else
{
OPstream toMaster
(
commsType,
procIDs[0],
0,
tag,
comm
);
toMaster << fld;
}
}
else
{
// nonBlocking
if (!contiguous<Type>())
{
FatalErrorIn("globalIndex::gather(..)")
<< "nonBlocking not supported for non-contiguous data"
<< exit(FatalError);
}
label startOfRequests = Pstream::nRequests();
// Set up write
OPstream::write
(
Pstream::scheduled,
commsType,
procIDs[0],
reinterpret_cast<const char*>(fld.begin()),
fld.byteSize(),
tag,
comm
);
}
else
{
OPstream toMaster
(
Pstream::scheduled,
procIDs[0],
0,
tag,
comm
);
toMaster << fld;
// Wait for all to finish
Pstream::waitRequests(startOfRequests);
}
}
}
@@ -108,111 +174,104 @@ void Foam::globalIndex::gather
template<class Type>
void Foam::globalIndex::gather
(
const labelUList& offsets,
const labelUList& off,
const label comm,
const labelList& procIDs,
List<Type>& fld,
const int tag
const int tag,
const Pstream::commsTypes commsType
)
{
List<Type> allFld;
gather(off, comm, procIDs, fld, allFld, tag, commsType);
if (Pstream::myProcNo(comm) == procIDs[0])
{
List<Type> allFld(offsets.last());
// Assign my local data
SubList<Type>(allFld, fld.size(), 0).assign(fld);
for (label i = 1; i < procIDs.size(); i++)
{
SubList<Type> procSlot(allFld, offsets[i+1]-offsets[i], offsets[i]);
if (contiguous<Type>())
{
IPstream::read
(
Pstream::scheduled,
procIDs[i],
reinterpret_cast<char*>(procSlot.begin()),
procSlot.byteSize(),
tag,
comm
);
}
else
{
IPstream fromSlave
(
Pstream::scheduled,
procIDs[i],
0,
tag,
comm
);
fromSlave >> procSlot;
}
}
fld.transfer(allFld);
}
else
{
if (contiguous<Type>())
{
OPstream::write
(
Pstream::scheduled,
procIDs[0],
reinterpret_cast<const char*>(fld.begin()),
fld.byteSize(),
tag,
comm
);
}
else
{
OPstream toMaster
(
Pstream::scheduled,
procIDs[0],
0,
tag,
comm
);
toMaster << fld;
}
}
}
template<class Type>
void Foam::globalIndex::scatter
(
const labelUList& offsets,
const labelUList& off,
const label comm,
const labelList& procIDs,
const UList<Type>& allFld,
UList<Type>& fld,
const int tag
const int tag,
const Pstream::commsTypes commsType
)
{
if (Pstream::myProcNo(comm) == procIDs[0])
{
fld.assign(SubList<Type>(allFld, offsets[1]-offsets[0]));
fld.assign(SubList<Type>(allFld, off[1]-off[0]));
for (label i = 1; i < procIDs.size(); i++)
if (commsType == Pstream::scheduled || commsType == Pstream::blocking)
{
const SubList<Type> procSlot
(
allFld,
offsets[i+1]-offsets[i],
offsets[i]
);
if (contiguous<Type>())
for (label i = 1; i < procIDs.size(); i++)
{
const SubList<Type> procSlot
(
allFld,
off[i+1]-off[i],
off[i]
);
if (contiguous<Type>())
{
OPstream::write
(
commsType,
procIDs[i],
reinterpret_cast<const char*>(procSlot.begin()),
procSlot.byteSize(),
tag,
comm
);
}
else
{
OPstream toSlave
(
commsType,
procIDs[i],
0,
tag,
comm
);
toSlave << procSlot;
}
}
}
else
{
// nonBlocking
if (!contiguous<Type>())
{
FatalErrorIn("globalIndex::scatter(..)")
<< "nonBlocking not supported for non-contiguous data"
<< exit(FatalError);
}
label startOfRequests = Pstream::nRequests();
// Set up writes
for (label i = 1; i < procIDs.size(); i++)
{
const SubList<Type> procSlot
(
allFld,
off[i+1]-off[i],
off[i]
);
OPstream::write
(
Pstream::scheduled,
commsType,
procIDs[i],
reinterpret_cast<const char*>(procSlot.begin()),
procSlot.byteSize(),
@@ -220,45 +279,66 @@ void Foam::globalIndex::scatter
comm
);
}
else
{
OPstream toSlave
(
Pstream::scheduled,
procIDs[i],
0,
tag,
comm
);
toSlave << procSlot;
}
// Wait for all to finish
Pstream::waitRequests(startOfRequests);
}
}
else
{
if (contiguous<Type>())
if (commsType == Pstream::scheduled || commsType == Pstream::blocking)
{
if (contiguous<Type>())
{
IPstream::read
(
commsType,
procIDs[0],
reinterpret_cast<char*>(fld.begin()),
fld.byteSize(),
tag,
comm
);
}
else
{
IPstream fromMaster
(
commsType,
procIDs[0],
0,
tag,
comm
);
fromMaster >> fld;
}
}
else
{
// nonBlocking
if (!contiguous<Type>())
{
FatalErrorIn("globalIndex::scatter(..)")
<< "nonBlocking not supported for non-contiguous data"
<< exit(FatalError);
}
label startOfRequests = Pstream::nRequests();
// Set up read
IPstream::read
(
Pstream::scheduled,
commsType,
procIDs[0],
reinterpret_cast<char*>(fld.begin()),
fld.byteSize(),
tag,
comm
);
}
else
{
IPstream fromMaster
(
Pstream::scheduled,
procIDs[0],
0,
tag,
comm
);
fromMaster >> fld;
// Wait for all to finish
Pstream::waitRequests(startOfRequests);
}
}
}
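The non-blocking branches above move raw bytes (a reinterpret_cast of the field storage), which is why they guard against non-contiguous types with FatalErrorIn. As a hedged illustration of how a plain-old-data type opts in to such raw transfers, assuming the contiguous<Type>() function-template trait used by OpenFOAM in this era (the type itself is made up):

    #include "contiguous.H"

    namespace Foam
    {

    // Made-up POD type: fixed size, no pointers, safe to copy as raw bytes
    class miniPair
    {
    public:
        scalar first;
        scalar second;
    };

    // Mark it bit-copyable so gather/scatter may use the non-blocking raw path;
    // without this specialisation only the scheduled/blocking stream path applies.
    template<>
    inline bool contiguous<miniPair>()
    {
        return true;
    }

    } // End namespace Foam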