ENH: GAMGProcAgglomeration: use non-blocking transfers

Author: mattijs
Date:   2013-05-10 13:13:33 +01:00
Parent: ee145db19b
Commit: 2f1b6d9e96

7 changed files with 266 additions and 180 deletions
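For orientation, a minimal sketch of the master-side gather pattern this commit switches to: instead of one Pstream::scheduled transfer per processor, all transfers are posted with Pstream::nonBlocking and completed together with Pstream::waitRequests. It uses only calls that appear in the hunks below and assumes a contiguous Type, since the non-blocking path rejects non-contiguous data.

    // Sketch only: non-blocking gather onto the master (procIDs[0]).
    // off, procIDs, allFld, tag and comm are as in the globalIndex hunks below.
    label startOfRequests = Pstream::nRequests();

    for (label i = 1; i < procIDs.size(); i++)
    {
        // Slot of the master field that receives processor procIDs[i]'s data
        SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]);

        IPstream::read          // posts the receive and returns immediately
        (
            Pstream::nonBlocking,
            procIDs[i],
            reinterpret_cast<char*>(procSlot.begin()),
            procSlot.byteSize(),
            tag,
            comm
        );
    }

    Pstream::waitRequests(startOfRequests);   // complete all outstanding receives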

View File

@@ -488,7 +488,10 @@ void Foam::GAMGAgglomeration::procAgglomerateRestrictAddressing
comm,
procIDs,
restrictAddressing_[levelIndex],
procRestrictAddressing
procRestrictAddressing,
UPstream::msgType(),
Pstream::nonBlocking //Pstream::scheduled
);

View File

@@ -126,7 +126,15 @@ void Foam::GAMGAgglomeration::restrictField
const List<int>& procIDs = agglomProcIDs(coarseLevelIndex);
const labelList& offsets = cellOffsets(coarseLevelIndex);
globalIndex::gather(offsets, fineComm, procIDs, cf);
globalIndex::gather
(
offsets,
fineComm,
procIDs,
cf,
UPstream::msgType(),
Pstream::nonBlocking //Pstream::scheduled
);
}
}
@@ -194,7 +202,16 @@ void Foam::GAMGAgglomeration::prolongField
label localSize = nCells_[levelIndex];
Field<Type> allCf(localSize);
globalIndex::scatter(offsets, coarseComm, procIDs, cf, allCf);
globalIndex::scatter
(
offsets,
coarseComm,
procIDs,
cf,
allCf,
UPstream::msgType(),
Pstream::nonBlocking //Pstream::scheduled
);
forAll(fineToCoarse, i)
{

View File

@@ -109,7 +109,7 @@ bool Foam::manualGAMGProcAgglomeration::agglomerate()
// My processor id
const label myProcID = Pstream::myProcNo(levelMesh.comm());
const List<clusterAndMaster>& clusters =
const List<labelList>& clusters =
procAgglomMaps_[i].second();
// Coarse to fine master processor
@@ -125,8 +125,8 @@ bool Foam::manualGAMGProcAgglomeration::agglomerate()
forAll(clusters, coarseI)
{
const labelList& cluster = clusters[coarseI].first();
coarseToMaster[coarseI] = clusters[coarseI].second();
const labelList& cluster = clusters[coarseI];
coarseToMaster[coarseI] = cluster[0];
forAll(cluster, i)
{

View File

@@ -30,19 +30,23 @@ Description
In the GAMG control dictionary:
processorAgglomerator manual;
// List of level+procagglomeration where
// procagglomeration is a set of labelLists. Each labelList is
// a cluster of processor which gets combined onto the first element
// in the list.
processorAgglomeration
(
(
3 //at level 3
(
((0 1) 0) //coarse 0 from 0,1 (and moved onto 0)
((2 3) 3) //coarse 1 from 2,3 (and moved onto 3)
(0 1) //coarse 0 from 0,1 (and moved onto 0)
(3 2) //coarse 1 from 2,3 (and moved onto 3)
)
)
(
6 //at level6
(
((0 1) 0) //coarse 0 from 0,1 (and moved onto 0)
(0 1) //coarse 0 from 0,1 (and moved onto 0)
)
)
);
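A rough sketch of how one entry of this simplified format is consumed, mirroring the loop in the manualGAMGProcAgglomeration::agglomerate() hunk above; procAgglomMap, nProcs and the loop variable names are illustrative, not taken from the commit.

    // Illustrative only: interpreting one (level, clusters) pair.
    // Each labelList is one coarse processor; its first element is the master.
    const List<labelList>& clusters = procAgglomMaps_[i].second();

    labelList procAgglomMap(nProcs, -1);        // fine processor -> coarse processor
    labelList coarseToMaster(clusters.size());  // coarse processor -> master processor

    forAll(clusters, coarseI)
    {
        const labelList& cluster = clusters[coarseI];
        coarseToMaster[coarseI] = cluster[0];   // cluster moved onto its first element

        forAll(cluster, i)
        {
            procAgglomMap[cluster[i]] = coarseI;
        }
    }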
@@ -76,10 +80,8 @@ class manualGAMGProcAgglomeration
{
// Private data
typedef Tuple2<labelList, label> clusterAndMaster;
//- Per level the agglomeration map
const List<Tuple2<label, List<clusterAndMaster> > > procAgglomMaps_;
const List<Tuple2<label, List<labelList> > > procAgglomMaps_;
//- Any allocated communicators
DynamicList<label> comms_;

View File

@@ -114,13 +114,6 @@ Foam::solverPerformance Foam::GAMGSolver::solve
cmpt
);
//Pout<< "finestCorrection:" << finestCorrection << endl;
//Pout<< "finestResidual:" << finestResidual << endl;
//Pout<< "psi:" << psi << endl;
//Pout<< "Apsi:" << Apsi << endl;
// Calculate finest level residual field
matrix_.Amul(Apsi, psi, interfaceBouCoeffs_, interfaces_, cmpt);
finestResidual = source;
@@ -205,7 +198,6 @@ void Foam::GAMGSolver::Vcycle
scratch1,
coarseCorrFields[leveli].size()
);
//scalarField ACf(coarseCorrFields[leveli].size(), VGREAT);
// Scale coarse-grid correction field
// but not on the coarsest level because it evaluates to 1
@@ -218,8 +210,6 @@ void Foam::GAMGSolver::Vcycle
(
ACf.operator const scalarField&()
),
//ACf,
matrixLevels_[leveli],
interfaceLevelsBouCoeffs_[leveli],
interfaceLevels_[leveli],
@@ -235,7 +225,6 @@ void Foam::GAMGSolver::Vcycle
(
ACf.operator const scalarField&()
),
//ACf,
coarseCorrFields[leveli],
interfaceLevelsBouCoeffs_[leveli],
interfaceLevels_[leveli],
@@ -294,11 +283,6 @@ void Foam::GAMGSolver::Vcycle
scratch2,
coarseCorrFields[leveli].size()
);
//scalarField preSmoothedCoarseCorrField
//(
// coarseCorrFields[leveli].size(),
// VGREAT
//);
// Only store the preSmoothedCoarseCorrField if pre-smoothing is
// used
@@ -328,12 +312,6 @@ void Foam::GAMGSolver::Vcycle
);
scalarField& ACfRef =
const_cast<scalarField&>(ACf.operator const scalarField&());
//scalarField ACfRef
//(
// coarseCorrFields[leveli].size(),
// VGREAT
//);
if (interpolateCorrection_)
{

View File

@@ -160,7 +160,8 @@ public:
const labelList& procIDs,
const UList<Type>& fld,
List<Type>& allFld,
const int tag = UPstream::msgType()
const int tag = UPstream::msgType(),
const Pstream::commsTypes commsType=Pstream::nonBlocking
);
//- Collect data in processor order on master (== procIDs[0]).
@@ -172,10 +173,11 @@ public:
const labelList& procIDs,
const UList<Type>& fld,
List<Type>& allFld,
const int tag = UPstream::msgType()
const int tag = UPstream::msgType(),
const Pstream::commsTypes commsType=Pstream::nonBlocking
) const
{
gather(offsets_, comm, procIDs, fld, allFld, tag);
gather(offsets_, comm, procIDs, fld, allFld, tag, commsType);
}
//- Inplace collect data in processor order on master
@@ -187,7 +189,8 @@ public:
const label comm,
const labelList& procIDs,
List<Type>& fld,
const int tag = UPstream::msgType()
const int tag = UPstream::msgType(),
const Pstream::commsTypes commsType=Pstream::nonBlocking
);
//- Inplace collect data in processor order on master
@@ -198,10 +201,11 @@ public:
const label comm,
const labelList& procIDs,
List<Type>& fld,
const int tag = UPstream::msgType()
const int tag = UPstream::msgType(),
const Pstream::commsTypes commsType=Pstream::nonBlocking
) const
{
gather(offsets_, comm, procIDs, fld, tag);
gather(offsets_, comm, procIDs, fld, tag, commsType);
}
//- Distribute data in processor order. Requires fld to be sized!
@@ -213,7 +217,8 @@ public:
const labelList& procIDs,
const UList<Type>& allFld,
UList<Type>& fld,
const int tag = UPstream::msgType()
const int tag = UPstream::msgType(),
const Pstream::commsTypes commsType=Pstream::nonBlocking
);
//- Distribute data in processor order. Requires fld to be sized!
@@ -224,10 +229,11 @@ public:
const labelList& procIDs,
const UList<Type>& allFld,
UList<Type>& fld,
const int tag = UPstream::msgType()
const int tag = UPstream::msgType(),
const Pstream::commsTypes commsType=Pstream::nonBlocking
) const
{
scatter(offsets_, comm, procIDs, allFld, fld, tag);
scatter(offsets_, comm, procIDs, allFld, fld, tag, commsType);
}
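A hedged usage sketch of the extended interface declared above, written as a plain call from user code: the offsets and processor list built here are example values (in the GAMG code they come from cellOffsets() and agglomProcIDs()); only the gather/scatter signatures and the new default commsType are taken from the header.

    // Illustrative only: gather a scalarField onto procIDs[0], then scatter back.
    const label comm = UPstream::worldComm;
    const label localSize = 10;

    labelList procIDs(Pstream::nProcs(comm));
    forAll(procIDs, i)
    {
        procIDs[i] = i;                         // procIDs[0] acts as the master
    }

    // offsets[i]..offsets[i+1] is processor procIDs[i]'s slot in the gathered field
    labelList offsets(procIDs.size() + 1);
    offsets[0] = 0;
    for (label i = 0; i < procIDs.size(); i++)
    {
        offsets[i+1] = offsets[i] + localSize;  // assume equal localSize everywhere
    }

    scalarField fld(localSize, scalar(Pstream::myProcNo(comm)));
    List<scalar> allFld;                        // sized by gather on the master

    // Non-blocking transfers are now the default ...
    globalIndex::gather(offsets, comm, procIDs, fld, allFld);

    // ... and the old behaviour is still available explicitly
    globalIndex::gather
    (
        offsets, comm, procIDs, fld, allFld,
        UPstream::msgType(), Pstream::scheduled
    );

    // Distribute the master's data back; fld must already be sized
    globalIndex::scatter(offsets, comm, procIDs, allFld, fld);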

View File

@@ -30,30 +30,75 @@ License
template<class Type>
void Foam::globalIndex::gather
(
const labelUList& offsets,
const labelUList& off,
const label comm,
const labelList& procIDs,
const UList<Type>& fld,
List<Type>& allFld,
const int tag
const int tag,
const Pstream::commsTypes commsType
)
{
if (Pstream::myProcNo(comm) == procIDs[0])
{
allFld.setSize(offsets.last());
allFld.setSize(off.last());
// Assign my local data
SubList<Type>(allFld, fld.size(), 0).assign(fld);
for (label i = 1; i < procIDs.size(); i++)
if (commsType == Pstream::scheduled || commsType == Pstream::blocking)
{
SubList<Type> procSlot(allFld, offsets[i+1]-offsets[i], offsets[i]);
if (contiguous<Type>())
for (label i = 1; i < procIDs.size(); i++)
{
SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]);
if (contiguous<Type>())
{
IPstream::read
(
commsType,
procIDs[i],
reinterpret_cast<char*>(procSlot.begin()),
procSlot.byteSize(),
tag,
comm
);
}
else
{
IPstream fromSlave
(
commsType,
procIDs[i],
0,
tag,
comm
);
fromSlave >> procSlot;
}
}
}
else
{
// nonBlocking
if (!contiguous<Type>())
{
FatalErrorIn("globalIndex::gather(..)")
<< "nonBlocking not supported for non-contiguous data"
<< exit(FatalError);
}
label startOfRequests = Pstream::nRequests();
// Set up reads
for (label i = 1; i < procIDs.size(); i++)
{
SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]);
IPstream::read
(
Pstream::scheduled,
commsType,
procIDs[i],
reinterpret_cast<char*>(procSlot.begin()),
procSlot.byteSize(),
@@ -61,45 +106,66 @@ void Foam::globalIndex::gather
comm
);
}
else
{
IPstream fromSlave
(
Pstream::scheduled,
procIDs[i],
0,
tag,
comm
);
fromSlave >> procSlot;
}
// Wait for all to finish
Pstream::waitRequests(startOfRequests);
}
}
else
{
if (contiguous<Type>())
if (commsType == Pstream::scheduled || commsType == Pstream::blocking)
{
if (contiguous<Type>())
{
OPstream::write
(
commsType,
procIDs[0],
reinterpret_cast<const char*>(fld.begin()),
fld.byteSize(),
tag,
comm
);
}
else
{
OPstream toMaster
(
commsType,
procIDs[0],
0,
tag,
comm
);
toMaster << fld;
}
}
else
{
// nonBlocking
if (!contiguous<Type>())
{
FatalErrorIn("globalIndex::gather(..)")
<< "nonBlocking not supported for non-contiguous data"
<< exit(FatalError);
}
label startOfRequests = Pstream::nRequests();
// Set up write
OPstream::write
(
Pstream::scheduled,
commsType,
procIDs[0],
reinterpret_cast<const char*>(fld.begin()),
fld.byteSize(),
tag,
comm
);
}
else
{
OPstream toMaster
(
Pstream::scheduled,
procIDs[0],
0,
tag,
comm
);
toMaster << fld;
// Wait for all to finish
Pstream::waitRequests(startOfRequests);
}
}
}
@@ -108,111 +174,104 @@ void Foam::globalIndex::gather
template<class Type>
void Foam::globalIndex::gather
(
const labelUList& offsets,
const labelUList& off,
const label comm,
const labelList& procIDs,
List<Type>& fld,
const int tag
const int tag,
const Pstream::commsTypes commsType
)
{
List<Type> allFld;
gather(off, comm, procIDs, fld, allFld, tag, commsType);
if (Pstream::myProcNo(comm) == procIDs[0])
{
List<Type> allFld(offsets.last());
// Assign my local data
SubList<Type>(allFld, fld.size(), 0).assign(fld);
for (label i = 1; i < procIDs.size(); i++)
{
SubList<Type> procSlot(allFld, offsets[i+1]-offsets[i], offsets[i]);
if (contiguous<Type>())
{
IPstream::read
(
Pstream::scheduled,
procIDs[i],
reinterpret_cast<char*>(procSlot.begin()),
procSlot.byteSize(),
tag,
comm
);
}
else
{
IPstream fromSlave
(
Pstream::scheduled,
procIDs[i],
0,
tag,
comm
);
fromSlave >> procSlot;
}
}
fld.transfer(allFld);
}
else
{
if (contiguous<Type>())
{
OPstream::write
(
Pstream::scheduled,
procIDs[0],
reinterpret_cast<const char*>(fld.begin()),
fld.byteSize(),
tag,
comm
);
}
else
{
OPstream toMaster
(
Pstream::scheduled,
procIDs[0],
0,
tag,
comm
);
toMaster << fld;
}
}
}
template<class Type>
void Foam::globalIndex::scatter
(
const labelUList& offsets,
const labelUList& off,
const label comm,
const labelList& procIDs,
const UList<Type>& allFld,
UList<Type>& fld,
const int tag
const int tag,
const Pstream::commsTypes commsType
)
{
if (Pstream::myProcNo(comm) == procIDs[0])
{
fld.assign(SubList<Type>(allFld, offsets[1]-offsets[0]));
fld.assign(SubList<Type>(allFld, off[1]-off[0]));
for (label i = 1; i < procIDs.size(); i++)
if (commsType == Pstream::scheduled || commsType == Pstream::blocking)
{
const SubList<Type> procSlot
(
allFld,
offsets[i+1]-offsets[i],
offsets[i]
);
if (contiguous<Type>())
for (label i = 1; i < procIDs.size(); i++)
{
const SubList<Type> procSlot
(
allFld,
off[i+1]-off[i],
off[i]
);
if (contiguous<Type>())
{
OPstream::write
(
commsType,
procIDs[i],
reinterpret_cast<const char*>(procSlot.begin()),
procSlot.byteSize(),
tag,
comm
);
}
else
{
OPstream toSlave
(
commsType,
procIDs[i],
0,
tag,
comm
);
toSlave << procSlot;
}
}
}
else
{
// nonBlocking
if (!contiguous<Type>())
{
FatalErrorIn("globalIndex::scatter(..)")
<< "nonBlocking not supported for non-contiguous data"
<< exit(FatalError);
}
label startOfRequests = Pstream::nRequests();
// Set up writes
for (label i = 1; i < procIDs.size(); i++)
{
const SubList<Type> procSlot
(
allFld,
off[i+1]-off[i],
off[i]
);
OPstream::write
(
Pstream::scheduled,
commsType,
procIDs[i],
reinterpret_cast<const char*>(procSlot.begin()),
procSlot.byteSize(),
@@ -220,45 +279,66 @@ void Foam::globalIndex::scatter
comm
);
}
else
{
OPstream toSlave
(
Pstream::scheduled,
procIDs[i],
0,
tag,
comm
);
toSlave << procSlot;
}
// Wait for all to finish
Pstream::waitRequests(startOfRequests);
}
}
else
{
if (contiguous<Type>())
if (commsType == Pstream::scheduled || commsType == Pstream::blocking)
{
if (contiguous<Type>())
{
IPstream::read
(
commsType,
procIDs[0],
reinterpret_cast<char*>(fld.begin()),
fld.byteSize(),
tag,
comm
);
}
else
{
IPstream fromMaster
(
commsType,
procIDs[0],
0,
tag,
comm
);
fromMaster >> fld;
}
}
else
{
// nonBlocking
if (!contiguous<Type>())
{
FatalErrorIn("globalIndex::scatter(..)")
<< "nonBlocking not supported for non-contiguous data"
<< exit(FatalError);
}
label startOfRequests = Pstream::nRequests();
// Set up read
IPstream::read
(
Pstream::scheduled,
commsType,
procIDs[0],
reinterpret_cast<char*>(fld.begin()),
fld.byteSize(),
tag,
comm
);
}
else
{
IPstream fromMaster
(
Pstream::scheduled,
procIDs[0],
0,
tag,
comm
);
fromMaster >> fld;
// Wait for all to finish
Pstream::waitRequests(startOfRequests);
}
}
}
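The non-blocking branches above move raw bytes (a reinterpret_cast of the field storage), which is why they guard against non-contiguous types with FatalErrorIn. As a hedged illustration of how a plain-old-data type opts in to such raw transfers, assuming the contiguous<Type>() function-template trait used by OpenFOAM in this era (the type itself is made up):

    #include "contiguous.H"

    namespace Foam
    {

    // Made-up POD type: fixed size, no pointers, safe to copy as raw bytes
    class miniPair
    {
    public:
        scalar first;
        scalar second;
    };

    // Mark it bit-copyable so gather/scatter may use the non-blocking raw path;
    // without this specialisation only the scheduled/blocking stream path applies.
    template<>
    inline bool contiguous<miniPair>()
    {
        return true;
    }

    } // End namespace Foam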