ENH: globalIndex gather/scatter scheduled (not nonBlocking) for non-contiguous

- reduces later surprises and simplifies effort for the caller

- more flexible globalIndex scatter with auto-sized return field.

- Avoid communication for scattering into zero-sized fields.
This commit is contained in:
Mark Olesen
2022-02-22 19:59:13 +01:00
committed by Andrew Heather
parent ff4998d364
commit c178fe8ec1
4 changed files with 162 additions and 125 deletions

View File

@ -34,16 +34,16 @@ License
void Foam::globalIndex::reportOverflowAndExit
(
const label idx,
const labelUList& localSizes
const labelUList& localLens
)
{
FatalErrorInFunction
<< "Overflow : sum of sizes exceeds labelMax ("
<< labelMax << ") after index " << idx;
if (!localSizes.empty())
if (!localLens.empty())
{
FatalError << " of " << flatOutput(localSizes);
FatalError << " of " << flatOutput(localLens);
}
FatalError
@ -56,13 +56,13 @@ void Foam::globalIndex::reportOverflowAndExit
Foam::labelList
Foam::globalIndex::calcOffsets
(
const labelUList& localSizes,
const labelUList& localLens,
const bool checkOverflow
)
{
labelList values;
const label len = localSizes.size();
const label len = localLens.size();
if (len)
{
@ -72,11 +72,11 @@ Foam::globalIndex::calcOffsets
for (label i = 0; i < len; ++i)
{
values[i] = start;
start += localSizes[i];
start += localLens[i];
if (checkOverflow && start < values[i])
{
reportOverflowAndExit(i, localSizes);
reportOverflowAndExit(i, localLens);
}
}
values[len] = start;
@ -89,13 +89,13 @@ Foam::globalIndex::calcOffsets
Foam::List<Foam::labelRange>
Foam::globalIndex::calcRanges
(
const labelUList& localSizes,
const labelUList& localLens,
const bool checkOverflow
)
{
List<labelRange> values;
const label len = localSizes.size();
const label len = localLens.size();
if (len)
{
@ -104,12 +104,12 @@ Foam::globalIndex::calcRanges
label start = 0;
for (label i = 0; i < len; ++i)
{
values[i].reset(start, localSizes[i]);
start += localSizes[i];
values[i].reset(start, localLens[i]);
start += localLens[i];
if (checkOverflow && start < values[i].start())
{
reportOverflowAndExit(i, localSizes);
reportOverflowAndExit(i, localLens);
}
}
}
@ -221,16 +221,16 @@ void Foam::globalIndex::reset
// Seed with localSize, zero elsewhere (for non-parallel branch)
// NB: can consider UPstream::listGatherValues
labelList localSizes(len, Zero);
localSizes[Pstream::myProcNo(comm)] = localSize;
labelList localLens(len, Zero);
localLens[Pstream::myProcNo(comm)] = localSize;
if (parallel)
{
Pstream::gatherList(localSizes, tag, comm);
Pstream::scatterList(localSizes, tag, comm);
Pstream::gatherList(localLens, tag, comm);
Pstream::scatterList(localLens, tag, comm);
}
reset(localSizes, true); // checkOverflow = true
reset(localLens, true); // checkOverflow = true
}
else
{
@ -242,11 +242,11 @@ void Foam::globalIndex::reset
void Foam::globalIndex::reset
(
const labelUList& localSizes,
const labelUList& localLens,
const bool checkOverflow
)
{
const label len = localSizes.size();
const label len = localLens.size();
if (len)
{
@ -256,11 +256,11 @@ void Foam::globalIndex::reset
for (label i = 0; i < len; ++i)
{
offsets_[i] = start;
start += localSizes[i];
start += localLens[i];
if (checkOverflow && start < offsets_[i])
{
reportOverflowAndExit(i, localSizes);
reportOverflowAndExit(i, localLens);
}
}
offsets_[len] = start;
@ -290,7 +290,7 @@ void Foam::globalIndex::setLocalSize(const label proci, const label len)
}
Foam::labelList Foam::globalIndex::sizes() const
Foam::labelList Foam::globalIndex::localSizes() const
{
labelList values;

View File

@ -89,7 +89,7 @@ class globalIndex
static void reportOverflowAndExit
(
const label idx,
const labelUList& localSizes = labelUList::null()
const labelUList& localLens = labelUList::null()
);
public:
@ -178,12 +178,15 @@ public:
//- Global sum of localSizes.
inline label totalSize() const;
//- The local sizes
labelList sizes() const;
//- The local sizes. Same as localSizes()
inline labelList sizes() const;
//- The local starts
inline const labelUList localStarts() const;
//- The local sizes
labelList localSizes() const;
//- Global max of localSizes
inline label maxSize() const;
@ -276,7 +279,7 @@ public:
inline labelList toGlobal(const labelUList& labels) const;
//- From local to global index (inplace)
inline void inplaceToGlobal(labelList& labels) const;
inline void inplaceToGlobal(labelUList& labels) const;
//- From global to local on current processor.
// FatalError if not on local processor.
@ -314,7 +317,7 @@ public:
inline void inplaceToGlobal
(
const label proci,
labelList& labels
labelUList& labels
) const;
@ -405,7 +408,7 @@ public:
//- with optional check for label overflow
static labelList calcOffsets
(
const labelUList& localSizes,
const labelUList& localLens,
const bool checkOverflow = false
);
@ -422,7 +425,7 @@ public:
//- with optional check for label overflow
static List<labelRange> calcRanges
(
const labelUList& localSizes,
const labelUList& localLens,
const bool checkOverflow = false
);
@ -774,8 +777,21 @@ public:
template<class Type>
void scatter
(
const UList<Type>& allFld,
UList<Type>& fld,
const UList<Type>& allData,
UList<Type>& localData,
const int tag = UPstream::msgType(),
const Pstream::commsTypes = Pstream::commsTypes::nonBlocking,
const label comm = UPstream::worldComm //!< communicator
) const;
//- Distribute data in processor order
//- (in serial: performs a simple copy).
// Communication with default/specified communicator, message tag.
// \note the globalIndex offsets needed on master only.
template<class Type, class OutputContainer = List<Type>>
OutputContainer scatter
(
const UList<Type>& allData,
const int tag = UPstream::msgType(),
const Pstream::commsTypes = Pstream::commsTypes::nonBlocking,
const label comm = UPstream::worldComm //!< communicator

View File

@ -142,6 +142,12 @@ inline Foam::label Foam::globalIndex::size() const
}
inline Foam::labelList Foam::globalIndex::sizes() const
{
return localSizes();
}
inline Foam::label Foam::globalIndex::nProcs() const noexcept
{
const label len = (offsets_.size() - 1);
@ -289,7 +295,7 @@ inline Foam::labelList Foam::globalIndex::toGlobal
inline void Foam::globalIndex::inplaceToGlobal
(
const label proci,
labelList& labels
labelUList& labels
) const
{
const label off = offsets_[proci];
@ -301,7 +307,7 @@ inline void Foam::globalIndex::inplaceToGlobal
}
inline void Foam::globalIndex::inplaceToGlobal(labelList& labels) const
inline void Foam::globalIndex::inplaceToGlobal(labelUList& labels) const
{
inplaceToGlobal(Pstream::myProcNo(), labels);
}

View File

@ -73,22 +73,22 @@ void Foam::globalIndex::gather
const UList<Type>& fld,
List<Type>& allFld,
const int tag,
const Pstream::commsTypes commsType
const Pstream::commsTypes preferredCommsType
)
{
// low-level: no parRun guard
if
// Automatically change from nonBlocking to scheduled for
// non-contiguous data.
const UPstream::commsTypes commsType =
(
!is_contiguous<Type>::value
&& commsType == Pstream::commsTypes::nonBlocking
)
{
FatalErrorInFunction
<< "Cannot use nonBlocking with non-contiguous data"
<< exit(FatalError);
// Could also warn and change to scheduled etc...
}
(
!is_contiguous<Type>::value
&& UPstream::commsTypes::nonBlocking == preferredCommsType
)
? UPstream::commsTypes::scheduled
: preferredCommsType
);
const label startOfRequests = UPstream::nRequests();
@ -126,14 +126,7 @@ void Foam::globalIndex::gather
}
else
{
IPstream fromProc
(
commsType,
procIDs[i],
0,
tag,
comm
);
IPstream fromProc(commsType, procIDs[i], 0, tag, comm);
fromProc >> procSlot;
}
}
@ -158,14 +151,7 @@ void Foam::globalIndex::gather
}
else
{
OPstream toMaster
(
commsType,
procIDs[0],
0,
tag,
comm
);
OPstream toMaster(commsType, procIDs[0], 0, tag, comm);
toMaster << fld;
}
}
@ -187,18 +173,22 @@ void Foam::globalIndex::gather
const IndirectListBase<Type, Addr>& fld,
List<Type>& allFld,
const int tag,
const Pstream::commsTypes commsType
const Pstream::commsTypes preferredCommsType
)
{
// low-level: no parRun guard
if (commsType == Pstream::commsTypes::nonBlocking)
{
WarningInFunction
<< "Cannot use nonBlocking with indirect list of data"
<< exit(FatalError);
// Could also warn and change to scheduled etc...
}
// Automatically change from nonBlocking to scheduled for
// non-contiguous data.
const UPstream::commsTypes commsType =
(
(
!is_contiguous<Type>::value
&& UPstream::commsTypes::nonBlocking == preferredCommsType
)
? UPstream::commsTypes::scheduled
: preferredCommsType
);
if (Pstream::myProcNo(comm) == procIDs[0])
{
@ -224,14 +214,7 @@ void Foam::globalIndex::gather
}
else
{
IPstream fromProc
(
commsType,
procIDs[i],
0,
tag,
comm
);
IPstream fromProc(commsType, procIDs[i], 0, tag, comm);
fromProc >> procSlot;
}
}
@ -244,14 +227,7 @@ void Foam::globalIndex::gather
}
else
{
OPstream toMaster
(
commsType,
procIDs[0],
0,
tag,
comm
);
OPstream toMaster(commsType, procIDs[0], 0, tag, comm);
toMaster << fld;
}
}
@ -692,20 +668,22 @@ void Foam::globalIndex::scatter
const UList<Type>& allFld,
UList<Type>& fld,
const int tag,
const Pstream::commsTypes commsType
const Pstream::commsTypes preferredCommsType
)
{
if
// low-level: no parRun guard
// Automatically change from nonBlocking to scheduled for
// non-contiguous data.
const UPstream::commsTypes commsType =
(
!is_contiguous<Type>::value
&& commsType == Pstream::commsTypes::nonBlocking
)
{
FatalErrorInFunction
<< "Cannot use nonBlocking with non-contiguous data"
<< exit(FatalError);
// Could also warn and change to scheduled etc...
}
(
!is_contiguous<Type>::value
&& UPstream::commsTypes::nonBlocking == preferredCommsType
)
? UPstream::commsTypes::scheduled
: preferredCommsType
);
// FUTURE:
// could decide which procs will receive data and use mpiScatter
@ -715,18 +693,15 @@ void Foam::globalIndex::scatter
if (Pstream::myProcNo(comm) == procIDs[0])
{
const SubList<Type> localSlot(allFld, off[1]-off[0], off[0]);
if (!localSlot.empty())
{
fld.deepCopy(localSlot);
}
for (label i = 1; i < procIDs.size(); ++i)
{
const SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]);
if (is_contiguous<Type>::value)
if (procSlot.empty())
{
// Nothing to do
}
else if (is_contiguous<Type>::value)
{
OPstream::write
(
@ -740,21 +715,30 @@ void Foam::globalIndex::scatter
}
else
{
OPstream toProc
(
commsType,
procIDs[i],
0,
tag,
comm
);
OPstream toProc(commsType, procIDs[i], 0, tag, comm);
toProc << procSlot;
}
}
// Assign my local data - respect offset information
// so that we can request 0 entries to be copied.
// Also handle the case where we have a slice of the full
// list.
SubList<Type>(fld, off[1]-off[0]) =
SubList<Type>(allFld, off[1]-off[0], off[0]);
}
else
{
if (is_contiguous<Type>::value)
// Note: we are receiving into UList, so sizes MUST match or we
// have a problem. Can therefore reasonably assume that a zero-sized
// send matches a zero-sized receive, and we can skip that.
if (fld.empty())
{
// Nothing to do
}
else if (is_contiguous<Type>::value)
{
IPstream::read
(
@ -768,14 +752,7 @@ void Foam::globalIndex::scatter
}
else
{
IPstream fromMaster
(
commsType,
procIDs[0],
0,
tag,
comm
);
IPstream fromMaster(commsType, procIDs[0], 0, tag, comm);
fromMaster >> fld;
}
}
@ -791,26 +768,64 @@ void Foam::globalIndex::scatter
template<class Type>
void Foam::globalIndex::scatter
(
const UList<Type>& allFld,
UList<Type>& fld,
const UList<Type>& allData,
UList<Type>& localData,
const int tag,
const Pstream::commsTypes commsType,
const label comm
) const
{
// TBD: protection and special handling for serial?
if (UPstream::parRun())
{
scatter
(
offsets_, // needed on master only
comm,
UPstream::procID(comm),
allFld,
fld,
allData,
localData,
tag,
commsType
);
}
else
{
// Serial: direct copy
// - fails miserably if incorrectly dimensioned!
localData.deepCopy(allData);
}
}
template<class Type, class OutputContainer>
OutputContainer Foam::globalIndex::scatter
(
const UList<Type>& allData,
const int tag,
const Pstream::commsTypes commsType,
const label comm
) const
{
if (UPstream::parRun())
{
// The globalIndex might be correct on master only,
// so scatter local sizes to ensure consistency
const label localLen
(
UPstream::listScatterValues<label>(this->localSizes(), comm)
);
OutputContainer localData(localLen);
this->scatter(allData, localData, tag, commsType, comm);
return localData;
}
else
{
// Serial: direct copy
return OutputContainer(allData);
}
}