ENH: globalIndex gather/scatter scheduled (not nonBlocking) for non-contiguous

- reduces later surprises and simplifies effort for the caller

- more flexible globalIndex scatter with auto-sized return field.

- Avoid communication for scattering into zero-sized fields.
This commit is contained in:
Mark Olesen
2022-02-22 19:59:13 +01:00
committed by Andrew Heather
parent ff4998d364
commit c178fe8ec1
4 changed files with 162 additions and 125 deletions

View File

@ -34,16 +34,16 @@ License
void Foam::globalIndex::reportOverflowAndExit void Foam::globalIndex::reportOverflowAndExit
( (
const label idx, const label idx,
const labelUList& localSizes const labelUList& localLens
) )
{ {
FatalErrorInFunction FatalErrorInFunction
<< "Overflow : sum of sizes exceeds labelMax (" << "Overflow : sum of sizes exceeds labelMax ("
<< labelMax << ") after index " << idx; << labelMax << ") after index " << idx;
if (!localSizes.empty()) if (!localLens.empty())
{ {
FatalError << " of " << flatOutput(localSizes); FatalError << " of " << flatOutput(localLens);
} }
FatalError FatalError
@ -56,13 +56,13 @@ void Foam::globalIndex::reportOverflowAndExit
Foam::labelList Foam::labelList
Foam::globalIndex::calcOffsets Foam::globalIndex::calcOffsets
( (
const labelUList& localSizes, const labelUList& localLens,
const bool checkOverflow const bool checkOverflow
) )
{ {
labelList values; labelList values;
const label len = localSizes.size(); const label len = localLens.size();
if (len) if (len)
{ {
@ -72,11 +72,11 @@ Foam::globalIndex::calcOffsets
for (label i = 0; i < len; ++i) for (label i = 0; i < len; ++i)
{ {
values[i] = start; values[i] = start;
start += localSizes[i]; start += localLens[i];
if (checkOverflow && start < values[i]) if (checkOverflow && start < values[i])
{ {
reportOverflowAndExit(i, localSizes); reportOverflowAndExit(i, localLens);
} }
} }
values[len] = start; values[len] = start;
@ -89,13 +89,13 @@ Foam::globalIndex::calcOffsets
Foam::List<Foam::labelRange> Foam::List<Foam::labelRange>
Foam::globalIndex::calcRanges Foam::globalIndex::calcRanges
( (
const labelUList& localSizes, const labelUList& localLens,
const bool checkOverflow const bool checkOverflow
) )
{ {
List<labelRange> values; List<labelRange> values;
const label len = localSizes.size(); const label len = localLens.size();
if (len) if (len)
{ {
@ -104,12 +104,12 @@ Foam::globalIndex::calcRanges
label start = 0; label start = 0;
for (label i = 0; i < len; ++i) for (label i = 0; i < len; ++i)
{ {
values[i].reset(start, localSizes[i]); values[i].reset(start, localLens[i]);
start += localSizes[i]; start += localLens[i];
if (checkOverflow && start < values[i].start()) if (checkOverflow && start < values[i].start())
{ {
reportOverflowAndExit(i, localSizes); reportOverflowAndExit(i, localLens);
} }
} }
} }
@ -221,16 +221,16 @@ void Foam::globalIndex::reset
// Seed with localSize, zero elsewhere (for non-parallel branch) // Seed with localSize, zero elsewhere (for non-parallel branch)
// NB: can consider UPstream::listGatherValues // NB: can consider UPstream::listGatherValues
labelList localSizes(len, Zero); labelList localLens(len, Zero);
localSizes[Pstream::myProcNo(comm)] = localSize; localLens[Pstream::myProcNo(comm)] = localSize;
if (parallel) if (parallel)
{ {
Pstream::gatherList(localSizes, tag, comm); Pstream::gatherList(localLens, tag, comm);
Pstream::scatterList(localSizes, tag, comm); Pstream::scatterList(localLens, tag, comm);
} }
reset(localSizes, true); // checkOverflow = true reset(localLens, true); // checkOverflow = true
} }
else else
{ {
@ -242,11 +242,11 @@ void Foam::globalIndex::reset
void Foam::globalIndex::reset void Foam::globalIndex::reset
( (
const labelUList& localSizes, const labelUList& localLens,
const bool checkOverflow const bool checkOverflow
) )
{ {
const label len = localSizes.size(); const label len = localLens.size();
if (len) if (len)
{ {
@ -256,11 +256,11 @@ void Foam::globalIndex::reset
for (label i = 0; i < len; ++i) for (label i = 0; i < len; ++i)
{ {
offsets_[i] = start; offsets_[i] = start;
start += localSizes[i]; start += localLens[i];
if (checkOverflow && start < offsets_[i]) if (checkOverflow && start < offsets_[i])
{ {
reportOverflowAndExit(i, localSizes); reportOverflowAndExit(i, localLens);
} }
} }
offsets_[len] = start; offsets_[len] = start;
@ -290,7 +290,7 @@ void Foam::globalIndex::setLocalSize(const label proci, const label len)
} }
Foam::labelList Foam::globalIndex::sizes() const Foam::labelList Foam::globalIndex::localSizes() const
{ {
labelList values; labelList values;

View File

@ -89,7 +89,7 @@ class globalIndex
static void reportOverflowAndExit static void reportOverflowAndExit
( (
const label idx, const label idx,
const labelUList& localSizes = labelUList::null() const labelUList& localLens = labelUList::null()
); );
public: public:
@ -178,12 +178,15 @@ public:
//- Global sum of localSizes. //- Global sum of localSizes.
inline label totalSize() const; inline label totalSize() const;
//- The local sizes //- The local sizes. Same as localSizes()
labelList sizes() const; inline labelList sizes() const;
//- The local starts //- The local starts
inline const labelUList localStarts() const; inline const labelUList localStarts() const;
//- The local sizes
labelList localSizes() const;
//- Global max of localSizes //- Global max of localSizes
inline label maxSize() const; inline label maxSize() const;
@ -276,7 +279,7 @@ public:
inline labelList toGlobal(const labelUList& labels) const; inline labelList toGlobal(const labelUList& labels) const;
//- From local to global index (inplace) //- From local to global index (inplace)
inline void inplaceToGlobal(labelList& labels) const; inline void inplaceToGlobal(labelUList& labels) const;
//- From global to local on current processor. //- From global to local on current processor.
// FatalError if not on local processor. // FatalError if not on local processor.
@ -314,7 +317,7 @@ public:
inline void inplaceToGlobal inline void inplaceToGlobal
( (
const label proci, const label proci,
labelList& labels labelUList& labels
) const; ) const;
@ -405,7 +408,7 @@ public:
//- with optional check for label overflow //- with optional check for label overflow
static labelList calcOffsets static labelList calcOffsets
( (
const labelUList& localSizes, const labelUList& localLens,
const bool checkOverflow = false const bool checkOverflow = false
); );
@ -422,7 +425,7 @@ public:
//- with optional check for label overflow //- with optional check for label overflow
static List<labelRange> calcRanges static List<labelRange> calcRanges
( (
const labelUList& localSizes, const labelUList& localLens,
const bool checkOverflow = false const bool checkOverflow = false
); );
@ -774,8 +777,21 @@ public:
template<class Type> template<class Type>
void scatter void scatter
( (
const UList<Type>& allFld, const UList<Type>& allData,
UList<Type>& fld, UList<Type>& localData,
const int tag = UPstream::msgType(),
const Pstream::commsTypes = Pstream::commsTypes::nonBlocking,
const label comm = UPstream::worldComm //!< communicator
) const;
//- Distribute data in processor order
//- (in serial: performs a simple copy).
// Communication with default/specified communicator, message tag.
// \note the globalIndex offsets needed on master only.
template<class Type, class OutputContainer = List<Type>>
OutputContainer scatter
(
const UList<Type>& allData,
const int tag = UPstream::msgType(), const int tag = UPstream::msgType(),
const Pstream::commsTypes = Pstream::commsTypes::nonBlocking, const Pstream::commsTypes = Pstream::commsTypes::nonBlocking,
const label comm = UPstream::worldComm //!< communicator const label comm = UPstream::worldComm //!< communicator

View File

@ -142,6 +142,12 @@ inline Foam::label Foam::globalIndex::size() const
} }
inline Foam::labelList Foam::globalIndex::sizes() const
{
return localSizes();
}
inline Foam::label Foam::globalIndex::nProcs() const noexcept inline Foam::label Foam::globalIndex::nProcs() const noexcept
{ {
const label len = (offsets_.size() - 1); const label len = (offsets_.size() - 1);
@ -289,7 +295,7 @@ inline Foam::labelList Foam::globalIndex::toGlobal
inline void Foam::globalIndex::inplaceToGlobal inline void Foam::globalIndex::inplaceToGlobal
( (
const label proci, const label proci,
labelList& labels labelUList& labels
) const ) const
{ {
const label off = offsets_[proci]; const label off = offsets_[proci];
@ -301,7 +307,7 @@ inline void Foam::globalIndex::inplaceToGlobal
} }
inline void Foam::globalIndex::inplaceToGlobal(labelList& labels) const inline void Foam::globalIndex::inplaceToGlobal(labelUList& labels) const
{ {
inplaceToGlobal(Pstream::myProcNo(), labels); inplaceToGlobal(Pstream::myProcNo(), labels);
} }

View File

@ -73,22 +73,22 @@ void Foam::globalIndex::gather
const UList<Type>& fld, const UList<Type>& fld,
List<Type>& allFld, List<Type>& allFld,
const int tag, const int tag,
const Pstream::commsTypes commsType const Pstream::commsTypes preferredCommsType
) )
{ {
// low-level: no parRun guard // low-level: no parRun guard
if // Automatically change from nonBlocking to scheduled for
// non-contiguous data.
const UPstream::commsTypes commsType =
( (
!is_contiguous<Type>::value (
&& commsType == Pstream::commsTypes::nonBlocking !is_contiguous<Type>::value
) && UPstream::commsTypes::nonBlocking == preferredCommsType
{ )
FatalErrorInFunction ? UPstream::commsTypes::scheduled
<< "Cannot use nonBlocking with non-contiguous data" : preferredCommsType
<< exit(FatalError); );
// Could also warn and change to scheduled etc...
}
const label startOfRequests = UPstream::nRequests(); const label startOfRequests = UPstream::nRequests();
@ -126,14 +126,7 @@ void Foam::globalIndex::gather
} }
else else
{ {
IPstream fromProc IPstream fromProc(commsType, procIDs[i], 0, tag, comm);
(
commsType,
procIDs[i],
0,
tag,
comm
);
fromProc >> procSlot; fromProc >> procSlot;
} }
} }
@ -158,14 +151,7 @@ void Foam::globalIndex::gather
} }
else else
{ {
OPstream toMaster OPstream toMaster(commsType, procIDs[0], 0, tag, comm);
(
commsType,
procIDs[0],
0,
tag,
comm
);
toMaster << fld; toMaster << fld;
} }
} }
@ -187,18 +173,22 @@ void Foam::globalIndex::gather
const IndirectListBase<Type, Addr>& fld, const IndirectListBase<Type, Addr>& fld,
List<Type>& allFld, List<Type>& allFld,
const int tag, const int tag,
const Pstream::commsTypes commsType const Pstream::commsTypes preferredCommsType
) )
{ {
// low-level: no parRun guard // low-level: no parRun guard
if (commsType == Pstream::commsTypes::nonBlocking) // Automatically change from nonBlocking to scheduled for
{ // non-contiguous data.
WarningInFunction const UPstream::commsTypes commsType =
<< "Cannot use nonBlocking with indirect list of data" (
<< exit(FatalError); (
// Could also warn and change to scheduled etc... !is_contiguous<Type>::value
} && UPstream::commsTypes::nonBlocking == preferredCommsType
)
? UPstream::commsTypes::scheduled
: preferredCommsType
);
if (Pstream::myProcNo(comm) == procIDs[0]) if (Pstream::myProcNo(comm) == procIDs[0])
{ {
@ -224,14 +214,7 @@ void Foam::globalIndex::gather
} }
else else
{ {
IPstream fromProc IPstream fromProc(commsType, procIDs[i], 0, tag, comm);
(
commsType,
procIDs[i],
0,
tag,
comm
);
fromProc >> procSlot; fromProc >> procSlot;
} }
} }
@ -244,14 +227,7 @@ void Foam::globalIndex::gather
} }
else else
{ {
OPstream toMaster OPstream toMaster(commsType, procIDs[0], 0, tag, comm);
(
commsType,
procIDs[0],
0,
tag,
comm
);
toMaster << fld; toMaster << fld;
} }
} }
@ -692,20 +668,22 @@ void Foam::globalIndex::scatter
const UList<Type>& allFld, const UList<Type>& allFld,
UList<Type>& fld, UList<Type>& fld,
const int tag, const int tag,
const Pstream::commsTypes commsType const Pstream::commsTypes preferredCommsType
) )
{ {
if // low-level: no parRun guard
// Automatically change from nonBlocking to scheduled for
// non-contiguous data.
const UPstream::commsTypes commsType =
( (
!is_contiguous<Type>::value (
&& commsType == Pstream::commsTypes::nonBlocking !is_contiguous<Type>::value
) && UPstream::commsTypes::nonBlocking == preferredCommsType
{ )
FatalErrorInFunction ? UPstream::commsTypes::scheduled
<< "Cannot use nonBlocking with non-contiguous data" : preferredCommsType
<< exit(FatalError); );
// Could also warn and change to scheduled etc...
}
// FUTURE: // FUTURE:
// could decide which procs will receive data and use mpiScatter // could decide which procs will receive data and use mpiScatter
@ -715,18 +693,15 @@ void Foam::globalIndex::scatter
if (Pstream::myProcNo(comm) == procIDs[0]) if (Pstream::myProcNo(comm) == procIDs[0])
{ {
const SubList<Type> localSlot(allFld, off[1]-off[0], off[0]);
if (!localSlot.empty())
{
fld.deepCopy(localSlot);
}
for (label i = 1; i < procIDs.size(); ++i) for (label i = 1; i < procIDs.size(); ++i)
{ {
const SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]); const SubList<Type> procSlot(allFld, off[i+1]-off[i], off[i]);
if (is_contiguous<Type>::value) if (procSlot.empty())
{
// Nothing to do
}
else if (is_contiguous<Type>::value)
{ {
OPstream::write OPstream::write
( (
@ -740,21 +715,30 @@ void Foam::globalIndex::scatter
} }
else else
{ {
OPstream toProc OPstream toProc(commsType, procIDs[i], 0, tag, comm);
(
commsType,
procIDs[i],
0,
tag,
comm
);
toProc << procSlot; toProc << procSlot;
} }
} }
// Assign my local data - respect offset information
// so that we can request 0 entries to be copied.
// Also handle the case where we have a slice of the full
// list.
SubList<Type>(fld, off[1]-off[0]) =
SubList<Type>(allFld, off[1]-off[0], off[0]);
} }
else else
{ {
if (is_contiguous<Type>::value) // Note: we are receiving into UList, so sizes MUST match or we
// have a problem. Can therefore reasonably assume that a zero-sized
// send matches a zero-sized receive, and we can skip that.
if (fld.empty())
{
// Nothing to do
}
else if (is_contiguous<Type>::value)
{ {
IPstream::read IPstream::read
( (
@ -768,14 +752,7 @@ void Foam::globalIndex::scatter
} }
else else
{ {
IPstream fromMaster IPstream fromMaster(commsType, procIDs[0], 0, tag, comm);
(
commsType,
procIDs[0],
0,
tag,
comm
);
fromMaster >> fld; fromMaster >> fld;
} }
} }
@ -791,26 +768,64 @@ void Foam::globalIndex::scatter
template<class Type> template<class Type>
void Foam::globalIndex::scatter void Foam::globalIndex::scatter
( (
const UList<Type>& allFld, const UList<Type>& allData,
UList<Type>& fld, UList<Type>& localData,
const int tag, const int tag,
const Pstream::commsTypes commsType, const Pstream::commsTypes commsType,
const label comm const label comm
) const ) const
{ {
// TBD: protection and special handling for serial? if (UPstream::parRun())
{ {
scatter scatter
( (
offsets_, // needed on master only offsets_, // needed on master only
comm, comm,
UPstream::procID(comm), UPstream::procID(comm),
allFld, allData,
fld, localData,
tag, tag,
commsType commsType
); );
} }
else
{
// Serial: direct copy
// - fails miserably if incorrectly dimensioned!
localData.deepCopy(allData);
}
}
template<class Type, class OutputContainer>
OutputContainer Foam::globalIndex::scatter
(
const UList<Type>& allData,
const int tag,
const Pstream::commsTypes commsType,
const label comm
) const
{
if (UPstream::parRun())
{
// The globalIndex might be correct on master only,
// so scatter local sizes to ensure consistency
const label localLen
(
UPstream::listScatterValues<label>(this->localSizes(), comm)
);
OutputContainer localData(localLen);
this->scatter(allData, localData, tag, commsType, comm);
return localData;
}
else
{
// Serial: direct copy
return OutputContainer(allData);
}
} }