ENH: mapDistribute: add separate input and output

To avoid making initial copy of input to output field.
This commit is contained in:
mattijs
2025-07-17 09:59:46 +01:00
parent 7a8089e076
commit 9629ae8aa9
4 changed files with 303 additions and 3 deletions

View File

@ -262,6 +262,7 @@ OptimisationSwitches
//localAMIComm 0; // old behaviour : all ranks included
//localAMIComm 1; // (default) only ranks that have patch faces
//localAMIComm 2; // like 1 but always include rank 0 for messages
localAMIComm 1; // only ranks that have patch faces
}

View File

@ -218,7 +218,7 @@ public:
void operator()
(
const vectorTensorTransform& vt,
const bool forward,
[[maybe_unused]] const bool forward,
UList<Type>& fld
) const
{
@ -233,7 +233,7 @@ public:
void operator()
(
const vectorTensorTransform& vt,
const bool forward,
[[maybe_unused]] const bool forward,
List<List<Type>>& flds
) const
{

View File

@ -6,7 +6,7 @@
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2015-2017 OpenFOAM Foundation
Copyright (C) 2015-2023 OpenCFD Ltd.
Copyright (C) 2015-2025 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
@ -909,6 +909,36 @@ public:
const label comm = UPstream::worldComm
);
//- Distribute combine data with specified combine operation
//- and negate operator (for flips).
//
// If multiple processors write to same position,
// contributions are added using the combine cop.
//
// \note schedule only used for UPstream::commsTypes::scheduled,
// others just use send-to-all, receive-from-all.
template<class T, class CombineOp, class NegateOp>
static void distribute
(
const UPstream::commsTypes commsType,
const UList<labelPair>& schedule,
const UList<T>& inField, // input field
const labelListList& subMap,
const bool subHasFlip,
List<T>& field, // output field
const label constructSize,
const labelListList& constructMap,
const bool constructHasFlip,
const T& nullValue,
const CombineOp& cop,
const NegateOp& negOp,
const int tag = UPstream::msgType(),
const label comm = UPstream::worldComm
);
//- Distribute assign data with specified negate operator (for flips).
//- Uses assignment for combine operation.
//

View File

@ -894,6 +894,275 @@ void Foam::mapDistributeBase::distribute
}
template<class T, class CombineOp, class NegateOp>
void Foam::mapDistributeBase::distribute
(
const UPstream::commsTypes commsType,
const UList<labelPair>& schedule,
const UList<T>& inField, // input field
const labelListList& subMap,
const bool subHasFlip,
List<T>& field,
const label constructSize,
const labelListList& constructMap,
const bool constructHasFlip,
const T& nullValue,
const CombineOp& cop,
const NegateOp& negOp,
const int tag,
const label comm
)
{
const auto myRank = UPstream::myProcNo(comm);
const auto nProcs = UPstream::nProcs(comm);
if (!UPstream::parRun())
{
// Do only me to me.
List<T> subField
(
accessAndFlip(inField, subMap[myRank], subHasFlip, negOp)
);
// Receive sub field from myself (subField)
const labelList& map = constructMap[myRank];
// Combining bits - can now reuse field storage
field.resize_nocopy(constructSize);
field = nullValue;
flipAndCombine
(
field,
subField,
map,
constructHasFlip,
cop,
negOp
);
return;
}
if (commsType != UPstream::commsTypes::nonBlocking)
{
FatalErrorInFunction
<< "Unsupport communication type " << int(commsType)
<< abort(FatalError);
}
const label startOfRequests = UPstream::nRequests();
if (!is_contiguous<T>::value)
{
PstreamBuffers pBufs(comm, tag);
// Stream data into buffer
for (const int proci : UPstream::allProcs(comm))
{
const labelList& map = subMap[proci];
if (proci != myRank && map.size())
{
List<T> subField
(
accessAndFlip(inField, map, subHasFlip, negOp)
);
UOPstream os(proci, pBufs);
os << subField;
}
}
// Initiate receiving - do yet not block
pBufs.finishedSends(false);
{
// Set up 'send' to myself
List<T> subField
(
accessAndFlip(inField, subMap[myRank], subHasFlip, negOp)
);
// Combining bits - can now reuse field storage
field.resize_nocopy(constructSize);
field = nullValue;
// Receive sub field from myself
const labelList& map = constructMap[myRank];
flipAndCombine
(
field,
subField,
map,
constructHasFlip,
cop,
negOp
);
}
// Wait for receive requests (and the send requests too)
UPstream::waitRequests(startOfRequests);
// Receive and process neighbour fields
for (const int proci : UPstream::allProcs(comm))
{
const labelList& map = constructMap[proci];
if (proci != myRank && map.size())
{
UIPstream is(proci, pBufs);
List<T> subField(is);
checkReceivedSize(proci, map.size(), subField.size());
flipAndCombine
(
field,
subField,
map,
constructHasFlip,
cop,
negOp
);
}
}
}
else
{
// Set up receives from neighbours
List<List<T>> recvFields(nProcs);
DynamicList<int> recvProcs(nProcs);
for (const int proci : UPstream::allProcs(comm))
{
const labelList& map = constructMap[proci];
if (proci != myRank && map.size())
{
recvProcs.push_back(proci);
List<T>& subField = recvFields[proci];
subField.resize_nocopy(map.size());
UIPstream::read
(
UPstream::commsTypes::nonBlocking,
proci,
subField.data_bytes(),
subField.size_bytes(),
tag,
comm
);
}
}
// Set up sends to neighbours
List<List<T>> sendFields(nProcs);
for (const int proci : UPstream::allProcs(comm))
{
const labelList& map = subMap[proci];
if (proci != myRank && map.size())
{
List<T>& subField = sendFields[proci];
subField.resize_nocopy(map.size());
accessAndFlip(subField, inField, map, subHasFlip, negOp);
UOPstream::write
(
UPstream::commsTypes::nonBlocking,
proci,
subField.cdata_bytes(),
subField.size_bytes(),
tag,
comm
);
}
}
// Set up 'send' to myself - copy directly into recvFields
{
const labelList& map = subMap[myRank];
List<T>& subField = recvFields[myRank];
subField.resize_nocopy(map.size());
accessAndFlip(subField, inField, map, subHasFlip, negOp);
}
// Combining bits - can now reuse field storage
field.resize_nocopy(constructSize);
field = nullValue;
// Receive sub field from myself : recvFields[myRank]
{
const labelList& map = constructMap[myRank];
const List<T>& subField = recvFields[myRank];
// Probably don't need a size check
// checkReceivedSize(myRank, map.size(), subField.size());
flipAndCombine
(
field,
subField,
map,
constructHasFlip,
cop,
negOp
);
}
// Poll for completed receive requests and dispatch
DynamicList<int> indices(recvProcs.size());
while
(
UPstream::waitSomeRequests
(
startOfRequests,
recvProcs.size(),
&indices
)
)
{
for (const int idx : indices)
{
const int proci = recvProcs[idx];
const labelList& map = constructMap[proci];
const List<T>& subField = recvFields[proci];
// No size check - was dimensioned above
// checkReceivedSize(proci, map.size(), subField.size());
flipAndCombine
(
field,
subField,
map,
constructHasFlip,
cop,
negOp
);
}
}
// Wait for any remaining requests
UPstream::waitRequests(startOfRequests);
}
}
template<class T, class NegateOp>
void Foam::mapDistributeBase::distribute
(