ENH: improve procLduMatrix streaming and assembly of LUscalarMatrix

- reduce local overhead prior to sending for master assembly
- non-blocking mode when assembling solution vector
- automatic resizing of pivots
This commit is contained in:
Mark Olesen
2024-02-08 11:20:08 +01:00
parent 43c0c3989b
commit 27aa7e4e91
11 changed files with 340 additions and 230 deletions

View File

@ -6,7 +6,7 @@
\\/ M anipulation |
-------------------------------------------------------------------------------
Copyright (C) 2011-2017 OpenFOAM Foundation
Copyright (C) 2020 OpenCFD Ltd.
Copyright (C) 2020-2024 OpenCFD Ltd.
-------------------------------------------------------------------------------
License
This file is part of OpenFOAM.
@ -42,17 +42,16 @@ namespace Foam
// * * * * * * * * * * * * * * * * Constructors * * * * * * * * * * * * * * //
Foam::LUscalarMatrix::LUscalarMatrix()
Foam::LUscalarMatrix::LUscalarMatrix() noexcept
:
comm_(UPstream::worldComm)
{}
Foam::LUscalarMatrix::LUscalarMatrix(const scalarSquareMatrix& matrix)
Foam::LUscalarMatrix::LUscalarMatrix(const scalarSquareMatrix& mat)
:
scalarSquareMatrix(matrix),
comm_(UPstream::worldComm),
pivotIndices_(m())
scalarSquareMatrix(mat),
comm_(UPstream::worldComm)
{
LUDecompose(*this, pivotIndices_);
}
@ -69,13 +68,14 @@ Foam::LUscalarMatrix::LUscalarMatrix
{
if (UPstream::parRun())
{
PtrList<procLduMatrix> lduMatrices(UPstream::nProcs(comm_));
label lduMatrixi = 0;
PtrList<procLduMatrix> lduMatrices
(
UPstream::master(comm_) ? UPstream::nProcs(comm_) : 1
);
lduMatrices.set
(
lduMatrixi++,
0, // rank-local matrix (and/or master)
new procLduMatrix
(
ldum,
@ -88,101 +88,66 @@ Foam::LUscalarMatrix::LUscalarMatrix
{
for (const int proci : UPstream::subProcs(comm_))
{
lduMatrices.set
(
lduMatrixi++,
new procLduMatrix
(
IPstream
(
UPstream::commsTypes::scheduled,
proci,
0, // bufSize
UPstream::msgType(),
comm_
)()
)
);
auto& mat = lduMatrices.emplace_set(proci);
IPstream::recv(mat, proci, UPstream::msgType(), comm_);
}
convert(lduMatrices);
}
else
{
OPstream toMaster
OPstream::send
(
UPstream::commsTypes::scheduled,
lduMatrices[0], // rank-local matrix
UPstream::masterNo(),
0, // bufSize
UPstream::msgType(),
comm_
);
procLduMatrix cldum
(
ldum,
interfaceCoeffs,
interfaces
);
toMaster<< cldum;
}
if (UPstream::master(comm_))
{
label nCells = 0;
forAll(lduMatrices, i)
{
nCells += lduMatrices[i].size();
}
scalarSquareMatrix m(nCells, 0.0);
transfer(m);
convert(lduMatrices);
}
}
else
{
label nCells = ldum.lduAddr().size();
scalarSquareMatrix m(nCells, Zero);
transfer(m);
convert(ldum, interfaceCoeffs, interfaces);
}
if (debug && UPstream::master(comm_))
{
const label numRows = nRows();
const label numCols = nCols();
Pout<< "LUscalarMatrix : size:" << numRows << endl;
for (label rowi = 0; rowi < numRows; ++rowi)
{
const scalar* row = operator[](rowi);
Pout<< "cell:" << rowi << " diagCoeff:" << row[rowi] << nl;
Pout<< " connects to upper cells :";
for (label coli = rowi+1; coli < numCols; ++coli)
{
if (mag(row[coli]) > SMALL)
{
Pout<< ' ' << coli << " (coeff:" << row[coli] << ')';
}
}
Pout<< nl;
Pout<< " connects to lower cells :";
for (label coli = 0; coli < rowi; ++coli)
{
if (mag(row[coli]) > SMALL)
{
Pout<< ' ' << coli << " (coeff:" << row[coli] << ')';
}
}
Pout<< nl;
}
Pout<< endl;
}
if (UPstream::master(comm_))
{
if (debug)
{
const label numRows = m();
const label numCols = n();
Pout<< "LUscalarMatrix : size:" << numRows << endl;
for (label rowi = 0; rowi < numRows; ++rowi)
{
const scalar* row = operator[](rowi);
Pout<< "cell:" << rowi << " diagCoeff:" << row[rowi] << endl;
Pout<< " connects to upper cells :";
for (label coli = rowi+1; coli < numCols; ++coli)
{
if (mag(row[coli]) > SMALL)
{
Pout<< ' ' << coli << " (coeff:" << row[coli] << ')';
}
}
Pout<< endl;
Pout<< " connects to lower cells :";
for (label coli = 0; coli < rowi; ++coli)
{
if (mag(row[coli]) > SMALL)
{
Pout<< ' ' << coli << " (coeff:" << row[coli] << ')';
}
}
Pout<< nl;
}
Pout<< nl;
}
pivotIndices_.setSize(m());
LUDecompose(*this, pivotIndices_);
}
}
@ -197,6 +162,10 @@ void Foam::LUscalarMatrix::convert
const lduInterfaceFieldPtrsList& interfaces
)
{
// Resize and fill with zero
scalarSquareMatrix::resize_nocopy(ldum.lduAddr().size());
scalarSquareMatrix::operator=(Foam::zero{});
const label* __restrict__ uPtr = ldum.lduAddr().upperAddr().begin();
const label* __restrict__ lPtr = ldum.lduAddr().lowerAddr().begin();
@ -259,14 +228,26 @@ void Foam::LUscalarMatrix::convert
const PtrList<procLduMatrix>& lduMatrices
)
{
procOffsets_.setSize(lduMatrices.size() + 1);
procOffsets_[0] = 0;
procOffsets_.resize_nocopy(lduMatrices.size() + 1);
forAll(lduMatrices, ldumi)
{
procOffsets_[ldumi+1] = procOffsets_[ldumi] + lduMatrices[ldumi].size();
auto iter = procOffsets_.begin();
label nCellsTotal = 0;
*iter++ = nCellsTotal;
for (const auto& mat : lduMatrices)
{
nCellsTotal += mat.size();
*iter++ = nCellsTotal;
}
// Resize and fill with zero
scalarSquareMatrix::resize_nocopy(nCellsTotal);
scalarSquareMatrix::operator=(Foam::zero{});
}
forAll(lduMatrices, ldumi)
{
const procLduMatrix& lduMatrixi = lduMatrices[ldumi];
@ -400,10 +381,9 @@ void Foam::LUscalarMatrix::printDiagonalDominance() const
}
void Foam::LUscalarMatrix::decompose(const scalarSquareMatrix& M)
void Foam::LUscalarMatrix::decompose(const scalarSquareMatrix& mat)
{
scalarSquareMatrix::operator=(M);
pivotIndices_.setSize(m());
scalarSquareMatrix::operator=(mat);
LUDecompose(*this, pivotIndices_);
}