mirror of
https://develop.openfoam.com/Development/openfoam.git
synced 2025-11-28 03:28:01 +00:00
ENH: improve robustness of MPI start/stop
- warn or fatal if Pstream::init or Pstream::exit are called multiple times. - additional Pstream::initNull method as failsafe to initialize MPI when the underlying OpenFOAM process is not running in parallel but the application still needs MPI. - Pstream::exit() can now also be called without having used MPI::init(), which means it can be used to cleanup serial process or for applications that used the special purpose Pstream::initNull() mechanism.
This commit is contained in:
@ -69,6 +69,7 @@ public:
|
|||||||
nonBlocking
|
nonBlocking
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//- Names of the communication types
|
||||||
static const Enum<commsTypes> commsTypeNames;
|
static const Enum<commsTypes> commsTypeNames;
|
||||||
|
|
||||||
// Public classes
|
// Public classes
|
||||||
@ -87,8 +88,8 @@ public:
|
|||||||
//- procIDs of all processors below (so not just directly below)
|
//- procIDs of all processors below (so not just directly below)
|
||||||
labelList allBelow_;
|
labelList allBelow_;
|
||||||
|
|
||||||
//- procIDs of all processors not below. (inverse set of
|
//- procIDs of all processors not below.
|
||||||
// allBelow_ and minus myProcNo)
|
// (inverse set of allBelow_ and minus myProcNo)
|
||||||
labelList allNotBelow_;
|
labelList allNotBelow_;
|
||||||
|
|
||||||
|
|
||||||
@ -102,10 +103,10 @@ public:
|
|||||||
//- Construct from components
|
//- Construct from components
|
||||||
commsStruct
|
commsStruct
|
||||||
(
|
(
|
||||||
const label,
|
const label above,
|
||||||
const labelList&,
|
const labelList& below,
|
||||||
const labelList&,
|
const labelList& allBelow,
|
||||||
const labelList&
|
const labelList& allNotBelow
|
||||||
);
|
);
|
||||||
|
|
||||||
//- Construct from components; construct allNotBelow_
|
//- Construct from components; construct allNotBelow_
|
||||||
@ -113,9 +114,9 @@ public:
|
|||||||
(
|
(
|
||||||
const label nProcs,
|
const label nProcs,
|
||||||
const label myProcID,
|
const label myProcID,
|
||||||
const label,
|
const label above,
|
||||||
const labelList&,
|
const labelList& below,
|
||||||
const labelList&
|
const labelList& allBelow
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
@ -255,12 +256,12 @@ public:
|
|||||||
// Static data
|
// Static data
|
||||||
|
|
||||||
//- Should compact transfer be used in which floats replace doubles
|
//- Should compact transfer be used in which floats replace doubles
|
||||||
// reducing the bandwidth requirement at the expense of some loss
|
//- reducing the bandwidth requirement at the expense of some loss
|
||||||
// in accuracy
|
//- in accuracy
|
||||||
static bool floatTransfer;
|
static bool floatTransfer;
|
||||||
|
|
||||||
//- Number of processors at which the sum algorithm changes from linear
|
//- Number of processors at which the sum algorithm changes from linear
|
||||||
// to tree
|
//- to tree
|
||||||
static int nProcsSimpleSum;
|
static int nProcsSimpleSum;
|
||||||
|
|
||||||
//- Default commsType
|
//- Default commsType
|
||||||
@ -344,15 +345,15 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
//- Return physical processor number (i.e. processor number in
|
//- Return physical processor number (i.e. processor number in
|
||||||
// worldComm) given communicator and procssor
|
//- worldComm) given communicator and procssor
|
||||||
static int baseProcNo(const label myComm, const int procID);
|
static int baseProcNo(const label myComm, const int procID);
|
||||||
|
|
||||||
//- Return processor number in communicator (given physical processor
|
//- Return processor number in communicator (given physical processor
|
||||||
// number) (= reverse of baseProcNo)
|
//- number) (= reverse of baseProcNo)
|
||||||
static label procNo(const label comm, const int baseProcID);
|
static label procNo(const label comm, const int baseProcID);
|
||||||
|
|
||||||
//- Return processor number in communicator (given processor number
|
//- Return processor number in communicator (given processor number
|
||||||
// and communicator)
|
//- and communicator)
|
||||||
static label procNo
|
static label procNo
|
||||||
(
|
(
|
||||||
const label myComm,
|
const label myComm,
|
||||||
@ -361,13 +362,23 @@ public:
|
|||||||
);
|
);
|
||||||
|
|
||||||
//- Add the valid option this type of communications library
|
//- Add the valid option this type of communications library
|
||||||
// adds/requires on the command line
|
//- adds/requires on the command line
|
||||||
static void addValidParOptions(HashTable<string>& validParOptions);
|
static void addValidParOptions(HashTable<string>& validParOptions);
|
||||||
|
|
||||||
//- Initialisation function called from main
|
//- Initialisation function called from main
|
||||||
// Spawns slave processes and initialises inter-communication
|
// Spawns slave processes and initialises inter-communication.
|
||||||
|
// \note warns if MPI has already been initialized.
|
||||||
|
// Fatal if MPI has already been finalized.
|
||||||
static bool init(int& argc, char**& argv);
|
static bool init(int& argc, char**& argv);
|
||||||
|
|
||||||
|
//- Special purpose initialisation function.
|
||||||
|
// Performs a basic MPI_Init without any other setup.
|
||||||
|
// Only used for applications that need MPI communication when
|
||||||
|
// OpenFOAM is running in a non-parallel mode.
|
||||||
|
// \note Behaves as a no-op if MPI has already been initialized.
|
||||||
|
// Fatal if MPI has already been finalized.
|
||||||
|
static bool initNull();
|
||||||
|
|
||||||
// Non-blocking comms
|
// Non-blocking comms
|
||||||
|
|
||||||
//- Get number of outstanding requests
|
//- Get number of outstanding requests
|
||||||
@ -401,7 +412,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
//- Set data for parallel running. Special case nProcs=0 to switch off
|
//- Set data for parallel running. Special case nProcs=0 to switch off
|
||||||
// parallel running
|
//- parallel running
|
||||||
static void setParRun(const label nProcs);
|
static void setParRun(const label nProcs);
|
||||||
|
|
||||||
//- Number of processes in parallel run
|
//- Number of processes in parallel run
|
||||||
|
|||||||
@ -122,8 +122,8 @@ class argList
|
|||||||
//- Track enabled/disabled checking of processor directories state
|
//- Track enabled/disabled checking of processor directories state
|
||||||
static bool checkProcessorDirectories_;
|
static bool checkProcessorDirectories_;
|
||||||
|
|
||||||
//- Switch on/off parallel mode. Has to be first to be constructed
|
//- Switch on/off parallel mode.
|
||||||
// so destructor is done last.
|
// Must be first to be constructed so destructor is done last.
|
||||||
ParRunControl parRunControl_;
|
ParRunControl parRunControl_;
|
||||||
|
|
||||||
//- The arguments after removing known options
|
//- The arguments after removing known options
|
||||||
|
|||||||
@ -27,6 +27,9 @@ Class
|
|||||||
Description
|
Description
|
||||||
Helper class for initializing parallel jobs from the command arguments.
|
Helper class for initializing parallel jobs from the command arguments.
|
||||||
|
|
||||||
|
This class also handles cleanup of parallel or serial jobs in a
|
||||||
|
uniform manner.
|
||||||
|
|
||||||
\*---------------------------------------------------------------------------*/
|
\*---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
#ifndef parRun_H
|
#ifndef parRun_H
|
||||||
@ -60,10 +63,13 @@ public:
|
|||||||
if (RunPar)
|
if (RunPar)
|
||||||
{
|
{
|
||||||
Info<< "Finalising parallel run" << endl;
|
Info<< "Finalising parallel run" << endl;
|
||||||
Pstream::exit(0);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handles serial and parallel modes.
|
||||||
|
Pstream::exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
//- Initialize Pstream for a parallel run
|
||||||
void runPar(int& argc, char**& argv)
|
void runPar(int& argc, char**& argv)
|
||||||
{
|
{
|
||||||
RunPar = true;
|
RunPar = true;
|
||||||
@ -75,6 +81,8 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//- Is this a parallel run?
|
||||||
bool parRun() const
|
bool parRun() const
|
||||||
{
|
{
|
||||||
return RunPar;
|
return RunPar;
|
||||||
|
|||||||
@ -25,18 +25,28 @@ License
|
|||||||
|
|
||||||
#include "Pstream.H"
|
#include "Pstream.H"
|
||||||
#include "PstreamReduceOps.H"
|
#include "PstreamReduceOps.H"
|
||||||
|
#include "OSspecific.H"
|
||||||
|
|
||||||
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
|
||||||
|
|
||||||
void Foam::UPstream::addValidParOptions(HashTable<string>& validParOptions)
|
void Foam::UPstream::addValidParOptions(HashTable<string>& validParOptions)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
bool Foam::UPstream::initNull()
|
||||||
|
{
|
||||||
|
WarningInFunction
|
||||||
|
<< "The dummy Pstream library cannot be used in parallel mode"
|
||||||
|
<< endl;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool Foam::UPstream::init(int& argc, char**& argv)
|
bool Foam::UPstream::init(int& argc, char**& argv)
|
||||||
{
|
{
|
||||||
FatalErrorInFunction
|
FatalErrorInFunction
|
||||||
<< "Trying to use the dummy Pstream library." << nl
|
<< "The dummy Pstream library cannot be used in parallel mode"
|
||||||
<< "This dummy library cannot be used in parallel mode"
|
<< endl
|
||||||
<< Foam::exit(FatalError);
|
<< Foam::exit(FatalError);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@ -45,13 +55,15 @@ bool Foam::UPstream::init(int& argc, char**& argv)
|
|||||||
|
|
||||||
void Foam::UPstream::exit(int errnum)
|
void Foam::UPstream::exit(int errnum)
|
||||||
{
|
{
|
||||||
NotImplemented;
|
// No MPI - just exit
|
||||||
|
::exit(errnum);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Foam::UPstream::abort()
|
void Foam::UPstream::abort()
|
||||||
{
|
{
|
||||||
NotImplemented;
|
// No MPI - just abort
|
||||||
|
::abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -66,8 +66,67 @@ void Foam::UPstream::addValidParOptions(HashTable<string>& validParOptions)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool Foam::UPstream::initNull()
|
||||||
|
{
|
||||||
|
int flag = 0;
|
||||||
|
|
||||||
|
MPI_Finalized(&flag);
|
||||||
|
if (flag)
|
||||||
|
{
|
||||||
|
// Already finalized - this is an error
|
||||||
|
FatalErrorInFunction
|
||||||
|
<< "MPI was already finalized - cannot perform MPI_Init" << endl
|
||||||
|
<< Foam::abort(FatalError);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Initialized(&flag);
|
||||||
|
if (flag)
|
||||||
|
{
|
||||||
|
// Already initialized - nothing to do
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Init_thread
|
||||||
|
(
|
||||||
|
nullptr, // argc
|
||||||
|
nullptr, // argv
|
||||||
|
MPI_THREAD_SINGLE,
|
||||||
|
&flag // provided_thread_support
|
||||||
|
);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool Foam::UPstream::init(int& argc, char**& argv)
|
bool Foam::UPstream::init(int& argc, char**& argv)
|
||||||
{
|
{
|
||||||
|
int flag = 0;
|
||||||
|
|
||||||
|
MPI_Finalized(&flag);
|
||||||
|
if (flag)
|
||||||
|
{
|
||||||
|
// Already finalized - this is an error
|
||||||
|
FatalErrorInFunction
|
||||||
|
<< "MPI was already finalized - cannot perform MPI_Init" << endl
|
||||||
|
<< Foam::abort(FatalError);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Initialized(&flag);
|
||||||
|
if (flag)
|
||||||
|
{
|
||||||
|
// Already initialized - issue warning and skip the rest
|
||||||
|
WarningInFunction
|
||||||
|
<< "MPI was already initialized - cannot perform MPI_Init" << nl
|
||||||
|
<< "This could indicate an application programming error!" << endl;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//MPI_Init(&argc, &argv);
|
//MPI_Init(&argc, &argv);
|
||||||
int provided_thread_support;
|
int provided_thread_support;
|
||||||
MPI_Init_thread
|
MPI_Init_thread
|
||||||
@ -92,8 +151,7 @@ bool Foam::UPstream::init(int& argc, char**& argv)
|
|||||||
if (numprocs <= 1)
|
if (numprocs <= 1)
|
||||||
{
|
{
|
||||||
FatalErrorInFunction
|
FatalErrorInFunction
|
||||||
<< "bool IPstream::init(int& argc, char**& argv) : "
|
<< "attempt to run parallel on 1 processor"
|
||||||
"attempt to run parallel on 1 processor"
|
|
||||||
<< Foam::abort(FatalError);
|
<< Foam::abort(FatalError);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -148,6 +206,26 @@ void Foam::UPstream::exit(int errnum)
|
|||||||
Pout<< "UPstream::exit." << endl;
|
Pout<< "UPstream::exit." << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int flag = 0;
|
||||||
|
|
||||||
|
MPI_Initialized(&flag);
|
||||||
|
if (!flag)
|
||||||
|
{
|
||||||
|
// Not initialized - just exit
|
||||||
|
::exit(errnum);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
MPI_Finalized(&flag);
|
||||||
|
if (flag)
|
||||||
|
{
|
||||||
|
// Already finalized
|
||||||
|
FatalErrorInFunction
|
||||||
|
<< "MPI was already finalized" << endl
|
||||||
|
<< Foam::abort(FatalError);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef SGIMPI
|
#ifndef SGIMPI
|
||||||
{
|
{
|
||||||
int size;
|
int size;
|
||||||
|
|||||||
Reference in New Issue
Block a user