ENH: improve robustness of MPI start/stop

- issue a warning or a fatal error if Pstream::init or Pstream::exit is
  called multiple times.

- additional Pstream::initNull method as failsafe to initialize MPI
  when the underlying OpenFOAM process is not running in parallel but
  the application still needs MPI.

- Pstream::exit() can now also be called without having used Pstream::init(),
  which means it can be used to clean up a serial process or for
  applications that used the special-purpose Pstream::initNull()
  mechanism.
This commit is contained in:
Mark Olesen
2017-11-17 11:29:26 +01:00
parent 00b1ecad60
commit 9c3bef5a99
5 changed files with 136 additions and 27 deletions

View File

@ -69,6 +69,7 @@ public:
nonBlocking nonBlocking
}; };
//- Names of the communication types
static const Enum<commsTypes> commsTypeNames; static const Enum<commsTypes> commsTypeNames;
// Public classes // Public classes
@ -87,8 +88,8 @@ public:
//- procIDs of all processors below (so not just directly below) //- procIDs of all processors below (so not just directly below)
labelList allBelow_; labelList allBelow_;
//- procIDs of all processors not below. (inverse set of //- procIDs of all processors not below.
// allBelow_ and minus myProcNo) // (inverse set of allBelow_ and minus myProcNo)
labelList allNotBelow_; labelList allNotBelow_;
@ -102,10 +103,10 @@ public:
//- Construct from components //- Construct from components
commsStruct commsStruct
( (
const label, const label above,
const labelList&, const labelList& below,
const labelList&, const labelList& allBelow,
const labelList& const labelList& allNotBelow
); );
//- Construct from components; construct allNotBelow_ //- Construct from components; construct allNotBelow_
@ -113,9 +114,9 @@ public:
( (
const label nProcs, const label nProcs,
const label myProcID, const label myProcID,
const label, const label above,
const labelList&, const labelList& below,
const labelList& const labelList& allBelow
); );
@ -255,12 +256,12 @@ public:
// Static data // Static data
//- Should compact transfer be used in which floats replace doubles //- Should compact transfer be used in which floats replace doubles
// reducing the bandwidth requirement at the expense of some loss //- reducing the bandwidth requirement at the expense of some loss
// in accuracy //- in accuracy
static bool floatTransfer; static bool floatTransfer;
//- Number of processors at which the sum algorithm changes from linear //- Number of processors at which the sum algorithm changes from linear
// to tree //- to tree
static int nProcsSimpleSum; static int nProcsSimpleSum;
//- Default commsType //- Default commsType
@ -344,15 +345,15 @@ public:
}; };
//- Return physical processor number (i.e. processor number in //- Return physical processor number (i.e. processor number in
// worldComm) given communicator and processor //- worldComm) given communicator and processor
static int baseProcNo(const label myComm, const int procID); static int baseProcNo(const label myComm, const int procID);
//- Return processor number in communicator (given physical processor //- Return processor number in communicator (given physical processor
// number) (= reverse of baseProcNo) //- number) (= reverse of baseProcNo)
static label procNo(const label comm, const int baseProcID); static label procNo(const label comm, const int baseProcID);
//- Return processor number in communicator (given processor number //- Return processor number in communicator (given processor number
// and communicator) //- and communicator)
static label procNo static label procNo
( (
const label myComm, const label myComm,
@ -361,13 +362,23 @@ public:
); );
//- Add the valid option this type of communications library //- Add the valid option this type of communications library
// adds/requires on the command line //- adds/requires on the command line
static void addValidParOptions(HashTable<string>& validParOptions); static void addValidParOptions(HashTable<string>& validParOptions);
//- Initialisation function called from main //- Initialisation function called from main
// Spawns slave processes and initialises inter-communication // Spawns slave processes and initialises inter-communication.
// \note warns if MPI has already been initialized.
// Fatal if MPI has already been finalized.
static bool init(int& argc, char**& argv); static bool init(int& argc, char**& argv);
//- Special purpose initialisation function.
// Performs a basic MPI_Init without any other setup.
// Only used for applications that need MPI communication when
// OpenFOAM is running in a non-parallel mode.
// \note Behaves as a no-op if MPI has already been initialized.
// Fatal if MPI has already been finalized.
static bool initNull();
// Non-blocking comms // Non-blocking comms
//- Get number of outstanding requests //- Get number of outstanding requests
@ -401,7 +412,7 @@ public:
} }
//- Set data for parallel running. Special case nProcs=0 to switch off //- Set data for parallel running. Special case nProcs=0 to switch off
// parallel running //- parallel running
static void setParRun(const label nProcs); static void setParRun(const label nProcs);
//- Number of processes in parallel run //- Number of processes in parallel run

View File

@ -122,8 +122,8 @@ class argList
//- Track enabled/disabled checking of processor directories state //- Track enabled/disabled checking of processor directories state
static bool checkProcessorDirectories_; static bool checkProcessorDirectories_;
//- Switch on/off parallel mode. Has to be first to be constructed //- Switch on/off parallel mode.
// so destructor is done last. // Must be first to be constructed so destructor is done last.
ParRunControl parRunControl_; ParRunControl parRunControl_;
//- The arguments after removing known options //- The arguments after removing known options

View File

@ -27,6 +27,9 @@ Class
Description Description
Helper class for initializing parallel jobs from the command arguments. Helper class for initializing parallel jobs from the command arguments.
This class also handles cleanup of parallel or serial jobs in a
uniform manner.
\*---------------------------------------------------------------------------*/ \*---------------------------------------------------------------------------*/
#ifndef parRun_H #ifndef parRun_H
@ -60,10 +63,13 @@ public:
if (RunPar) if (RunPar)
{ {
Info<< "Finalising parallel run" << endl; Info<< "Finalising parallel run" << endl;
Pstream::exit(0);
}
} }
// Handles serial and parallel modes.
Pstream::exit(0);
}
//- Initialize Pstream for a parallel run
void runPar(int& argc, char**& argv) void runPar(int& argc, char**& argv)
{ {
RunPar = true; RunPar = true;
@ -75,6 +81,8 @@ public:
} }
} }
//- Is this a parallel run?
bool parRun() const bool parRun() const
{ {
return RunPar; return RunPar;

View File

@ -25,18 +25,28 @@ License
#include "Pstream.H" #include "Pstream.H"
#include "PstreamReduceOps.H" #include "PstreamReduceOps.H"
#include "OSspecific.H"
// * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * //
void Foam::UPstream::addValidParOptions(HashTable<string>& validParOptions) void Foam::UPstream::addValidParOptions(HashTable<string>& validParOptions)
{} {}
// Dummy (non-MPI) variant of the special purpose initialisation.
// There is no MPI layer to start here, so this only emits a warning
// and reports failure to the caller.
bool Foam::UPstream::initNull()
{
    WarningInFunction
        << "The dummy Pstream library cannot be used in parallel mode" << endl;

    // Nothing was initialised
    return false;
}
bool Foam::UPstream::init(int& argc, char**& argv) bool Foam::UPstream::init(int& argc, char**& argv)
{ {
FatalErrorInFunction FatalErrorInFunction
<< "Trying to use the dummy Pstream library." << nl << "The dummy Pstream library cannot be used in parallel mode"
<< "This dummy library cannot be used in parallel mode" << endl
<< Foam::exit(FatalError); << Foam::exit(FatalError);
return false; return false;
@ -45,13 +55,15 @@ bool Foam::UPstream::init(int& argc, char**& argv)
void Foam::UPstream::exit(int errnum) void Foam::UPstream::exit(int errnum)
{ {
NotImplemented; // No MPI - just exit
::exit(errnum);
} }
void Foam::UPstream::abort() void Foam::UPstream::abort()
{ {
NotImplemented; // No MPI - just abort
::abort();
} }

View File

@ -66,8 +66,67 @@ void Foam::UPstream::addValidParOptions(HashTable<string>& validParOptions)
} }
bool Foam::UPstream::initNull()
{
int flag = 0;
MPI_Finalized(&flag);
if (flag)
{
// Already finalized - this is an error
FatalErrorInFunction
<< "MPI was already finalized - cannot perform MPI_Init" << endl
<< Foam::abort(FatalError);
return false;
}
MPI_Initialized(&flag);
if (flag)
{
// Already initialized - nothing to do
return true;
}
MPI_Init_thread
(
nullptr, // argc
nullptr, // argv
MPI_THREAD_SINGLE,
&flag // provided_thread_support
);
return true;
}
bool Foam::UPstream::init(int& argc, char**& argv) bool Foam::UPstream::init(int& argc, char**& argv)
{ {
int flag = 0;
MPI_Finalized(&flag);
if (flag)
{
// Already finalized - this is an error
FatalErrorInFunction
<< "MPI was already finalized - cannot perform MPI_Init" << endl
<< Foam::abort(FatalError);
return false;
}
MPI_Initialized(&flag);
if (flag)
{
// Already initialized - issue warning and skip the rest
WarningInFunction
<< "MPI was already initialized - cannot perform MPI_Init" << nl
<< "This could indicate an application programming error!" << endl;
return true;
}
//MPI_Init(&argc, &argv); //MPI_Init(&argc, &argv);
int provided_thread_support; int provided_thread_support;
MPI_Init_thread MPI_Init_thread
@ -92,8 +151,7 @@ bool Foam::UPstream::init(int& argc, char**& argv)
if (numprocs <= 1) if (numprocs <= 1)
{ {
FatalErrorInFunction FatalErrorInFunction
<< "bool IPstream::init(int& argc, char**& argv) : " << "attempt to run parallel on 1 processor"
"attempt to run parallel on 1 processor"
<< Foam::abort(FatalError); << Foam::abort(FatalError);
} }
@ -148,6 +206,26 @@ void Foam::UPstream::exit(int errnum)
Pout<< "UPstream::exit." << endl; Pout<< "UPstream::exit." << endl;
} }
int flag = 0;
MPI_Initialized(&flag);
if (!flag)
{
// Not initialized - just exit
::exit(errnum);
return;
}
MPI_Finalized(&flag);
if (flag)
{
// Already finalized
FatalErrorInFunction
<< "MPI was already finalized" << endl
<< Foam::abort(FatalError);
return;
}
#ifndef SGIMPI #ifndef SGIMPI
{ {
int size; int size;