From 9c3bef5a9920b2847bd38ba8d7ce4edb55db4d04 Mon Sep 17 00:00:00 2001 From: Mark Olesen Date: Fri, 17 Nov 2017 11:29:26 +0100 Subject: [PATCH] ENH: improve robustness of MPI start/stop - warn or fatal if Pstream::init or Pstream::exit are called multiple times. - additional Pstream::initNull method as failsafe to initialize MPI when the underlying OpenFOAM process is not running in parallel but the application still needs MPI. - Pstream::exit() can now also be called without having used MPI::init(), which means it can be used to cleanup serial process or for applications that used the special purpose Pstream::initNull() mechanism. --- src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H | 47 +++++++---- src/OpenFOAM/global/argList/argList.H | 4 +- src/OpenFOAM/global/argList/parRun.H | 10 ++- src/Pstream/dummy/UPstream.C | 20 ++++- src/Pstream/mpi/UPstream.C | 82 ++++++++++++++++++- 5 files changed, 136 insertions(+), 27 deletions(-) diff --git a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H index 0b936c9f7e..4993ed9d12 100644 --- a/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H +++ b/src/OpenFOAM/db/IOstreams/Pstreams/UPstream.H @@ -69,6 +69,7 @@ public: nonBlocking }; + //- Names of the communication types static const Enum commsTypeNames; // Public classes @@ -87,8 +88,8 @@ public: //- procIDs of all processors below (so not just directly below) labelList allBelow_; - //- procIDs of all processors not below. (inverse set of - // allBelow_ and minus myProcNo) + //- procIDs of all processors not below. + // (inverse set of allBelow_ and minus myProcNo) labelList allNotBelow_; @@ -102,10 +103,10 @@ public: //- Construct from components commsStruct ( - const label, - const labelList&, - const labelList&, - const labelList& + const label above, + const labelList& below, + const labelList& allBelow, + const labelList& allNotBelow ); //- Construct from components; construct allNotBelow_ @@ -113,9 +114,9 @@ public: ( const label nProcs, const label myProcID, - const label, - const labelList&, - const labelList& + const label above, + const labelList& below, + const labelList& allBelow ); @@ -255,12 +256,12 @@ public: // Static data //- Should compact transfer be used in which floats replace doubles - // reducing the bandwidth requirement at the expense of some loss - // in accuracy + //- reducing the bandwidth requirement at the expense of some loss + //- in accuracy static bool floatTransfer; //- Number of processors at which the sum algorithm changes from linear - // to tree + //- to tree static int nProcsSimpleSum; //- Default commsType @@ -344,15 +345,15 @@ public: }; //- Return physical processor number (i.e. processor number in - // worldComm) given communicator and procssor + //- worldComm) given communicator and procssor static int baseProcNo(const label myComm, const int procID); //- Return processor number in communicator (given physical processor - // number) (= reverse of baseProcNo) + //- number) (= reverse of baseProcNo) static label procNo(const label comm, const int baseProcID); //- Return processor number in communicator (given processor number - // and communicator) + //- and communicator) static label procNo ( const label myComm, @@ -361,13 +362,23 @@ public: ); //- Add the valid option this type of communications library - // adds/requires on the command line + //- adds/requires on the command line static void addValidParOptions(HashTable& validParOptions); //- Initialisation function called from main - // Spawns slave processes and initialises inter-communication + // Spawns slave processes and initialises inter-communication. + // \note warns if MPI has already been initialized. + // Fatal if MPI has already been finalized. static bool init(int& argc, char**& argv); + //- Special purpose initialisation function. + // Performs a basic MPI_Init without any other setup. + // Only used for applications that need MPI communication when + // OpenFOAM is running in a non-parallel mode. + // \note Behaves as a no-op if MPI has already been initialized. + // Fatal if MPI has already been finalized. + static bool initNull(); + // Non-blocking comms //- Get number of outstanding requests @@ -401,7 +412,7 @@ public: } //- Set data for parallel running. Special case nProcs=0 to switch off - // parallel running + //- parallel running static void setParRun(const label nProcs); //- Number of processes in parallel run diff --git a/src/OpenFOAM/global/argList/argList.H b/src/OpenFOAM/global/argList/argList.H index 171bdb7ffb..ea8368b5a6 100644 --- a/src/OpenFOAM/global/argList/argList.H +++ b/src/OpenFOAM/global/argList/argList.H @@ -122,8 +122,8 @@ class argList //- Track enabled/disabled checking of processor directories state static bool checkProcessorDirectories_; - //- Switch on/off parallel mode. Has to be first to be constructed - // so destructor is done last. + //- Switch on/off parallel mode. + // Must be first to be constructed so destructor is done last. ParRunControl parRunControl_; //- The arguments after removing known options diff --git a/src/OpenFOAM/global/argList/parRun.H b/src/OpenFOAM/global/argList/parRun.H index 724c166cf0..6fa5f1e7d6 100644 --- a/src/OpenFOAM/global/argList/parRun.H +++ b/src/OpenFOAM/global/argList/parRun.H @@ -27,6 +27,9 @@ Class Description Helper class for initializing parallel jobs from the command arguments. + This class also handles cleanup of parallel or serial jobs in a + uniform manner. + \*---------------------------------------------------------------------------*/ #ifndef parRun_H @@ -60,10 +63,13 @@ public: if (RunPar) { Info<< "Finalising parallel run" << endl; - Pstream::exit(0); } + + // Handles serial and parallel modes. + Pstream::exit(0); } + //- Initialize Pstream for a parallel run void runPar(int& argc, char**& argv) { RunPar = true; @@ -75,6 +81,8 @@ public: } } + + //- Is this a parallel run? bool parRun() const { return RunPar; diff --git a/src/Pstream/dummy/UPstream.C b/src/Pstream/dummy/UPstream.C index 4d14c2727a..832c6afec4 100644 --- a/src/Pstream/dummy/UPstream.C +++ b/src/Pstream/dummy/UPstream.C @@ -25,18 +25,28 @@ License #include "Pstream.H" #include "PstreamReduceOps.H" +#include "OSspecific.H" // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * // void Foam::UPstream::addValidParOptions(HashTable& validParOptions) {} +bool Foam::UPstream::initNull() +{ + WarningInFunction + << "The dummy Pstream library cannot be used in parallel mode" + << endl; + + return false; +} + bool Foam::UPstream::init(int& argc, char**& argv) { FatalErrorInFunction - << "Trying to use the dummy Pstream library." << nl - << "This dummy library cannot be used in parallel mode" + << "The dummy Pstream library cannot be used in parallel mode" + << endl << Foam::exit(FatalError); return false; @@ -45,13 +55,15 @@ bool Foam::UPstream::init(int& argc, char**& argv) void Foam::UPstream::exit(int errnum) { - NotImplemented; + // No MPI - just exit + ::exit(errnum); } void Foam::UPstream::abort() { - NotImplemented; + // No MPI - just abort + ::abort(); } diff --git a/src/Pstream/mpi/UPstream.C b/src/Pstream/mpi/UPstream.C index 501dec2241..c29c6f742d 100644 --- a/src/Pstream/mpi/UPstream.C +++ b/src/Pstream/mpi/UPstream.C @@ -66,8 +66,67 @@ void Foam::UPstream::addValidParOptions(HashTable& validParOptions) } +bool Foam::UPstream::initNull() +{ + int flag = 0; + + MPI_Finalized(&flag); + if (flag) + { + // Already finalized - this is an error + FatalErrorInFunction + << "MPI was already finalized - cannot perform MPI_Init" << endl + << Foam::abort(FatalError); + + return false; + } + + MPI_Initialized(&flag); + if (flag) + { + // Already initialized - nothing to do + return true; + } + + MPI_Init_thread + ( + nullptr, // argc + nullptr, // argv + MPI_THREAD_SINGLE, + &flag // provided_thread_support + ); + + return true; +} + + bool Foam::UPstream::init(int& argc, char**& argv) { + int flag = 0; + + MPI_Finalized(&flag); + if (flag) + { + // Already finalized - this is an error + FatalErrorInFunction + << "MPI was already finalized - cannot perform MPI_Init" << endl + << Foam::abort(FatalError); + + return false; + } + + MPI_Initialized(&flag); + if (flag) + { + // Already initialized - issue warning and skip the rest + WarningInFunction + << "MPI was already initialized - cannot perform MPI_Init" << nl + << "This could indicate an application programming error!" << endl; + + return true; + } + + //MPI_Init(&argc, &argv); int provided_thread_support; MPI_Init_thread @@ -92,8 +151,7 @@ bool Foam::UPstream::init(int& argc, char**& argv) if (numprocs <= 1) { FatalErrorInFunction - << "bool IPstream::init(int& argc, char**& argv) : " - "attempt to run parallel on 1 processor" + << "attempt to run parallel on 1 processor" << Foam::abort(FatalError); } @@ -148,6 +206,26 @@ void Foam::UPstream::exit(int errnum) Pout<< "UPstream::exit." << endl; } + int flag = 0; + + MPI_Initialized(&flag); + if (!flag) + { + // Not initialized - just exit + ::exit(errnum); + return; + } + + MPI_Finalized(&flag); + if (flag) + { + // Already finalized + FatalErrorInFunction + << "MPI was already finalized" << endl + << Foam::abort(FatalError); + return; + } + #ifndef SGIMPI { int size;