add option to synchronize fix halt stop requests across multi-partition runs
This commit is contained in:
@ -25,13 +25,14 @@ Syntax
|
||||
* operator = "<" or "<=" or ">" or ">=" or "==" or "!=" or "\|\^"
|
||||
* avalue = numeric value to compare attribute to
|
||||
* zero or more keyword/value pairs may be appended
|
||||
* keyword = *error* or *message* or *path*
|
||||
* keyword = *error* or *message* or *path* or *universe*
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
*error* value = *hard* or *soft* or *continue*
|
||||
*message* value = *yes* or *no*
|
||||
*path* value = path to check for free space (may be in quotes)
|
||||
*universe* value = *yes* or *no*
|
||||
|
||||
|
||||
Examples
|
||||
@ -40,8 +41,10 @@ Examples
|
||||
.. code-block:: LAMMPS
|
||||
|
||||
fix 10 all halt 1 bondmax > 1.5
|
||||
fix 10 all halt 10 v_myCheck != 0 error soft
|
||||
fix 10 all halt 10 v_myCheck != 0 error soft message no
|
||||
fix 10 all halt 100 diskfree < 100000.0 path "dump storage/."
|
||||
fix 2 all halt 100 v_curtime > ${maxtime} universe yes
|
||||
|
||||
|
||||
Description
|
||||
"""""""""""
|
||||
@ -162,12 +165,21 @@ is printed; the run simply exits. The latter may be desirable for
|
||||
post-processing tools that extract thermodynamic information from log
|
||||
files.
|
||||
|
||||
The optional *universe* keyword determines whether the halt request
|
||||
should be synchronized across the partitions of a :doc:`multi-partition
|
||||
run <Run_options>`. If *universe* is set to yes, fix halt will check if
|
||||
there is a specific message received from any of the other partitions
|
||||
requesting to stop the run on this partition as well. Conversely, if
|
||||
fix halt determines to halt the simulation, the fix will send messages
|
||||
to all other partitions so they stop their runs, too.
|
||||
|
||||
Restart, fix_modify, output, run start/stop, minimize info
|
||||
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
||||
|
||||
No information about this fix is written to :doc:`binary restart files <restart>`. None of the :doc:`fix_modify <fix_modify>` options
|
||||
are relevant to this fix. No global or per-atom quantities are stored
|
||||
by this fix for access by various :doc:`output commands <Howto_output>`.
|
||||
No information about this fix is written to :doc:`binary restart files
|
||||
<restart>`. None of the :doc:`fix_modify <fix_modify>` options are
|
||||
relevant to this fix. No global or per-atom quantities are stored by
|
||||
this fix for access by various :doc:`output commands <Howto_output>`.
|
||||
No parameter of this fix can be used with the *start/stop* keywords of
|
||||
the :doc:`run <run>` command.
|
||||
|
||||
@ -183,4 +195,4 @@ Related commands
|
||||
Default
|
||||
"""""""
|
||||
|
||||
The option defaults are error = soft, message = yes, and path = ".".
|
||||
The option defaults are error = soft, message = yes, path = ".", and universe = no.
|
||||
|
||||
@ -22,6 +22,7 @@
|
||||
#include "neighbor.h"
|
||||
#include "timer.h"
|
||||
#include "update.h"
|
||||
#include "universe.h"
|
||||
#include "variable.h"
|
||||
|
||||
#include <cmath>
|
||||
@ -34,6 +35,7 @@ enum { BONDMAX, TLIMIT, DISKFREE, VARIABLE };
|
||||
enum { LT, LE, GT, GE, EQ, NEQ, XOR };
|
||||
enum { HARD, SOFT, CONTINUE };
|
||||
enum { NOMSG = 0, YESMSG = 1 };
|
||||
static constexpr int UTAG = 999;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -42,11 +44,10 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
|
||||
{
|
||||
if (narg < 7) utils::missing_cmd_args(FLERR, "fix halt", error);
|
||||
nevery = utils::inumeric(FLERR, arg[3], false, lmp);
|
||||
if (nevery <= 0) error->all(FLERR, "Illegal fix halt command: nevery must be > 0");
|
||||
if (nevery <= 0) error->all(FLERR, 3, "Illegal fix halt command: nevery must be > 0");
|
||||
|
||||
// comparison args
|
||||
|
||||
idvar = nullptr;
|
||||
int iarg = 4;
|
||||
|
||||
if (strcmp(arg[iarg], "tlimit") == 0) {
|
||||
@ -56,20 +57,22 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
|
||||
dlimit_path = utils::strdup(".");
|
||||
} else if (strcmp(arg[iarg], "bondmax") == 0) {
|
||||
attribute = BONDMAX;
|
||||
} else {
|
||||
} else if (utils::strmatch(arg[iarg], "^v_")) {
|
||||
ArgInfo argi(arg[iarg], ArgInfo::VARIABLE);
|
||||
|
||||
if ((argi.get_type() == ArgInfo::UNKNOWN) || (argi.get_type() == ArgInfo::NONE) ||
|
||||
(argi.get_dim() != 0))
|
||||
error->all(FLERR, "Invalid fix halt attribute {}", arg[iarg]);
|
||||
error->all(FLERR, iarg, "Invalid fix halt attribute {}", arg[iarg]);
|
||||
|
||||
attribute = VARIABLE;
|
||||
idvar = argi.copy_name();
|
||||
ivar = input->variable->find(idvar);
|
||||
|
||||
if (ivar < 0) error->all(FLERR, "Could not find fix halt variable name");
|
||||
if (ivar < 0) error->all(FLERR, iarg, "Could not find fix halt variable name {}", idvar);
|
||||
if (input->variable->equalstyle(ivar) == 0)
|
||||
error->all(FLERR, "Fix halt variable is not equal-style variable");
|
||||
error->all(FLERR, iarg, "Fix halt variable is not equal-style variable");
|
||||
} else {
|
||||
error->all(FLERR, iarg, "Unknown fix halt keyword {}", arg[iarg]);
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
@ -90,6 +93,7 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
|
||||
|
||||
eflag = SOFT;
|
||||
msgflag = YESMSG;
|
||||
uflag = NOMSG;
|
||||
++iarg;
|
||||
while (iarg < narg) {
|
||||
if (strcmp(arg[iarg], "error") == 0) {
|
||||
@ -103,6 +107,10 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
|
||||
if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt message", error);
|
||||
msgflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg], "universe") == 0) {
|
||||
if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt universe", error);
|
||||
uflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg], "path") == 0) {
|
||||
if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt error", error);
|
||||
++iarg;
|
||||
@ -189,6 +197,50 @@ void FixHalt::min_post_force(int /* vflag */)
|
||||
|
||||
void FixHalt::end_of_step()
|
||||
{
|
||||
// check if another partition has exited and we need to exit, too.
|
||||
|
||||
if (uflag) {
|
||||
MPI_Status status;
|
||||
int partition = -1;
|
||||
int flag = 0;
|
||||
if (comm->me == 0) {
|
||||
|
||||
// probe if any stop request from another partition is pending
|
||||
|
||||
MPI_Iprobe(MPI_ANY_SOURCE, UTAG, universe->uworld, &flag, &status);
|
||||
|
||||
if (flag) {
|
||||
// determine which partition sent the stop request and receive the message
|
||||
for (int i = 0; i < universe->nworlds; ++i)
|
||||
if (universe->root_proc[i] == status.MPI_SOURCE) partition = i + 1;
|
||||
|
||||
MPI_Recv(&flag, 1, MPI_INT, status.MPI_SOURCE, UTAG, universe->uworld, MPI_STATUS_IGNORE);
|
||||
}
|
||||
}
|
||||
|
||||
// broadcast stop request partition to all processes in our partition
|
||||
|
||||
MPI_Bcast(&partition, 1, MPI_INT, 0, world);
|
||||
|
||||
// exit request pending: handle it the same way as a local halt below
|
||||
|
||||
if (partition > 0) {
|
||||
|
||||
// hard halt -> exit LAMMPS
|
||||
// soft/continue halt -> trigger timer to break from run loop
|
||||
// print message with ID of fix halt in case multiple instances
|
||||
|
||||
auto message = fmt::format("Received universe halt request from partition {} for fix-id {} on step {}",
|
||||
partition, id, update->ntimestep);
|
||||
if (eflag == HARD) {
|
||||
error->all(FLERR, message);
|
||||
} else if ((eflag == SOFT) || (eflag == CONTINUE)) {
|
||||
if ((comm->me == 0) && (msgflag == YESMSG)) error->message(FLERR, message);
|
||||
timer->force_timeout();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// variable evaluation may invoke computes so wrap with clear/add
|
||||
|
||||
double attvalue;
|
||||
@ -228,6 +280,22 @@ void FixHalt::end_of_step()
|
||||
if ((attvalue == 0.0 && value == 0.0) || (attvalue != 0.0 && value != 0.0)) return;
|
||||
}
|
||||
|
||||
// send message to all other root processes to trigger exit across universe, if requested
|
||||
|
||||
if (uflag && (comm->me == 0)) {
|
||||
MPI_Request *req = new MPI_Request[universe->nworlds];
|
||||
for (int i = 0; i < universe->nworlds; ++i) {
|
||||
if (universe->me == universe->root_proc[i]) continue;
|
||||
MPI_Isend(&eflag, 1, MPI_INT, universe->root_proc[i], UTAG, universe->uworld, req + i);
|
||||
}
|
||||
|
||||
// wait for all sends to complete, so MPI_Finalize() will be happy
|
||||
for (int i = 0; i < universe->nworlds; ++i) {
|
||||
if (universe->me == universe->root_proc[i]) continue;
|
||||
MPI_Wait(req + i, MPI_STATUS_IGNORE);
|
||||
}
|
||||
}
|
||||
|
||||
// hard halt -> exit LAMMPS
|
||||
// soft/continue halt -> trigger timer to break from run loop
|
||||
// print message with ID of fix halt in case multiple instances
|
||||
|
||||
@ -35,7 +35,7 @@ class FixHalt : public Fix {
|
||||
void post_run() override;
|
||||
|
||||
private:
|
||||
int attribute, operation, eflag, msgflag, ivar;
|
||||
int attribute, operation, eflag, msgflag, ivar, uflag;
|
||||
bigint nextstep, thisstep;
|
||||
double value, tratio;
|
||||
char *idvar;
|
||||
|
||||
Reference in New Issue
Block a user