add option to synchronize fix halt stop requests across multi-partition runs
This commit is contained in:
@ -25,13 +25,14 @@ Syntax
|
|||||||
* operator = "<" or "<=" or ">" or ">=" or "==" or "!=" or "\|\^"
|
* operator = "<" or "<=" or ">" or ">=" or "==" or "!=" or "\|\^"
|
||||||
* avalue = numeric value to compare attribute to
|
* avalue = numeric value to compare attribute to
|
||||||
* zero or more keyword/value pairs may be appended
|
* zero or more keyword/value pairs may be appended
|
||||||
* keyword = *error* or *message* or *path*
|
* keyword = *error* or *message* or *path* or *universe*
|
||||||
|
|
||||||
.. parsed-literal::
|
.. parsed-literal::
|
||||||
|
|
||||||
*error* value = *hard* or *soft* or *continue*
|
*error* value = *hard* or *soft* or *continue*
|
||||||
*message* value = *yes* or *no*
|
*message* value = *yes* or *no*
|
||||||
*path* value = path to check for free space (may be in quotes)
|
*path* value = path to check for free space (may be in quotes)
|
||||||
|
*universe* value = *yes* or *no*
|
||||||
|
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
@ -40,8 +41,10 @@ Examples
|
|||||||
.. code-block:: LAMMPS
|
.. code-block:: LAMMPS
|
||||||
|
|
||||||
fix 10 all halt 1 bondmax > 1.5
|
fix 10 all halt 1 bondmax > 1.5
|
||||||
fix 10 all halt 10 v_myCheck != 0 error soft
|
fix 10 all halt 10 v_myCheck != 0 error soft message no
|
||||||
fix 10 all halt 100 diskfree < 100000.0 path "dump storage/."
|
fix 10 all halt 100 diskfree < 100000.0 path "dump storage/."
|
||||||
|
fix 2 all halt 100 v_curtime > ${maxtime} universe yes
|
||||||
|
|
||||||
|
|
||||||
Description
|
Description
|
||||||
"""""""""""
|
"""""""""""
|
||||||
@ -162,12 +165,21 @@ is printed; the run simply exits. The latter may be desirable for
|
|||||||
post-processing tools that extract thermodynamic information from log
|
post-processing tools that extract thermodynamic information from log
|
||||||
files.
|
files.
|
||||||
|
|
||||||
|
The optional *universe* keyword determines whether the halt request
|
||||||
|
should be synchronized across the partitions of a :doc:`multi-partition
|
||||||
|
run <Run_options>`. If *universe* is set to *yes*, fix halt will check if
|
||||||
|
there is a specific message received from any of the other partitions
|
||||||
|
requesting to stop the run on this partition as well. Conversely, if
|
||||||
|
fix halt determines to halt the simulation, the fix will send messages
|
||||||
|
to all other partitions so they stop their runs, too.
|
||||||
|
|
||||||
Restart, fix_modify, output, run start/stop, minimize info
|
Restart, fix_modify, output, run start/stop, minimize info
|
||||||
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
||||||
|
|
||||||
No information about this fix is written to :doc:`binary restart files <restart>`. None of the :doc:`fix_modify <fix_modify>` options
|
No information about this fix is written to :doc:`binary restart files
|
||||||
are relevant to this fix. No global or per-atom quantities are stored
|
<restart>`. None of the :doc:`fix_modify <fix_modify>` options are
|
||||||
by this fix for access by various :doc:`output commands <Howto_output>`.
|
relevant to this fix. No global or per-atom quantities are stored by
|
||||||
|
this fix for access by various :doc:`output commands <Howto_output>`.
|
||||||
No parameter of this fix can be used with the *start/stop* keywords of
|
No parameter of this fix can be used with the *start/stop* keywords of
|
||||||
the :doc:`run <run>` command.
|
the :doc:`run <run>` command.
|
||||||
|
|
||||||
@ -183,4 +195,4 @@ Related commands
|
|||||||
Default
|
Default
|
||||||
"""""""
|
"""""""
|
||||||
|
|
||||||
The option defaults are error = soft, message = yes, and path = ".".
|
The option defaults are error = soft, message = yes, path = ".", and universe = no.
|
||||||
|
|||||||
@ -22,6 +22,7 @@
|
|||||||
#include "neighbor.h"
|
#include "neighbor.h"
|
||||||
#include "timer.h"
|
#include "timer.h"
|
||||||
#include "update.h"
|
#include "update.h"
|
||||||
|
#include "universe.h"
|
||||||
#include "variable.h"
|
#include "variable.h"
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
@ -34,6 +35,7 @@ enum { BONDMAX, TLIMIT, DISKFREE, VARIABLE };
|
|||||||
enum { LT, LE, GT, GE, EQ, NEQ, XOR };
|
enum { LT, LE, GT, GE, EQ, NEQ, XOR };
|
||||||
enum { HARD, SOFT, CONTINUE };
|
enum { HARD, SOFT, CONTINUE };
|
||||||
enum { NOMSG = 0, YESMSG = 1 };
|
enum { NOMSG = 0, YESMSG = 1 };
|
||||||
|
static constexpr int UTAG = 999;
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
@ -42,11 +44,10 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
|
|||||||
{
|
{
|
||||||
if (narg < 7) utils::missing_cmd_args(FLERR, "fix halt", error);
|
if (narg < 7) utils::missing_cmd_args(FLERR, "fix halt", error);
|
||||||
nevery = utils::inumeric(FLERR, arg[3], false, lmp);
|
nevery = utils::inumeric(FLERR, arg[3], false, lmp);
|
||||||
if (nevery <= 0) error->all(FLERR, "Illegal fix halt command: nevery must be > 0");
|
if (nevery <= 0) error->all(FLERR, 3, "Illegal fix halt command: nevery must be > 0");
|
||||||
|
|
||||||
// comparison args
|
// comparison args
|
||||||
|
|
||||||
idvar = nullptr;
|
|
||||||
int iarg = 4;
|
int iarg = 4;
|
||||||
|
|
||||||
if (strcmp(arg[iarg], "tlimit") == 0) {
|
if (strcmp(arg[iarg], "tlimit") == 0) {
|
||||||
@ -56,20 +57,22 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
|
|||||||
dlimit_path = utils::strdup(".");
|
dlimit_path = utils::strdup(".");
|
||||||
} else if (strcmp(arg[iarg], "bondmax") == 0) {
|
} else if (strcmp(arg[iarg], "bondmax") == 0) {
|
||||||
attribute = BONDMAX;
|
attribute = BONDMAX;
|
||||||
} else {
|
} else if (utils::strmatch(arg[iarg], "^v_")) {
|
||||||
ArgInfo argi(arg[iarg], ArgInfo::VARIABLE);
|
ArgInfo argi(arg[iarg], ArgInfo::VARIABLE);
|
||||||
|
|
||||||
if ((argi.get_type() == ArgInfo::UNKNOWN) || (argi.get_type() == ArgInfo::NONE) ||
|
if ((argi.get_type() == ArgInfo::UNKNOWN) || (argi.get_type() == ArgInfo::NONE) ||
|
||||||
(argi.get_dim() != 0))
|
(argi.get_dim() != 0))
|
||||||
error->all(FLERR, "Invalid fix halt attribute {}", arg[iarg]);
|
error->all(FLERR, iarg, "Invalid fix halt attribute {}", arg[iarg]);
|
||||||
|
|
||||||
attribute = VARIABLE;
|
attribute = VARIABLE;
|
||||||
idvar = argi.copy_name();
|
idvar = argi.copy_name();
|
||||||
ivar = input->variable->find(idvar);
|
ivar = input->variable->find(idvar);
|
||||||
|
|
||||||
if (ivar < 0) error->all(FLERR, "Could not find fix halt variable name");
|
if (ivar < 0) error->all(FLERR, iarg, "Could not find fix halt variable name {}", idvar);
|
||||||
if (input->variable->equalstyle(ivar) == 0)
|
if (input->variable->equalstyle(ivar) == 0)
|
||||||
error->all(FLERR, "Fix halt variable is not equal-style variable");
|
error->all(FLERR, iarg, "Fix halt variable is not equal-style variable");
|
||||||
|
} else {
|
||||||
|
error->all(FLERR, iarg, "Unknown fix halt keyword {}", arg[iarg]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
@ -90,6 +93,7 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
|
|||||||
|
|
||||||
eflag = SOFT;
|
eflag = SOFT;
|
||||||
msgflag = YESMSG;
|
msgflag = YESMSG;
|
||||||
|
uflag = NOMSG;
|
||||||
++iarg;
|
++iarg;
|
||||||
while (iarg < narg) {
|
while (iarg < narg) {
|
||||||
if (strcmp(arg[iarg], "error") == 0) {
|
if (strcmp(arg[iarg], "error") == 0) {
|
||||||
@ -103,6 +107,10 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
|
|||||||
if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt message", error);
|
if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt message", error);
|
||||||
msgflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
|
msgflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
|
||||||
iarg += 2;
|
iarg += 2;
|
||||||
|
} else if (strcmp(arg[iarg], "universe") == 0) {
|
||||||
|
if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt universe", error);
|
||||||
|
uflag = utils::logical(FLERR, arg[iarg + 1], false, lmp);
|
||||||
|
iarg += 2;
|
||||||
} else if (strcmp(arg[iarg], "path") == 0) {
|
} else if (strcmp(arg[iarg], "path") == 0) {
|
||||||
if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt error", error);
|
if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt error", error);
|
||||||
++iarg;
|
++iarg;
|
||||||
@ -189,6 +197,50 @@ void FixHalt::min_post_force(int /* vflag */)
|
|||||||
|
|
||||||
void FixHalt::end_of_step()
|
void FixHalt::end_of_step()
|
||||||
{
|
{
|
||||||
|
// check if another partition has exited and we need to exit, too.
|
||||||
|
|
||||||
|
if (uflag) {
|
||||||
|
MPI_Status status;
|
||||||
|
int partition = -1;
|
||||||
|
int flag = 0;
|
||||||
|
if (comm->me == 0) {
|
||||||
|
|
||||||
|
// probe if any stop request from another partition is pending
|
||||||
|
|
||||||
|
MPI_Iprobe(MPI_ANY_SOURCE, UTAG, universe->uworld, &flag, &status);
|
||||||
|
|
||||||
|
if (flag) {
|
||||||
|
// determine which partition sent the stop request and receive the message
|
||||||
|
for (int i = 0; i < universe->nworlds; ++i)
|
||||||
|
if (universe->root_proc[i] == status.MPI_SOURCE) partition = i + 1;
|
||||||
|
|
||||||
|
MPI_Recv(&flag, 1, MPI_INT, status.MPI_SOURCE, UTAG, universe->uworld, MPI_STATUS_IGNORE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// broadcast stop request partition to all processes in our partition
|
||||||
|
|
||||||
|
MPI_Bcast(&partition, 1, MPI_INT, 0, world);
|
||||||
|
|
||||||
|
// an exit request is pending: handle it the same way as a local halt below
|
||||||
|
|
||||||
|
if (partition > 0) {
|
||||||
|
|
||||||
|
// hard halt -> exit LAMMPS
|
||||||
|
// soft/continue halt -> trigger timer to break from run loop
|
||||||
|
// print message with ID of fix halt in case multiple instances
|
||||||
|
|
||||||
|
auto message = fmt::format("Received universe halt request from partition {} for fix-id {} on step {}",
|
||||||
|
partition, id, update->ntimestep);
|
||||||
|
if (eflag == HARD) {
|
||||||
|
error->all(FLERR, message);
|
||||||
|
} else if ((eflag == SOFT) || (eflag == CONTINUE)) {
|
||||||
|
if ((comm->me == 0) && (msgflag == YESMSG)) error->message(FLERR, message);
|
||||||
|
timer->force_timeout();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// variable evaluation may invoke computes so wrap with clear/add
|
// variable evaluation may invoke computes so wrap with clear/add
|
||||||
|
|
||||||
double attvalue;
|
double attvalue;
|
||||||
@ -228,6 +280,22 @@ void FixHalt::end_of_step()
|
|||||||
if ((attvalue == 0.0 && value == 0.0) || (attvalue != 0.0 && value != 0.0)) return;
|
if ((attvalue == 0.0 && value == 0.0) || (attvalue != 0.0 && value != 0.0)) return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// send message to all other root processes to trigger exit across universe, if requested
|
||||||
|
|
||||||
|
if (uflag && (comm->me == 0)) {
|
||||||
|
MPI_Request *req = new MPI_Request[universe->nworlds];
|
||||||
|
for (int i = 0; i < universe->nworlds; ++i) {
|
||||||
|
if (universe->me == universe->root_proc[i]) continue;
|
||||||
|
MPI_Isend(&eflag, 1, MPI_INT, universe->root_proc[i], UTAG, universe->uworld, req + i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// wait for all sends to complete, so MPI_Finalize() will be happy
|
||||||
|
for (int i = 0; i < universe->nworlds; ++i) {
|
||||||
|
if (universe->me == universe->root_proc[i]) continue;
|
||||||
|
MPI_Wait(req + i, MPI_STATUS_IGNORE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// hard halt -> exit LAMMPS
|
// hard halt -> exit LAMMPS
|
||||||
// soft/continue halt -> trigger timer to break from run loop
|
// soft/continue halt -> trigger timer to break from run loop
|
||||||
// print message with ID of fix halt in case multiple instances
|
// print message with ID of fix halt in case multiple instances
|
||||||
|
|||||||
@ -35,7 +35,7 @@ class FixHalt : public Fix {
|
|||||||
void post_run() override;
|
void post_run() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int attribute, operation, eflag, msgflag, ivar;
|
int attribute, operation, eflag, msgflag, ivar, uflag;
|
||||||
bigint nextstep, thisstep;
|
bigint nextstep, thisstep;
|
||||||
double value, tratio;
|
double value, tratio;
|
||||||
char *idvar;
|
char *idvar;
|
||||||
|
|||||||
Reference in New Issue
Block a user