diff --git a/doc/src/fix_halt.rst b/doc/src/fix_halt.rst index 0bcf2fb5ea..f17cafc58c 100644 --- a/doc/src/fix_halt.rst +++ b/doc/src/fix_halt.rst @@ -25,13 +25,14 @@ Syntax * operator = "<" or "<=" or ">" or ">=" or "==" or "!=" or "\|\^" * avalue = numeric value to compare attribute to * zero or more keyword/value pairs may be appended -* keyword = *error* or *message* or *path* +* keyword = *error* or *message* or *path* or *universe* .. parsed-literal:: *error* value = *hard* or *soft* or *continue* *message* value = *yes* or *no* *path* value = path to check for free space (may be in quotes) + *universe* value = *yes* or *no* Examples @@ -40,8 +41,10 @@ Examples .. code-block:: LAMMPS fix 10 all halt 1 bondmax > 1.5 - fix 10 all halt 10 v_myCheck != 0 error soft + fix 10 all halt 10 v_myCheck != 0 error soft message no fix 10 all halt 100 diskfree < 100000.0 path "dump storage/." + fix 2 all halt 100 v_curtime > ${maxtime} universe yes + Description """"""""""" @@ -162,12 +165,21 @@ is printed; the run simply exits. The latter may be desirable for post-processing tools that extract thermodynamic information from log files. +The optional *universe* keyword determines whether the halt request +should be synchronized across the partitions of a :doc:`multi-partition +run <Run_options>`. If *universe* is set to *yes*, fix halt will check if +there is a specific message received from any of the other partitions +requesting to stop the run on this partition as well. Conversely, if +fix halt decides to halt the simulation, the fix will send messages +to all other partitions so they stop their runs, too. + Restart, fix_modify, output, run start/stop, minimize info """"""""""""""""""""""""""""""""""""""""""""""""""""""""""" -No information about this fix is written to :doc:`binary restart files <restart>`. None of the :doc:`fix_modify <fix_modify>` options -are relevant to this fix. No global or per-atom quantities are stored -by this fix for access by various :doc:`output commands <Howto_output>`. 
+No information about this fix is written to :doc:`binary restart files +<restart>`. None of the :doc:`fix_modify <fix_modify>` options are +relevant to this fix. No global or per-atom quantities are stored by +this fix for access by various :doc:`output commands <Howto_output>`. No parameter of this fix can be used with the *start/stop* keywords of the :doc:`run <run>` command. @@ -183,4 +195,4 @@ Related commands Default """"""" -The option defaults are error = soft, message = yes, and path = ".". +The option defaults are error = soft, message = yes, path = ".", and universe = no. diff --git a/src/fix_halt.cpp b/src/fix_halt.cpp index b34c79867f..a0047a3f7a 100644 --- a/src/fix_halt.cpp +++ b/src/fix_halt.cpp @@ -22,6 +22,7 @@ #include "neighbor.h" #include "timer.h" #include "update.h" +#include "universe.h" #include "variable.h" #include @@ -34,6 +35,7 @@ enum { BONDMAX, TLIMIT, DISKFREE, VARIABLE }; enum { LT, LE, GT, GE, EQ, NEQ, XOR }; enum { HARD, SOFT, CONTINUE }; enum { NOMSG = 0, YESMSG = 1 }; +static constexpr int UTAG = 999; /* ---------------------------------------------------------------------- */ @@ -42,11 +44,10 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) : { if (narg < 7) utils::missing_cmd_args(FLERR, "fix halt", error); nevery = utils::inumeric(FLERR, arg[3], false, lmp); - if (nevery <= 0) error->all(FLERR, "Illegal fix halt command: nevery must be > 0"); + if (nevery <= 0) error->all(FLERR, 3, "Illegal fix halt command: nevery must be > 0"); // comparison args - idvar = nullptr; int iarg = 4; if (strcmp(arg[iarg], "tlimit") == 0) { @@ -56,20 +57,22 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) : dlimit_path = utils::strdup("."); } else if (strcmp(arg[iarg], "bondmax") == 0) { attribute = BONDMAX; - } else { + } else if (utils::strmatch(arg[iarg], "^v_")) { ArgInfo argi(arg[iarg], ArgInfo::VARIABLE); if ((argi.get_type() == ArgInfo::UNKNOWN) || (argi.get_type() == ArgInfo::NONE) || (argi.get_dim() != 0)) - error->all(FLERR, "Invalid fix halt attribute {}", 
arg[iarg]); + error->all(FLERR, iarg, "Invalid fix halt attribute {}", arg[iarg]); attribute = VARIABLE; idvar = argi.copy_name(); ivar = input->variable->find(idvar); - if (ivar < 0) error->all(FLERR, "Could not find fix halt variable name"); + if (ivar < 0) error->all(FLERR, iarg, "Could not find fix halt variable name {}", idvar); if (input->variable->equalstyle(ivar) == 0) - error->all(FLERR, "Fix halt variable is not equal-style variable"); + error->all(FLERR, iarg, "Fix halt variable is not equal-style variable"); + } else { + error->all(FLERR, iarg, "Unknown fix halt keyword {}", arg[iarg]); } // clang-format off @@ -90,6 +93,7 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) : eflag = SOFT; msgflag = YESMSG; + uflag = NOMSG; ++iarg; while (iarg < narg) { if (strcmp(arg[iarg], "error") == 0) { @@ -103,6 +107,10 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) : if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt message", error); msgflag = utils::logical(FLERR, arg[iarg + 1], false, lmp); iarg += 2; + } else if (strcmp(arg[iarg], "universe") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt universe", error); + uflag = utils::logical(FLERR, arg[iarg + 1], false, lmp); + iarg += 2; } else if (strcmp(arg[iarg], "path") == 0) { if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, "fix halt error", error); ++iarg; @@ -189,6 +197,50 @@ void FixHalt::min_post_force(int /* vflag */) void FixHalt::end_of_step() { + // check if another partition has exited and we need to exit, too. 
+ + if (uflag) { + MPI_Status status; + int partition = -1; + int flag = 0; + if (comm->me == 0) { + + // probe if any stop request from another partition is pending + + MPI_Iprobe(MPI_ANY_SOURCE, UTAG, universe->uworld, &flag, &status); + + if (flag) { + // determine which partition sent the stop request and receive the message + for (int i = 0; i < universe->nworlds; ++i) + if (universe->root_proc[i] == status.MPI_SOURCE) partition = i + 1; + + MPI_Recv(&flag, 1, MPI_INT, status.MPI_SOURCE, UTAG, universe->uworld, MPI_STATUS_IGNORE); + } + } + + // broadcast stop request partition to all processes in our partition + + MPI_Bcast(&partition, 1, MPI_INT, 0, world); + + // exit request pending handle the same as below + + if (partition > 0) { + + // hard halt -> exit LAMMPS + // soft/continue halt -> trigger timer to break from run loop + // print message with ID of fix halt in case multiple instances + + auto message = fmt::format("Received universe halt request from partition {} for fix-id {} on step {}", + partition, id, update->ntimestep); + if (eflag == HARD) { + error->all(FLERR, message); + } else if ((eflag == SOFT) || (eflag == CONTINUE)) { + if ((comm->me == 0) && (msgflag == YESMSG)) error->message(FLERR, message); + timer->force_timeout(); + } + } + } + // variable evaluation may invoke computes so wrap with clear/add double attvalue; @@ -228,6 +280,22 @@ void FixHalt::end_of_step() if ((attvalue == 0.0 && value == 0.0) || (attvalue != 0.0 && value != 0.0)) return; } + // send message to all other root processes to trigger exit across universe, if requested + + if (uflag && (comm->me == 0)) { + MPI_Request *req = new MPI_Request[universe->nworlds]; + for (int i = 0; i < universe->nworlds; ++i) { + if (universe->me == universe->root_proc[i]) continue; + MPI_Isend(&eflag, 1, MPI_INT, universe->root_proc[i], UTAG, universe->uworld, req + i); + } + + // wait for all sends to complete, so MPI_Finalize() will be happy + for (int i = 0; i < universe->nworlds; 
++i) { + if (universe->me == universe->root_proc[i]) continue; + MPI_Wait(req + i, MPI_STATUS_IGNORE); + } + } + // hard halt -> exit LAMMPS // soft/continue halt -> trigger timer to break from run loop // print message with ID of fix halt in case multiple instances diff --git a/src/fix_halt.h b/src/fix_halt.h index d6c46778e4..1fe789e9fa 100644 --- a/src/fix_halt.h +++ b/src/fix_halt.h @@ -35,7 +35,7 @@ class FixHalt : public Fix { void post_run() override; private: - int attribute, operation, eflag, msgflag, ivar; + int attribute, operation, eflag, msgflag, ivar, uflag; bigint nextstep, thisstep; double value, tratio; char *idvar;