Added 'timeout <value>' option to the adios reader, so that a rerun can concurrently run with the first simulation

This commit is contained in:
Podhorszki Norbert
2020-01-20 15:52:21 -05:00
parent 95d3b9e708
commit 44a814ec1d
6 changed files with 55 additions and 12 deletions

View File

@ -43,7 +43,9 @@ Syntax
*format* values = format of dump file, must be last keyword if used
*native* = native LAMMPS dump file
*xyz* = XYZ file
*adios* = dump file written by the :doc:`dump adios <dump_adios>` command
*adios* [*timeout* value] = dump file written by the :doc:`dump adios <dump_adios>` command
*timeout* = waiting time for the arrival of the requested timestep when running concurrently with the writer.
The value is a floating-point number and is interpreted in seconds.
*molfile* style path = VMD molfile plugin interface
style = *dcd* or *xyz* or others supported by molfile plugins
path = optional path for location of molfile plugins
@ -67,6 +69,7 @@ Examples
read_dump dump.dcd 0 x y z format molfile dcd
read_dump dump.file 1000 x y z vx vy vz format molfile lammpstrj /usr/local/lib/vmd/plugins/LINUXAMD64/plugins/molfile
read_dump dump.bp 5000 x y z vx vy vz format adios
read_dump dump.bp 5000 x y z vx vy vz format adios timeout 60.0
Description
"""""""""""
@ -144,7 +147,10 @@ entire dump is read in parallel across all the processes, dividing
the atoms evenly among the processes. The number of writers that
have written the dump file does not matter. Using the adios style for
dump and read_dump is a convenient way to dump all atoms from *N*
writers and read it back by *M* readers.
writers and read it back by *M* readers. If one is running two
LAMMPS instances concurrently where one dumps data and the other is
reading it with the rerun command, the timeout option can be specified
to wait on the reader side for the arrival of the requested step.
Support for other dump format readers may be added in the future.

View File

@ -44,6 +44,8 @@ Examples
rerun dump.vels dump x y z vx vy vz box yes format molfile lammpstrj
rerun dump.dcd dump x y z box no format molfile dcd
rerun ../run7/dump.file.gz skip 2 dump x y z box yes
rerun dump.bp dump x y z box no format adios
rerun dump.bp dump x y z vx vy vz format adios timeout 10.0
Description
"""""""""""

View File

@ -36,3 +36,11 @@ $ bpls -l lj_dump.bp -d atoms -n 8 --format "%g" | less -S
(10,31999,0) 31924 1 32.8007 32.8736 32.5882 -0.980419 -0.237448 -1.21369
Concurrent rerun use case
=========================
The ADIOS BP4 engine allows for reading from the dump file (completed steps) while the writer is still running (and dumping new steps). In two terminals one can run the in.first and in.rerun examples concurrently. The second simulation will block and wait for the steps from the first.
$ mpirun -n 2 lmp -in in.first
$ mpirun -n 4 lmp -in in.rerun

View File

@ -56,7 +56,6 @@
<io name="read_dump">
<engine type="BP4">
<parameter key="OpenTimeoutSecs" value="10.0"/>
</engine>
</io>

View File

@ -25,5 +25,5 @@ neighbor 0.3 bin
thermo 100
rerun lj_dump.bp first 200 last 800 every 200 &
dump x y z vx vy vz format adios
dump x y z vx vy vz format adios timeout 10.0

View File

@ -65,6 +65,7 @@ public:
// list of column names for the atom table
// (individual list of 'columns' string)
std::vector<std::string> columnNames;
float timeout = 0.0;
};
} // namespace LAMMPS_NS
@ -114,7 +115,26 @@ ReaderADIOS::~ReaderADIOS()
pass on settings to find and load the proper plugin
Called by all processors.
------------------------------------------------------------------------- */
void ReaderADIOS::settings(int narg, char **arg) {}
void ReaderADIOS::settings(int narg, char **arg)
{
int idx = 0;
while (idx < narg) {
if (!strcmp(arg[idx], "timeout")) {
if (idx + 1 < narg) {
internal->timeout = std::stof(arg[idx + 1]);
internal->io.SetParameter("OpenTimeoutSecs", arg[idx + 1]);
++idx;
} else {
char str[128];
snprintf(str, sizeof(str),
"Missing value for 'timeout' option for ADIOS "
"read_dump command");
error->one(FLERR, str);
}
}
++idx;
}
}
/* ----------------------------------------------------------------------
try to open given file
@ -130,9 +150,15 @@ void ReaderADIOS::open_file(const char *file)
if (internal->fh)
internal->fh.Close();
internal->fh = internal->io.Open(file, adios2::Mode::Read, world);
try {
internal->fh = internal->io.Open(file, adios2::Mode::Read, world);
} catch (std::ios_base::failure &e) {
char str[256];
snprintf(str, sizeof(str), "%s", e.what());
error->one(FLERR, str);
}
if (!internal->fh) {
snprintf(str, strlen(str), "Cannot open file %s using ADIOS", file);
snprintf(str, sizeof(str), "Cannot open file %s using ADIOS", file);
error->one(FLERR, str);
}
}
@ -161,7 +187,7 @@ int ReaderADIOS::read_time(bigint &ntimestep)
char str[1024];
adios2::StepStatus status =
internal->fh.BeginStep(adios2::StepMode::Read, 10.0f);
internal->fh.BeginStep(adios2::StepMode::Read, internal->timeout);
switch (status) {
case adios2::StepStatus::EndOfStream:
@ -176,13 +202,15 @@ int ReaderADIOS::read_time(bigint &ntimestep)
internal->io.InquireVariable<uint64_t>("ntimestep");
if (!internal->varNtimestep) {
snprintf(str, strlen(str),
snprintf(str, sizeof(str),
"Did not find 'ntimestep' variable in ADIOS file %s",
internal->fh.Name().c_str());
error->one(FLERR, str);
}
ntimestep = static_cast<bigint>(internal->varNtimestep.Max());
// std::cerr << " **** ReaderADIOS::read_time found step " << ntimestep
// << " **** " << std::endl;
return 0;
}
@ -220,7 +248,7 @@ bigint ReaderADIOS::read_header(double box[3][3], int &boxinfo, int &triclinic,
internal->varNatoms = internal->io.InquireVariable<uint64_t>("natoms");
if (!internal->varNatoms) {
snprintf(str, strlen(str),
snprintf(str, sizeof(str),
"Did not find 'natoms' variable in ADIOS file %s",
internal->fh.Name().c_str());
error->one(FLERR, str);
@ -242,7 +270,7 @@ bigint ReaderADIOS::read_header(double box[3][3], int &boxinfo, int &triclinic,
adios2::Attribute<int32_t> attTriclinic =
internal->io.InquireAttribute<int32_t>("triclinic");
if (!attTriclinic) {
snprintf(str, strlen(str),
snprintf(str, sizeof(str),
"Did not find 'triclinic' attribute in ADIOS file %s",
internal->fh.Name().c_str());
error->one(FLERR, str);
@ -458,7 +486,7 @@ void ReaderADIOS::read_atoms(int n, int nfield, double **fields)
if (n != nAtoms) {
snprintf(
str, strlen(str),
str, sizeof(str),
"ReaderADIOS::read_atoms() expects 'n=%d' equal to the number of "
"atoms (=%" PRIu64 ") for process %d in ADIOS file %s.",
n, nAtoms, comm->me, internal->fh.Name().c_str());