Added 'timeout <value>' option to the adios reader, so that a rerun can concurrently run with the first simulation

This commit is contained in:
Podhorszki Norbert
2020-01-20 15:52:21 -05:00
parent 95d3b9e708
commit 44a814ec1d
6 changed files with 55 additions and 12 deletions

View File

@ -43,7 +43,9 @@ Syntax
*format* values = format of dump file, must be last keyword if used *format* values = format of dump file, must be last keyword if used
*native* = native LAMMPS dump file *native* = native LAMMPS dump file
*xyz* = XYZ file *xyz* = XYZ file
*adios* = dump file written by the :doc:`dump adios <dump_adios>` command *adios* [*timeout* value] = dump file written by the :doc:`dump adios <dump_adios>` command
*timeout* value = time to wait for the requested timestep to arrive when the dump file is still being written by a concurrently running simulation.
The value is a floating-point number and is interpreted in seconds.
*molfile* style path = VMD molfile plugin interface *molfile* style path = VMD molfile plugin interface
style = *dcd* or *xyz* or others supported by molfile plugins style = *dcd* or *xyz* or others supported by molfile plugins
path = optional path for location of molfile plugins path = optional path for location of molfile plugins
@ -67,6 +69,7 @@ Examples
read_dump dump.dcd 0 x y z format molfile dcd read_dump dump.dcd 0 x y z format molfile dcd
read_dump dump.file 1000 x y z vx vy vz format molfile lammpstrj /usr/local/lib/vmd/plugins/LINUXAMD64/plugins/molfile read_dump dump.file 1000 x y z vx vy vz format molfile lammpstrj /usr/local/lib/vmd/plugins/LINUXAMD64/plugins/molfile
read_dump dump.bp 5000 x y z vx vy vz format adios read_dump dump.bp 5000 x y z vx vy vz format adios
read_dump dump.bp 5000 x y z vx vy vz format adios timeout 60.0
Description Description
""""""""""" """""""""""
@ -144,7 +147,10 @@ entire dump is read in parallel across all the processes, dividing
the atoms evenly among the processes. The number of writers that the atoms evenly among the processes. The number of writers that
have written the dump file does not matter. Using the adios style for have written the dump file does not matter. Using the adios style for
dump and read_dump is a convenient way to dump all atoms from *N* dump and read_dump is a convenient way to dump all atoms from *N*
writers and read it back by *M* readers. writers and read it back by *M* readers. If one is running two
LAMMPS instances concurrently where one dumps data and the other is
reading it with the rerun command, the timeout option can be specified
to wait on the reader side for the arrival of the requested step.
Support for other dump format readers may be added in the future. Support for other dump format readers may be added in the future.

View File

@ -44,6 +44,8 @@ Examples
rerun dump.vels dump x y z vx vy vz box yes format molfile lammpstrj rerun dump.vels dump x y z vx vy vz box yes format molfile lammpstrj
rerun dump.dcd dump x y z box no format molfile dcd rerun dump.dcd dump x y z box no format molfile dcd
rerun ../run7/dump.file.gz skip 2 dump x y z box yes rerun ../run7/dump.file.gz skip 2 dump x y z box yes
rerun dump.bp dump x y z box no format adios
rerun dump.bp dump x y z vx vy vz format adios timeout 10.0
Description Description
""""""""""" """""""""""

View File

@ -36,3 +36,11 @@ $ bpls -l lj_dump.bp -d atoms -n 8 --format "%g" | less -S
(10,31999,0) 31924 1 32.8007 32.8736 32.5882 -0.980419 -0.237448 -1.21369 (10,31999,0) 31924 1 32.8007 32.8736 32.5882 -0.980419 -0.237448 -1.21369
Concurrent rerun use case
=========================
The ADIOS BP4 engine allows for reading completed steps from the dump file while the writer is still running and dumping new steps. In two terminals one can run the in.first and in.rerun examples concurrently. The second simulation will block and wait for the steps from the first.
$ mpirun -n 2 lmp -in in.first
$ mpirun -n 4 lmp -in in.rerun

View File

@ -56,7 +56,6 @@
<io name="read_dump"> <io name="read_dump">
<engine type="BP4"> <engine type="BP4">
<parameter key="OpenTimeoutSecs" value="10.0"/>
</engine> </engine>
</io> </io>

View File

@ -25,5 +25,5 @@ neighbor 0.3 bin
thermo 100 thermo 100
rerun lj_dump.bp first 200 last 800 every 200 & rerun lj_dump.bp first 200 last 800 every 200 &
dump x y z vx vy vz format adios dump x y z vx vy vz format adios timeout 10.0

View File

@ -65,6 +65,7 @@ public:
// list of column names for the atom table // list of column names for the atom table
// (individual list of 'columns' string) // (individual list of 'columns' string)
std::vector<std::string> columnNames; std::vector<std::string> columnNames;
float timeout = 0.0;
}; };
} // namespace LAMMPS_NS } // namespace LAMMPS_NS
@ -114,7 +115,26 @@ ReaderADIOS::~ReaderADIOS()
pass on settings to find and load the proper plugin pass on settings to find and load the proper plugin
Called by all processors. Called by all processors.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void ReaderADIOS::settings(int narg, char **arg) {} void ReaderADIOS::settings(int narg, char **arg)
{
int idx = 0;
while (idx < narg) {
if (!strcmp(arg[idx], "timeout")) {
if (idx + 1 < narg) {
internal->timeout = std::stof(arg[idx + 1]);
internal->io.SetParameter("OpenTimeoutSecs", arg[idx + 1]);
++idx;
} else {
char str[128];
snprintf(str, sizeof(str),
"Missing value for 'timeout' option for ADIOS "
"read_dump command");
error->one(FLERR, str);
}
}
++idx;
}
}
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
try to open given file try to open given file
@ -130,9 +150,15 @@ void ReaderADIOS::open_file(const char *file)
if (internal->fh) if (internal->fh)
internal->fh.Close(); internal->fh.Close();
internal->fh = internal->io.Open(file, adios2::Mode::Read, world); try {
internal->fh = internal->io.Open(file, adios2::Mode::Read, world);
} catch (std::ios_base::failure &e) {
char str[256];
snprintf(str, sizeof(str), "%s", e.what());
error->one(FLERR, str);
}
if (!internal->fh) { if (!internal->fh) {
snprintf(str, strlen(str), "Cannot open file %s using ADIOS", file); snprintf(str, sizeof(str), "Cannot open file %s using ADIOS", file);
error->one(FLERR, str); error->one(FLERR, str);
} }
} }
@ -161,7 +187,7 @@ int ReaderADIOS::read_time(bigint &ntimestep)
char str[1024]; char str[1024];
adios2::StepStatus status = adios2::StepStatus status =
internal->fh.BeginStep(adios2::StepMode::Read, 10.0f); internal->fh.BeginStep(adios2::StepMode::Read, internal->timeout);
switch (status) { switch (status) {
case adios2::StepStatus::EndOfStream: case adios2::StepStatus::EndOfStream:
@ -176,13 +202,15 @@ int ReaderADIOS::read_time(bigint &ntimestep)
internal->io.InquireVariable<uint64_t>("ntimestep"); internal->io.InquireVariable<uint64_t>("ntimestep");
if (!internal->varNtimestep) { if (!internal->varNtimestep) {
snprintf(str, strlen(str), snprintf(str, sizeof(str),
"Did not find 'ntimestep' variable in ADIOS file %s", "Did not find 'ntimestep' variable in ADIOS file %s",
internal->fh.Name().c_str()); internal->fh.Name().c_str());
error->one(FLERR, str); error->one(FLERR, str);
} }
ntimestep = static_cast<bigint>(internal->varNtimestep.Max()); ntimestep = static_cast<bigint>(internal->varNtimestep.Max());
// std::cerr << " **** ReaderADIOS::read_time found step " << ntimestep
// << " **** " << std::endl;
return 0; return 0;
} }
@ -220,7 +248,7 @@ bigint ReaderADIOS::read_header(double box[3][3], int &boxinfo, int &triclinic,
internal->varNatoms = internal->io.InquireVariable<uint64_t>("natoms"); internal->varNatoms = internal->io.InquireVariable<uint64_t>("natoms");
if (!internal->varNatoms) { if (!internal->varNatoms) {
snprintf(str, strlen(str), snprintf(str, sizeof(str),
"Did not find 'natoms' variable in ADIOS file %s", "Did not find 'natoms' variable in ADIOS file %s",
internal->fh.Name().c_str()); internal->fh.Name().c_str());
error->one(FLERR, str); error->one(FLERR, str);
@ -242,7 +270,7 @@ bigint ReaderADIOS::read_header(double box[3][3], int &boxinfo, int &triclinic,
adios2::Attribute<int32_t> attTriclinic = adios2::Attribute<int32_t> attTriclinic =
internal->io.InquireAttribute<int32_t>("triclinic"); internal->io.InquireAttribute<int32_t>("triclinic");
if (!attTriclinic) { if (!attTriclinic) {
snprintf(str, strlen(str), snprintf(str, sizeof(str),
"Did not find 'triclinic' attribute in ADIOS file %s", "Did not find 'triclinic' attribute in ADIOS file %s",
internal->fh.Name().c_str()); internal->fh.Name().c_str());
error->one(FLERR, str); error->one(FLERR, str);
@ -458,7 +486,7 @@ void ReaderADIOS::read_atoms(int n, int nfield, double **fields)
if (n != nAtoms) { if (n != nAtoms) {
snprintf( snprintf(
str, strlen(str), str, sizeof(str),
"ReaderADIOS::read_atoms() expects 'n=%d' equal to the number of " "ReaderADIOS::read_atoms() expects 'n=%d' equal to the number of "
"atoms (=%" PRIu64 ") for process %d in ADIOS file %s.", "atoms (=%" PRIu64 ") for process %d in ADIOS file %s.",
n, nAtoms, comm->me, internal->fh.Name().c_str()); n, nAtoms, comm->me, internal->fh.Name().c_str());