reimplement wallclock timeout in timer class instead of update.

This commit is contained in:
Axel Kohlmeyer
2015-10-24 22:15:10 -04:00
parent 9c8fb961dc
commit 9be09f5ddf
9 changed files with 168 additions and 79 deletions

View File

@ -16,6 +16,7 @@
#include "timer.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "memory.h"
#ifdef _WIN32
@ -26,8 +27,38 @@
#include <sys/resource.h>
#endif
#include <stdlib.h>
#include <time.h>
using namespace LAMMPS_NS;
// convert a timespec ([[HH:]MM:]SS) to seconds
// the strings "off" and "unlimited" result in -1;
static double timespec2seconds(char *timespec)
{
double vals[3];
char *num;
int i = 0;
// first handle allowed textual inputs
if (strcmp(timespec,"off") == 0) return -1;
if (strcmp(timespec,"unlimited") == 0) return -1;
vals[0] = vals[1] = vals[2] = 0;
num = strtok(timespec,":");
while ((num != NULL) && (i < 3)) {
vals[i] = atoi(num);
++i;
num = strtok(NULL,":");
}
if (i == 3) return (vals[0]*60 + vals[1])*60 + vals[2];
else if (i == 2) return vals[0]*60 + vals[1];
else return vals[0];
}
// Return the CPU time for the current process in seconds very
// much in the same way as MPI_Wtime() returns the wall time.
@ -64,6 +95,9 @@ Timer::Timer(LAMMPS *lmp) : Pointers(lmp)
{
_level = NORMAL;
_sync = OFF;
_timeout = -1.0;
_checkfreq = 10;
_nextcheck = -1;
}
/* ---------------------------------------------------------------------- */
@ -176,12 +210,57 @@ void Timer::set_wall(enum ttype which, double newtime)
wall_array[which] = newtime;
}
/* ---------------------------------------------------------------------- */
void Timer::init_timeout()
{
if (_timeout < 0)
_nextcheck = -1;
else
_nextcheck = _checkfreq;
}
/* ---------------------------------------------------------------------- */
void Timer::print_timeout(FILE *fp)
{
if (!fp) return;
// format timeout setting
if (_timeout > 0) {
char timebuf[32];
time_t tv = _timeout - (MPI_Wtime() - timeout_start);
struct tm *tm = gmtime(&tv);
strftime(timebuf,32,"%H:%M:%S",tm);
fprintf(fp," Walltime left: %s\n",timebuf);
}
}
/* ---------------------------------------------------------------------- */
bool Timer::_check_timeout()
{
double walltime = MPI_Wtime() - timeout_start;
// broadcast time to insure all ranks act the same.
MPI_Bcast(&walltime,1,MPI_DOUBLE,0,world);
if (walltime < _timeout) {
_nextcheck += _checkfreq;
return false;
} else {
if (comm->me == 0)
error->warning(FLERR,"Wall time limit reached");
_timeout = 0.0;
return true;
}
}
/* ----------------------------------------------------------------------
modify parameters of the Timer class
------------------------------------------------------------------------- */
static const char *timer_style[] = { "off", "loop", "normal", "full" };
static const char *timer_mode[] = { "nosync", "(dummy)", "sync" };
static const char timer_fmt[] = "New timer settings: style=%s mode=%s\n";
static const char timer_fmt[] = "New timer settings: style=%s mode=%s timeout=%s\n";
void Timer::modify_params(int narg, char **arg)
{
@ -199,14 +278,37 @@ void Timer::modify_params(int narg, char **arg)
_sync = OFF;
} else if (strcmp(arg[iarg],timer_mode[NORMAL]) == 0) {
_sync = NORMAL;
} else if (strcmp(arg[iarg],"timeout") == 0) {
++iarg;
if (iarg < narg) {
_timeout = timespec2seconds(arg[iarg]);
} else error->all(FLERR,"Illegal timers command");
} else if (strcmp(arg[iarg],"every") == 0) {
++iarg;
if (iarg < narg) {
_checkfreq = force->inumeric(FLERR,arg[iarg]);
if (_checkfreq <= 0)
error->all(FLERR,"Illegal timers command");
} else error->all(FLERR,"Illegal timers command");
} else error->all(FLERR,"Illegal timers command");
++iarg;
}
timeout_start = MPI_Wtime();
if (comm->me == 0) {
// format timeout setting
char timebuf[32];
if (_timeout < 0) strcpy(timebuf,"off");
else {
time_t tv = _timeout;
struct tm *tm = gmtime(&tv);
strftime(timebuf,32,"%H:%M:%S",tm);
}
if (screen)
fprintf(screen,timer_fmt,timer_style[_level],timer_mode[_sync]);
fprintf(screen,timer_fmt,timer_style[_level],timer_mode[_sync],timebuf);
if (logfile)
fprintf(logfile,timer_fmt,timer_style[_level],timer_mode[_sync]);
fprintf(logfile,timer_fmt,timer_style[_level],timer_mode[_sync],timebuf);
}
}