git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@13937 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2015-08-28 20:00:56 +00:00
parent 962dd0da74
commit ce59e32d1c
189 changed files with 1526 additions and 587 deletions

View File

@ -16,9 +16,10 @@
#include "string.h"
#include "stdio.h"
#include "finish.h"
#include "lammps.h"
#include "timer.h"
#include "universe.h"
#include "accelerator_kokkos.h"
#include "accelerator_omp.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
@ -30,12 +31,28 @@
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "timer.h"
#include "output.h"
#include "memory.h"
#ifdef LMP_USER_OMP
#include "modify.h"
#include "fix_omp.h"
#include "thr_data.h"
#endif
using namespace LAMMPS_NS;
// local function prototypes, code at end of file
static void mpi_timings(const char *label, Timer *t, enum Timer::ttype tt,
MPI_Comm world, const int nprocs, const int nthreads,
const int me, double time_loop, FILE *scr, FILE *log);
#ifdef LMP_USER_OMP
static void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which,
const int nthreads,FILE *scr, FILE *log);
#endif
/* ---------------------------------------------------------------------- */
Finish::Finish(LAMMPS *lmp) : Pointers(lmp) {}
@ -46,14 +63,16 @@ void Finish::end(int flag)
{
int i,m,nneigh,nneighfull;
int histo[10];
int loopflag,minflag,prdflag,tadflag,timeflag,fftflag,histoflag,neighflag;
int minflag,prdflag,tadflag,timeflag,fftflag,histoflag,neighflag;
double time,tmp,ave,max,min;
double time_loop,time_other;
double time_loop,time_other,cpu_loop;
int me,nprocs;
MPI_Comm_rank(world,&me);
MPI_Comm_size(world,&nprocs);
const int nthreads = comm->nthreads;
// recompute natoms in case atoms have been lost
bigint nblocal = atom->nlocal;
@ -67,8 +86,8 @@ void Finish::end(int flag)
// flag = 3 = TAD
// turn off neighflag for Kspace partition of verlet/split integrator
loopflag = 1;
minflag = prdflag = tadflag = timeflag = fftflag = histoflag = neighflag = 0;
time_loop = cpu_loop = time_other = 0.0;
if (flag == 1) {
if (update->whichflag == 2) minflag = 1;
@ -80,53 +99,88 @@ void Finish::end(int flag)
if (force->kspace && force->kspace_match("pppm",0)
&& force->kspace->fftbench) fftflag = 1;
}
if (flag == 2) prdflag = histoflag = neighflag = 1;
if (flag == 2) prdflag = timeflag = histoflag = neighflag = 1;
if (flag == 3) tadflag = histoflag = neighflag = 1;
// loop stats
if (loopflag) {
time_other = timer->array[TIME_LOOP] -
(timer->array[TIME_PAIR] + timer->array[TIME_BOND] +
timer->array[TIME_KSPACE] + timer->array[TIME_NEIGHBOR] +
timer->array[TIME_COMM] + timer->array[TIME_OUTPUT]);
time_loop = timer->array[TIME_LOOP];
MPI_Allreduce(&time_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time_loop = tmp/nprocs;
if (timer->has_loop()) {
// overall loop time
#if defined(_OPENMP)
time_loop = timer->get_wall(Timer::TOTAL);
cpu_loop = timer->get_cpu(Timer::TOTAL);
MPI_Allreduce(&time_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time_loop = tmp/nprocs;
MPI_Allreduce(&cpu_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
cpu_loop = tmp/nprocs;
if (time_loop > 0.0) cpu_loop = cpu_loop/time_loop*100.0;
if (me == 0) {
int ntasks = nprocs * comm->nthreads;
if (screen) fprintf(screen,
"Loop time of %g on %d procs (%d MPI x %d OpenMP) "
"for %d steps with " BIGINT_FORMAT " atoms\n",
time_loop,ntasks,nprocs,comm->nthreads,
update->nsteps,atom->natoms);
if (logfile) fprintf(logfile,
"Loop time of %g on %d procs (%d MPI x %d OpenMP) "
"for %d steps with " BIGINT_FORMAT " atoms\n",
time_loop,ntasks,nprocs,comm->nthreads,
update->nsteps,atom->natoms);
}
int ntasks = nprocs * nthreads;
const char fmt1[] = "Loop time of %g on %d procs "
"for %d steps with " BIGINT_FORMAT " atoms\n\n";
if (screen) fprintf(screen,fmt1,time_loop,ntasks,update->nsteps,
atom->natoms,cpu_loop);
if (logfile) fprintf(logfile,fmt1,time_loop,ntasks,update->nsteps,
atom->natoms,cpu_loop);
// Gromacs/NAMD-style performance metric for suitable unit settings
if ( timeflag && !minflag && !prdflag && !tadflag &&
(update->nsteps > 0) && (update->dt != 0.0) &&
((strcmp(update->unit_style,"lj") == 0) ||
(strcmp(update->unit_style,"metal") == 0) ||
(strcmp(update->unit_style,"micro") == 0) ||
(strcmp(update->unit_style,"nano") == 0) ||
(strcmp(update->unit_style,"electron") == 0) ||
(strcmp(update->unit_style,"real") == 0)) ) {
double one_fs = force->femtosecond;
double t_step = ((double) time_loop) / ((double) update->nsteps);
double step_t = 1.0/t_step;
if (strcmp(update->unit_style,"lj") == 0) {
double tau_day = 24.0*3600.0 / t_step * update->dt / one_fs;
const char perf[] = "Performance: %.3f tau/day, %.3f timesteps/s\n";
if (screen) fprintf(screen,perf,tau_day,step_t);
if (logfile) fprintf(logfile,perf,tau_day,step_t);
} else {
double hrs_ns = t_step / update->dt * 1000000.0 * one_fs / 3600.0;
double ns_day = 24.0*3600.0 / t_step * update->dt / one_fs/1000000.0;
const char perf[] =
"Performance: %.3f ns/day, %.3f hours/ns, %.3f timesteps/s\n";
if (screen) fprintf(screen,perf,ns_day,hrs_ns,step_t);
if (logfile) fprintf(logfile,perf,ns_day,hrs_ns,step_t);
}
}
// CPU use on MPI tasks and OpenMP threads
#ifdef LMP_USER_OMP
const char fmt2[] =
"%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n";
if (screen) fprintf(screen,fmt2,cpu_loop,nprocs,nthreads);
if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs,nthreads);
#else
if (me == 0) {
if (screen) fprintf(screen,
"Loop time of %g on %d procs for %d steps with "
BIGINT_FORMAT " atoms\n",
time_loop,nprocs,update->nsteps,atom->natoms);
if (logfile) fprintf(logfile,
"Loop time of %g on %d procs for %d steps with "
BIGINT_FORMAT " atoms\n",
time_loop,nprocs,update->nsteps,atom->natoms);
}
const char fmt2[] =
"%.1f%% CPU use with %d MPI tasks x no OpenMP threads\n";
if (screen) fprintf(screen,fmt2,cpu_loop,nprocs);
if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs);
#endif
if (time_loop == 0.0) time_loop = 1.0;
}
}
// avoid division by zero for very short runs
if (time_loop == 0.0) time_loop = 1.0;
if (cpu_loop == 0.0) cpu_loop = 100.0;
// get "Other" wall time for later use
if (timer->has_normal())
time_other = timer->get_wall(Timer::TOTAL) - timer->get_wall(Timer::ALL);
// minimization stats
if (minflag) {
@ -190,7 +244,7 @@ void Finish::end(int flag)
if (screen) fprintf(screen,"PRD stats:\n");
if (logfile) fprintf(logfile,"PRD stats:\n");
time = timer->array[TIME_PAIR];
time = timer->get_wall(Timer::DEPHASE);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
@ -202,7 +256,7 @@ void Finish::end(int flag)
time,time/time_loop*100.0);
}
time = timer->array[TIME_BOND];
time = timer->get_wall(Timer::DYNAMICS);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
@ -214,7 +268,7 @@ void Finish::end(int flag)
time,time/time_loop*100.0);
}
time = timer->array[TIME_KSPACE];
time = timer->get_wall(Timer::QUENCH);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
@ -226,10 +280,35 @@ void Finish::end(int flag)
time,time/time_loop*100.0);
}
time = time_other;
time = timer->get_wall(Timer::REPCOMM);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," Comm time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Comm time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = timer->get_wall(Timer::REPOUT);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen," Output time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile," Output time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = time_other;
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) { // XXXX: replica comm, replica output
if (screen)
fprintf(screen," Other time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
@ -250,7 +329,7 @@ void Finish::end(int flag)
if (screen) fprintf(screen,"TAD stats:\n");
if (logfile) fprintf(logfile,"TAD stats:\n");
time = timer->array[TIME_PAIR];
time = timer->get_wall(Timer::NEB);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
@ -262,7 +341,7 @@ void Finish::end(int flag)
time,time/time_loop*100.0);
}
time = timer->array[TIME_BOND];
time = timer->get_wall(Timer::DYNAMICS);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
@ -274,7 +353,7 @@ void Finish::end(int flag)
time,time/time_loop*100.0);
}
time = timer->array[TIME_KSPACE];
time = timer->get_wall(Timer::QUENCH);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
@ -287,7 +366,7 @@ void Finish::end(int flag)
}
time = timer->array[TIME_COMM];
time = timer->get_wall(Timer::REPCOMM);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
@ -300,7 +379,7 @@ void Finish::end(int flag)
}
time = timer->array[TIME_OUTPUT];
time = timer->get_wall(Timer::REPOUT);
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
@ -325,103 +404,110 @@ void Finish::end(int flag)
}
}
// timing breakdowns
if (timeflag && timer->has_normal()) {
if (timeflag) {
if (me == 0) {
if (screen) fprintf(screen,"\n");
if (logfile) fprintf(logfile,"\n");
}
time = timer->array[TIME_PAIR];
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen,"Pair time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile,"Pair time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
if (atom->molecular) {
time = timer->array[TIME_BOND];
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (timer->has_full()) {
const char hdr[] = "\nMPI task timing breakdown:\n"
"Section | min time | avg time | max time |%varavg| %CPU | %total\n"
"-----------------------------------------------------------------------\n";
if (me == 0) {
if (screen)
fprintf(screen,"Bond time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile,"Bond time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (screen) fputs(hdr,screen);
if (logfile) fputs(hdr,logfile);
}
} else {
const char hdr[] = "\nMPI task timing breakdown:\n"
"Section | min time | avg time | max time |%varavg| %total\n"
"---------------------------------------------------------------\n";
if (me == 0) {
if (screen) fputs(hdr,screen);
if (logfile) fputs(hdr,logfile);
}
}
if (force->kspace) {
time = timer->array[TIME_KSPACE];
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen,"Kspce time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile,"Kspce time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
}
mpi_timings("Pair",timer,Timer::PAIR, world,nprocs,
nthreads,me,time_loop,screen,logfile);
time = timer->array[TIME_NEIGHBOR];
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen,"Neigh time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile,"Neigh time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
if (atom->molecular)
mpi_timings("Bond",timer,Timer::BOND,world,nprocs,
nthreads,me,time_loop,screen,logfile);
if (force->kspace)
mpi_timings("Kspace",timer,Timer::KSPACE,world,nprocs,
nthreads,me,time_loop,screen,logfile);
time = timer->array[TIME_COMM];
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen,"Comm time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile,"Comm time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
time = timer->array[TIME_OUTPUT];
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
if (me == 0) {
if (screen)
fprintf(screen,"Outpt time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile,"Outpt time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
}
mpi_timings("Neigh",timer,Timer::NEIGH,world,nprocs,
nthreads,me,time_loop,screen,logfile);
mpi_timings("Comm",timer,Timer::COMM,world,nprocs,
nthreads,me,time_loop,screen,logfile);
mpi_timings("Output",timer,Timer::OUTPUT,world,nprocs,
nthreads,me,time_loop,screen,logfile);
mpi_timings("Modify",timer,Timer::MODIFY,world,nprocs,
nthreads,me,time_loop,screen,logfile);
if (timer->has_sync())
mpi_timings("Sync",timer,Timer::SYNC,world,nprocs,
nthreads,me,time_loop,screen,logfile);
time = time_other;
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
const char *fmt;
if (timer->has_full())
fmt = "Other | |%- 12.4g| | | |%6.2f\n";
else
fmt = "Other | |%- 12.4g| | |%6.2f\n";
if (me == 0) {
if (screen)
fprintf(screen,"Other time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (logfile)
fprintf(logfile,"Other time (%%) = %g (%g)\n",
time,time/time_loop*100.0);
if (screen) fprintf(screen,fmt,time,time/time_loop*100.0);
if (logfile) fprintf(logfile,fmt,time,time/time_loop*100.0);
}
}
#ifdef LMP_USER_OMP
const char thr_hdr_fmt[] =
"\nThread timing breakdown (MPI rank %d):\nTotal threaded time %.4g / %.1f%%\n";
const char thr_header[] =
"Section | min time | avg time | max time |%varavg| %total\n"
"---------------------------------------------------------------\n";
int ifix = modify->find_fix("package_omp");
// print thread breakdown only with full timer detail
if ((ifix >= 0) && timer->has_full() && me == 0) {
double thr_total = 0.0;
ThrData *td;
FixOMP *fixomp = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
for (i=0; i < nthreads; ++i) {
td = fixomp->get_thr(i);
thr_total += td->get_time(Timer::ALL);
}
thr_total /= (double) nthreads;
if (thr_total > 0.0) {
if (screen) {
fprintf(screen,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0);
fputs(thr_header,screen);
}
if (logfile) {
fprintf(logfile,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0);
fputs(thr_header,logfile);
}
omp_times(fixomp,"Pair",Timer::PAIR,nthreads,screen,logfile);
if (atom->molecular)
omp_times(fixomp,"Bond",Timer::BOND,nthreads,screen,logfile);
if (force->kspace)
omp_times(fixomp,"Kspace",Timer::KSPACE,nthreads,screen,logfile);
omp_times(fixomp,"Neigh",Timer::NEIGH,nthreads,screen,logfile);
omp_times(fixomp,"Reduce",Timer::COMM,nthreads,screen,logfile);
}
}
#endif
// FFT timing statistics
// time3d,time1d = total time during run for 3d and 1d FFTs
// loop on timing() until nsample FFTs require at least 1.0 CPU sec
@ -459,7 +545,7 @@ void Finish::end(int flag)
MPI_Allreduce(&time1d,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time1d = tmp/nprocs;
double time_kspace = timer->array[TIME_KSPACE];
double time_kspace = timer->get_wall(Timer::KSPACE);
MPI_Allreduce(&time_kspace,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time_kspace = tmp/nprocs;
@ -536,8 +622,10 @@ void Finish::end(int flag)
neighbor->old_requests[m]->gran ||
neighbor->old_requests[m]->respaouter ||
neighbor->old_requests[m]->half_from_full) &&
neighbor->old_requests[m]->skip == 0) {
if (lmp->kokkos && lmp->kokkos->neigh_list_kokkos(m)) break;
neighbor->old_requests[m]->skip == 0 &&
neighbor->lists[m] && neighbor->lists[m]->numneigh) {
if (!neighbor->lists[m] && lmp->kokkos &&
lmp->kokkos->neigh_list_kokkos(m)) break;
else break;
}
}
@ -584,13 +672,14 @@ void Finish::end(int flag)
nneighfull = 0;
if (m < neighbor->old_nrequest) {
if (neighbor->lists[m]) {
if (neighbor->lists[m] && neighbor->lists[m]->numneigh) {
int inum = neighbor->lists[m]->inum;
int *ilist = neighbor->lists[m]->ilist;
int *numneigh = neighbor->lists[m]->numneigh;
for (i = 0; i < inum; i++)
nneighfull += numneigh[ilist[i]];
} else if (lmp->kokkos) nneighfull = lmp->kokkos->neigh_count(m);
} else if (!neighbor->lists[m] && lmp->kokkos)
nneighfull = lmp->kokkos->neigh_count(m);
tmp = nneighfull;
stats(1,&tmp,&ave,&max,&min,10,histo);
@ -622,7 +711,7 @@ void Finish::end(int flag)
MPI_Allreduce(&tmp,&nall,1,MPI_DOUBLE,MPI_SUM,world);
int nspec;
double nspec_all;
double nspec_all = 0;
if (atom->molecular == 1) {
int **nspecial = atom->nspecial;
int nlocal = atom->nlocal;
@ -731,3 +820,95 @@ void Finish::stats(int n, double *data,
*pmax = max;
*pmin = min;
}
/* ---------------------------------------------------------------------- */
static void mpi_timings(const char *label, Timer *t, enum Timer::ttype tt,
MPI_Comm world, const int nprocs, const int nthreads,
const int me, double time_loop, FILE *scr, FILE *log)
{
double tmp, time_max, time_min, time_sq;
double time = t->get_wall(tt);
double time_cpu = t->get_cpu(tt);
if (time/time_loop < 0.001) // insufficient timer resolution!
time_cpu = 1.0;
else
time_cpu = time_cpu / time;
if (time_cpu > nthreads) time_cpu = nthreads;
MPI_Allreduce(&time,&time_min,1,MPI_DOUBLE,MPI_MIN,world);
MPI_Allreduce(&time,&time_max,1,MPI_DOUBLE,MPI_MAX,world);
time_sq = time*time;
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time = tmp/nprocs;
MPI_Allreduce(&time_sq,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time_sq = tmp/nprocs;
MPI_Allreduce(&time_cpu,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
time_cpu = tmp/nprocs*100.0;
// % variance from the average as measure of load imbalance
if (time > 1.0e-10)
time_sq = sqrt(time_sq/time - time)*100.0;
else
time_sq = 0.0;
if (me == 0) {
tmp = time/time_loop*100.0;
if (t->has_full()) {
const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.1f |%6.2f\n";
if (scr)
fprintf(scr,fmt,label,time_min,time,time_max,time_sq,time_cpu,tmp);
if (log)
fprintf(log,fmt,label,time_min,time,time_max,time_sq,time_cpu,tmp);
time_loop = 100.0/time_loop;
} else {
const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f\n";
if (scr)
fprintf(scr,fmt,label,time_min,time,time_max,time_sq,tmp);
if (log)
fprintf(log,fmt,label,time_min,time,time_max,time_sq,tmp);
}
}
}
/* ---------------------------------------------------------------------- */
#ifdef LMP_USER_OMP
static void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which,
const int nthreads,FILE *scr, FILE *log)
{
const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f\n";
double time_min, time_max, time_avg, time_total, time_std;
time_min = 1.0e100;
time_max = -1.0e100;
time_total = time_avg = time_std = 0.0;
for (int i=0; i < nthreads; ++i) {
ThrData *thr = fix->get_thr(i);
double tmp=thr->get_time(which);
time_min = MIN(time_min,tmp);
time_max = MAX(time_max,tmp);
time_avg += tmp;
time_std += tmp*tmp;
time_total += thr->get_time(Timer::ALL);
}
time_avg /= nthreads;
time_std /= nthreads;
time_total /= nthreads;
if (time_avg > 1.0e-10)
time_std = sqrt(time_std/time_avg - time_avg)*100.0;
else
time_std = 0.0;
if (scr) fprintf(scr,fmt,label,time_min,time_avg,time_max,time_std,
time_avg/time_total*100.0);
if (log) fprintf(log,fmt,label,time_min,time_avg,time_max,time_std,
time_avg/time_total*100.0);
}
#endif