From 6bd977ceea1ce66776aba7a1f7dd93a224781f26 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sat, 27 Jul 2013 22:19:48 +0200 Subject: [PATCH] refactor summary timing. always compute CPU use for Loop time. --- src/finish.cpp | 64 ++++++++++++++++++++++++++++++++++++-------------- src/timer.cpp | 6 +++-- 2 files changed, 50 insertions(+), 20 deletions(-) diff --git a/src/finish.cpp b/src/finish.cpp index 84386709d5..01d0fbfc20 100644 --- a/src/finish.cpp +++ b/src/finish.cpp @@ -123,7 +123,7 @@ void Finish::end(int flag) int histo[10]; int minflag,prdflag,tadflag,timeflag,fftflag,histoflag,neighflag; double time,tmp,ave,max,min; - double time_loop,time_other; + double time_loop,time_other,cpu_loop; int me,nprocs; MPI_Comm_rank(world,&me); @@ -143,6 +143,7 @@ void Finish::end(int flag) // turn off neighflag for Kspace partition of verlet/split integrator minflag = prdflag = tadflag = timeflag = fftflag = histoflag = neighflag = 0; + time_loop = cpu_loop = time_other = 0.0; if (flag == 1) { if (update->whichflag == 2) minflag = 1; @@ -160,26 +161,39 @@ void Finish::end(int flag) // loop stats if (timer->has_loop()) { - time_other = timer->get_wall(Timer::TOTAL) - - (timer->get_wall(Timer::PAIR) + timer->get_wall(Timer::BOND) + - timer->get_wall(Timer::KSPACE) + timer->get_wall(Timer::NEIGH) + - timer->get_wall(Timer::COMM) + timer->get_wall(Timer::OUTPUT) + - timer->get_wall(Timer::MODIFY) + timer->get_wall(Timer::SYNC)); - time_loop = timer->get_wall(Timer::TOTAL); - MPI_Allreduce(&time_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world); - time_loop = tmp/nprocs; - // overall loop time + time_loop = timer->get_wall(Timer::TOTAL); + cpu_loop = timer->get_cpu(Timer::TOTAL); + MPI_Allreduce(&time_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + time_loop = tmp/nprocs; + MPI_Allreduce(&cpu_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + cpu_loop = tmp/nprocs; + if (time_loop > 0.0) cpu_loop = cpu_loop/time_loop*100.0; + if (me == 0) { int ntasks = nprocs * comm->nthreads; + +#ifdef LMP_USER_OMP const char fmt[] = "Loop time of %g on %d procs " - "(%d MPI x %d OpenMP) for %d steps with " BIGINT_FORMAT " atoms\n"; - if (screen) fprintf(screen,fmt,time_loop,ntasks,nprocs, - comm->nthreads,update->nsteps,atom->natoms); - if (logfile) fprintf(logfile,fmt,time_loop,ntasks,nprocs, - comm->nthreads,update->nsteps,atom->natoms); + "for %d steps with " BIGINT_FORMAT " atoms\n" + "%6.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n"; + if (screen) fprintf(screen,fmt,time_loop,ntasks,update->nsteps, + atom->natoms,cpu_loop,nprocs,comm->nthreads); + if (logfile) fprintf(logfile,fmt,time_loop,ntasks,update->nsteps, + atom->natoms,cpu_loop,nprocs,comm->nthreads); +#else + const char fmt[] = "Loop time of %g on %d procs " + "for %d steps with " BIGINT_FORMAT " atoms\n" + "%6.1f%% CPU use with %d MPI tasks\n"; + if (screen) fprintf(screen,fmt,time_loop,ntasks,update->nsteps, + atom->natoms,cpu_loop,nprocs); + if (logfile) fprintf(logfile,fmt,time_loop,ntasks,update->nsteps, + atom->natoms,cpu_loop,nprocs); +#endif + + // Gromacs/NAMD-like performance metric for MD with suitable units if ( timeflag && !minflag && !prdflag && !tadflag && (update->nsteps > 0) && @@ -187,7 +201,8 @@ void Finish::end(int flag) (strcmp(update->unit_style,"real") == 0)) ) { float t_step, ns_day, hrs_ns, tps, one_fs = 1.0; - // conversion factor to femtoseconds + // conversion factor to femtoseconds for suitable units + if (strcmp(update->unit_style,"metal") == 0) one_fs = 0.001; t_step = (float)time_loop / ((float) update->nsteps); tps = 1.0/t_step; @@ -200,10 +215,23 @@ void Finish::end(int flag) if (logfile) fprintf(logfile, perf, ns_day, hrs_ns, tps); } } - - if (time_loop == 0.0) time_loop = 1.0; } + // avoid division by zero for very short runs + + if (time_loop == 0.0) time_loop = 1.0; + if (cpu_loop == 0.0) cpu_loop = 100.0; + + // get "Other" wall time for later use + + if (timer->has_normal()) { + time_other = timer->get_wall(Timer::TOTAL) - + (timer->get_wall(Timer::PAIR) + timer->get_wall(Timer::BOND) + + timer->get_wall(Timer::KSPACE) + timer->get_wall(Timer::NEIGH) + + timer->get_wall(Timer::COMM) + timer->get_wall(Timer::OUTPUT) + + timer->get_wall(Timer::MODIFY) + timer->get_wall(Timer::SYNC)); + } + // minimization stats if (minflag) { diff --git a/src/timer.cpp b/src/timer.cpp index a3c72c6a4c..d8880e20b6 100644 --- a/src/timer.cpp +++ b/src/timer.cpp @@ -114,7 +114,8 @@ void Timer::barrier_start() MPI_Barrier(world); if (_level < LOOP) return; - if (_level > NORMAL) current_cpu = CPU_Time(); + + current_cpu = CPU_Time(); current_wall = MPI_Wtime(); cpu_array[TOTAL] = current_cpu; @@ -132,7 +133,8 @@ void Timer::barrier_stop() MPI_Barrier(world); if (_level < LOOP) return; - if (_level > NORMAL) current_cpu = CPU_Time(); + + current_cpu = CPU_Time(); current_wall = MPI_Wtime(); cpu_array[TOTAL] = current_cpu - cpu_array[TOTAL];