git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@13937 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
455
src/finish.cpp
455
src/finish.cpp
@ -16,9 +16,10 @@
|
||||
#include "string.h"
|
||||
#include "stdio.h"
|
||||
#include "finish.h"
|
||||
#include "lammps.h"
|
||||
#include "timer.h"
|
||||
#include "universe.h"
|
||||
#include "accelerator_kokkos.h"
|
||||
#include "accelerator_omp.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
@ -30,12 +31,28 @@
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "timer.h"
|
||||
#include "output.h"
|
||||
#include "memory.h"
|
||||
|
||||
#ifdef LMP_USER_OMP
|
||||
#include "modify.h"
|
||||
#include "fix_omp.h"
|
||||
#include "thr_data.h"
|
||||
#endif
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
// local function prototypes, code at end of file
|
||||
|
||||
static void mpi_timings(const char *label, Timer *t, enum Timer::ttype tt,
|
||||
MPI_Comm world, const int nprocs, const int nthreads,
|
||||
const int me, double time_loop, FILE *scr, FILE *log);
|
||||
|
||||
#ifdef LMP_USER_OMP
|
||||
static void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which,
|
||||
const int nthreads,FILE *scr, FILE *log);
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Finish::Finish(LAMMPS *lmp) : Pointers(lmp) {}
|
||||
@ -46,14 +63,16 @@ void Finish::end(int flag)
|
||||
{
|
||||
int i,m,nneigh,nneighfull;
|
||||
int histo[10];
|
||||
int loopflag,minflag,prdflag,tadflag,timeflag,fftflag,histoflag,neighflag;
|
||||
int minflag,prdflag,tadflag,timeflag,fftflag,histoflag,neighflag;
|
||||
double time,tmp,ave,max,min;
|
||||
double time_loop,time_other;
|
||||
double time_loop,time_other,cpu_loop;
|
||||
|
||||
int me,nprocs;
|
||||
MPI_Comm_rank(world,&me);
|
||||
MPI_Comm_size(world,&nprocs);
|
||||
|
||||
const int nthreads = comm->nthreads;
|
||||
|
||||
// recompute natoms in case atoms have been lost
|
||||
|
||||
bigint nblocal = atom->nlocal;
|
||||
@ -67,8 +86,8 @@ void Finish::end(int flag)
|
||||
// flag = 3 = TAD
|
||||
// turn off neighflag for Kspace partition of verlet/split integrator
|
||||
|
||||
loopflag = 1;
|
||||
minflag = prdflag = tadflag = timeflag = fftflag = histoflag = neighflag = 0;
|
||||
time_loop = cpu_loop = time_other = 0.0;
|
||||
|
||||
if (flag == 1) {
|
||||
if (update->whichflag == 2) minflag = 1;
|
||||
@ -80,53 +99,88 @@ void Finish::end(int flag)
|
||||
if (force->kspace && force->kspace_match("pppm",0)
|
||||
&& force->kspace->fftbench) fftflag = 1;
|
||||
}
|
||||
if (flag == 2) prdflag = histoflag = neighflag = 1;
|
||||
if (flag == 2) prdflag = timeflag = histoflag = neighflag = 1;
|
||||
if (flag == 3) tadflag = histoflag = neighflag = 1;
|
||||
|
||||
// loop stats
|
||||
|
||||
if (loopflag) {
|
||||
time_other = timer->array[TIME_LOOP] -
|
||||
(timer->array[TIME_PAIR] + timer->array[TIME_BOND] +
|
||||
timer->array[TIME_KSPACE] + timer->array[TIME_NEIGHBOR] +
|
||||
timer->array[TIME_COMM] + timer->array[TIME_OUTPUT]);
|
||||
|
||||
time_loop = timer->array[TIME_LOOP];
|
||||
MPI_Allreduce(&time_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time_loop = tmp/nprocs;
|
||||
|
||||
if (timer->has_loop()) {
|
||||
|
||||
// overall loop time
|
||||
|
||||
#if defined(_OPENMP)
|
||||
time_loop = timer->get_wall(Timer::TOTAL);
|
||||
cpu_loop = timer->get_cpu(Timer::TOTAL);
|
||||
MPI_Allreduce(&time_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time_loop = tmp/nprocs;
|
||||
MPI_Allreduce(&cpu_loop,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
cpu_loop = tmp/nprocs;
|
||||
if (time_loop > 0.0) cpu_loop = cpu_loop/time_loop*100.0;
|
||||
|
||||
if (me == 0) {
|
||||
int ntasks = nprocs * comm->nthreads;
|
||||
if (screen) fprintf(screen,
|
||||
"Loop time of %g on %d procs (%d MPI x %d OpenMP) "
|
||||
"for %d steps with " BIGINT_FORMAT " atoms\n",
|
||||
time_loop,ntasks,nprocs,comm->nthreads,
|
||||
update->nsteps,atom->natoms);
|
||||
if (logfile) fprintf(logfile,
|
||||
"Loop time of %g on %d procs (%d MPI x %d OpenMP) "
|
||||
"for %d steps with " BIGINT_FORMAT " atoms\n",
|
||||
time_loop,ntasks,nprocs,comm->nthreads,
|
||||
update->nsteps,atom->natoms);
|
||||
}
|
||||
int ntasks = nprocs * nthreads;
|
||||
const char fmt1[] = "Loop time of %g on %d procs "
|
||||
"for %d steps with " BIGINT_FORMAT " atoms\n\n";
|
||||
if (screen) fprintf(screen,fmt1,time_loop,ntasks,update->nsteps,
|
||||
atom->natoms,cpu_loop);
|
||||
if (logfile) fprintf(logfile,fmt1,time_loop,ntasks,update->nsteps,
|
||||
atom->natoms,cpu_loop);
|
||||
|
||||
// Gromacs/NAMD-style performance metric for suitable unit settings
|
||||
|
||||
if ( timeflag && !minflag && !prdflag && !tadflag &&
|
||||
(update->nsteps > 0) && (update->dt != 0.0) &&
|
||||
((strcmp(update->unit_style,"lj") == 0) ||
|
||||
(strcmp(update->unit_style,"metal") == 0) ||
|
||||
(strcmp(update->unit_style,"micro") == 0) ||
|
||||
(strcmp(update->unit_style,"nano") == 0) ||
|
||||
(strcmp(update->unit_style,"electron") == 0) ||
|
||||
(strcmp(update->unit_style,"real") == 0)) ) {
|
||||
double one_fs = force->femtosecond;
|
||||
double t_step = ((double) time_loop) / ((double) update->nsteps);
|
||||
double step_t = 1.0/t_step;
|
||||
|
||||
if (strcmp(update->unit_style,"lj") == 0) {
|
||||
double tau_day = 24.0*3600.0 / t_step * update->dt / one_fs;
|
||||
const char perf[] = "Performance: %.3f tau/day, %.3f timesteps/s\n";
|
||||
if (screen) fprintf(screen,perf,tau_day,step_t);
|
||||
if (logfile) fprintf(logfile,perf,tau_day,step_t);
|
||||
} else {
|
||||
double hrs_ns = t_step / update->dt * 1000000.0 * one_fs / 3600.0;
|
||||
double ns_day = 24.0*3600.0 / t_step * update->dt / one_fs/1000000.0;
|
||||
const char perf[] =
|
||||
"Performance: %.3f ns/day, %.3f hours/ns, %.3f timesteps/s\n";
|
||||
if (screen) fprintf(screen,perf,ns_day,hrs_ns,step_t);
|
||||
if (logfile) fprintf(logfile,perf,ns_day,hrs_ns,step_t);
|
||||
}
|
||||
}
|
||||
|
||||
// CPU use on MPI tasks and OpenMP threads
|
||||
|
||||
#ifdef LMP_USER_OMP
|
||||
const char fmt2[] =
|
||||
"%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n";
|
||||
if (screen) fprintf(screen,fmt2,cpu_loop,nprocs,nthreads);
|
||||
if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs,nthreads);
|
||||
#else
|
||||
if (me == 0) {
|
||||
if (screen) fprintf(screen,
|
||||
"Loop time of %g on %d procs for %d steps with "
|
||||
BIGINT_FORMAT " atoms\n",
|
||||
time_loop,nprocs,update->nsteps,atom->natoms);
|
||||
if (logfile) fprintf(logfile,
|
||||
"Loop time of %g on %d procs for %d steps with "
|
||||
BIGINT_FORMAT " atoms\n",
|
||||
time_loop,nprocs,update->nsteps,atom->natoms);
|
||||
}
|
||||
const char fmt2[] =
|
||||
"%.1f%% CPU use with %d MPI tasks x no OpenMP threads\n";
|
||||
if (screen) fprintf(screen,fmt2,cpu_loop,nprocs);
|
||||
if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs);
|
||||
#endif
|
||||
|
||||
if (time_loop == 0.0) time_loop = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
// avoid division by zero for very short runs
|
||||
|
||||
if (time_loop == 0.0) time_loop = 1.0;
|
||||
if (cpu_loop == 0.0) cpu_loop = 100.0;
|
||||
|
||||
// get "Other" wall time for later use
|
||||
|
||||
if (timer->has_normal())
|
||||
time_other = timer->get_wall(Timer::TOTAL) - timer->get_wall(Timer::ALL);
|
||||
|
||||
// minimization stats
|
||||
|
||||
if (minflag) {
|
||||
@ -190,7 +244,7 @@ void Finish::end(int flag)
|
||||
if (screen) fprintf(screen,"PRD stats:\n");
|
||||
if (logfile) fprintf(logfile,"PRD stats:\n");
|
||||
|
||||
time = timer->array[TIME_PAIR];
|
||||
time = timer->get_wall(Timer::DEPHASE);
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
@ -202,7 +256,7 @@ void Finish::end(int flag)
|
||||
time,time/time_loop*100.0);
|
||||
}
|
||||
|
||||
time = timer->array[TIME_BOND];
|
||||
time = timer->get_wall(Timer::DYNAMICS);
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
@ -214,7 +268,7 @@ void Finish::end(int flag)
|
||||
time,time/time_loop*100.0);
|
||||
}
|
||||
|
||||
time = timer->array[TIME_KSPACE];
|
||||
time = timer->get_wall(Timer::QUENCH);
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
@ -226,10 +280,35 @@ void Finish::end(int flag)
|
||||
time,time/time_loop*100.0);
|
||||
}
|
||||
|
||||
time = time_other;
|
||||
time = timer->get_wall(Timer::REPCOMM);
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen," Comm time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
if (logfile)
|
||||
fprintf(logfile," Comm time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
}
|
||||
|
||||
|
||||
time = timer->get_wall(Timer::REPOUT);
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen," Output time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
if (logfile)
|
||||
fprintf(logfile," Output time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
}
|
||||
|
||||
time = time_other;
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) { // XXXX: replica comm, replica output
|
||||
if (screen)
|
||||
fprintf(screen," Other time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
@ -250,7 +329,7 @@ void Finish::end(int flag)
|
||||
if (screen) fprintf(screen,"TAD stats:\n");
|
||||
if (logfile) fprintf(logfile,"TAD stats:\n");
|
||||
|
||||
time = timer->array[TIME_PAIR];
|
||||
time = timer->get_wall(Timer::NEB);
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
@ -262,7 +341,7 @@ void Finish::end(int flag)
|
||||
time,time/time_loop*100.0);
|
||||
}
|
||||
|
||||
time = timer->array[TIME_BOND];
|
||||
time = timer->get_wall(Timer::DYNAMICS);
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
@ -274,7 +353,7 @@ void Finish::end(int flag)
|
||||
time,time/time_loop*100.0);
|
||||
}
|
||||
|
||||
time = timer->array[TIME_KSPACE];
|
||||
time = timer->get_wall(Timer::QUENCH);
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
@ -287,7 +366,7 @@ void Finish::end(int flag)
|
||||
}
|
||||
|
||||
|
||||
time = timer->array[TIME_COMM];
|
||||
time = timer->get_wall(Timer::REPCOMM);
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
@ -300,7 +379,7 @@ void Finish::end(int flag)
|
||||
}
|
||||
|
||||
|
||||
time = timer->array[TIME_OUTPUT];
|
||||
time = timer->get_wall(Timer::REPOUT);
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
@ -325,103 +404,110 @@ void Finish::end(int flag)
|
||||
}
|
||||
}
|
||||
|
||||
// timing breakdowns
|
||||
if (timeflag && timer->has_normal()) {
|
||||
|
||||
if (timeflag) {
|
||||
if (me == 0) {
|
||||
if (screen) fprintf(screen,"\n");
|
||||
if (logfile) fprintf(logfile,"\n");
|
||||
}
|
||||
|
||||
time = timer->array[TIME_PAIR];
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen,"Pair time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
if (logfile)
|
||||
fprintf(logfile,"Pair time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
}
|
||||
|
||||
if (atom->molecular) {
|
||||
time = timer->array[TIME_BOND];
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (timer->has_full()) {
|
||||
const char hdr[] = "\nMPI task timing breakdown:\n"
|
||||
"Section | min time | avg time | max time |%varavg| %CPU | %total\n"
|
||||
"-----------------------------------------------------------------------\n";
|
||||
if (me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen,"Bond time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
if (logfile)
|
||||
fprintf(logfile,"Bond time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
if (screen) fputs(hdr,screen);
|
||||
if (logfile) fputs(hdr,logfile);
|
||||
}
|
||||
} else {
|
||||
const char hdr[] = "\nMPI task timing breakdown:\n"
|
||||
"Section | min time | avg time | max time |%varavg| %total\n"
|
||||
"---------------------------------------------------------------\n";
|
||||
if (me == 0) {
|
||||
if (screen) fputs(hdr,screen);
|
||||
if (logfile) fputs(hdr,logfile);
|
||||
}
|
||||
}
|
||||
|
||||
if (force->kspace) {
|
||||
time = timer->array[TIME_KSPACE];
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen,"Kspce time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
if (logfile)
|
||||
fprintf(logfile,"Kspce time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
}
|
||||
}
|
||||
mpi_timings("Pair",timer,Timer::PAIR, world,nprocs,
|
||||
nthreads,me,time_loop,screen,logfile);
|
||||
|
||||
time = timer->array[TIME_NEIGHBOR];
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen,"Neigh time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
if (logfile)
|
||||
fprintf(logfile,"Neigh time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
}
|
||||
if (atom->molecular)
|
||||
mpi_timings("Bond",timer,Timer::BOND,world,nprocs,
|
||||
nthreads,me,time_loop,screen,logfile);
|
||||
|
||||
if (force->kspace)
|
||||
mpi_timings("Kspace",timer,Timer::KSPACE,world,nprocs,
|
||||
nthreads,me,time_loop,screen,logfile);
|
||||
|
||||
time = timer->array[TIME_COMM];
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen,"Comm time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
if (logfile)
|
||||
fprintf(logfile,"Comm time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
}
|
||||
|
||||
time = timer->array[TIME_OUTPUT];
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
if (me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen,"Outpt time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
if (logfile)
|
||||
fprintf(logfile,"Outpt time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
}
|
||||
mpi_timings("Neigh",timer,Timer::NEIGH,world,nprocs,
|
||||
nthreads,me,time_loop,screen,logfile);
|
||||
mpi_timings("Comm",timer,Timer::COMM,world,nprocs,
|
||||
nthreads,me,time_loop,screen,logfile);
|
||||
mpi_timings("Output",timer,Timer::OUTPUT,world,nprocs,
|
||||
nthreads,me,time_loop,screen,logfile);
|
||||
mpi_timings("Modify",timer,Timer::MODIFY,world,nprocs,
|
||||
nthreads,me,time_loop,screen,logfile);
|
||||
if (timer->has_sync())
|
||||
mpi_timings("Sync",timer,Timer::SYNC,world,nprocs,
|
||||
nthreads,me,time_loop,screen,logfile);
|
||||
|
||||
time = time_other;
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
|
||||
const char *fmt;
|
||||
if (timer->has_full())
|
||||
fmt = "Other | |%- 12.4g| | | |%6.2f\n";
|
||||
else
|
||||
fmt = "Other | |%- 12.4g| | |%6.2f\n";
|
||||
|
||||
if (me == 0) {
|
||||
if (screen)
|
||||
fprintf(screen,"Other time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
if (logfile)
|
||||
fprintf(logfile,"Other time (%%) = %g (%g)\n",
|
||||
time,time/time_loop*100.0);
|
||||
if (screen) fprintf(screen,fmt,time,time/time_loop*100.0);
|
||||
if (logfile) fprintf(logfile,fmt,time,time/time_loop*100.0);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef LMP_USER_OMP
|
||||
const char thr_hdr_fmt[] =
|
||||
"\nThread timing breakdown (MPI rank %d):\nTotal threaded time %.4g / %.1f%%\n";
|
||||
const char thr_header[] =
|
||||
"Section | min time | avg time | max time |%varavg| %total\n"
|
||||
"---------------------------------------------------------------\n";
|
||||
|
||||
int ifix = modify->find_fix("package_omp");
|
||||
|
||||
// print thread breakdown only with full timer detail
|
||||
|
||||
if ((ifix >= 0) && timer->has_full() && me == 0) {
|
||||
double thr_total = 0.0;
|
||||
ThrData *td;
|
||||
FixOMP *fixomp = static_cast<FixOMP *>(lmp->modify->fix[ifix]);
|
||||
for (i=0; i < nthreads; ++i) {
|
||||
td = fixomp->get_thr(i);
|
||||
thr_total += td->get_time(Timer::ALL);
|
||||
}
|
||||
thr_total /= (double) nthreads;
|
||||
|
||||
if (thr_total > 0.0) {
|
||||
if (screen) {
|
||||
fprintf(screen,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0);
|
||||
fputs(thr_header,screen);
|
||||
}
|
||||
if (logfile) {
|
||||
fprintf(logfile,thr_hdr_fmt,me,thr_total,thr_total/time_loop*100.0);
|
||||
fputs(thr_header,logfile);
|
||||
}
|
||||
|
||||
omp_times(fixomp,"Pair",Timer::PAIR,nthreads,screen,logfile);
|
||||
|
||||
if (atom->molecular)
|
||||
omp_times(fixomp,"Bond",Timer::BOND,nthreads,screen,logfile);
|
||||
|
||||
if (force->kspace)
|
||||
omp_times(fixomp,"Kspace",Timer::KSPACE,nthreads,screen,logfile);
|
||||
|
||||
omp_times(fixomp,"Neigh",Timer::NEIGH,nthreads,screen,logfile);
|
||||
omp_times(fixomp,"Reduce",Timer::COMM,nthreads,screen,logfile);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// FFT timing statistics
|
||||
// time3d,time1d = total time during run for 3d and 1d FFTs
|
||||
// loop on timing() until nsample FFTs require at least 1.0 CPU sec
|
||||
@ -459,7 +545,7 @@ void Finish::end(int flag)
|
||||
MPI_Allreduce(&time1d,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time1d = tmp/nprocs;
|
||||
|
||||
double time_kspace = timer->array[TIME_KSPACE];
|
||||
double time_kspace = timer->get_wall(Timer::KSPACE);
|
||||
MPI_Allreduce(&time_kspace,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time_kspace = tmp/nprocs;
|
||||
|
||||
@ -536,8 +622,10 @@ void Finish::end(int flag)
|
||||
neighbor->old_requests[m]->gran ||
|
||||
neighbor->old_requests[m]->respaouter ||
|
||||
neighbor->old_requests[m]->half_from_full) &&
|
||||
neighbor->old_requests[m]->skip == 0) {
|
||||
if (lmp->kokkos && lmp->kokkos->neigh_list_kokkos(m)) break;
|
||||
neighbor->old_requests[m]->skip == 0 &&
|
||||
neighbor->lists[m] && neighbor->lists[m]->numneigh) {
|
||||
if (!neighbor->lists[m] && lmp->kokkos &&
|
||||
lmp->kokkos->neigh_list_kokkos(m)) break;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
@ -584,13 +672,14 @@ void Finish::end(int flag)
|
||||
|
||||
nneighfull = 0;
|
||||
if (m < neighbor->old_nrequest) {
|
||||
if (neighbor->lists[m]) {
|
||||
if (neighbor->lists[m] && neighbor->lists[m]->numneigh) {
|
||||
int inum = neighbor->lists[m]->inum;
|
||||
int *ilist = neighbor->lists[m]->ilist;
|
||||
int *numneigh = neighbor->lists[m]->numneigh;
|
||||
for (i = 0; i < inum; i++)
|
||||
nneighfull += numneigh[ilist[i]];
|
||||
} else if (lmp->kokkos) nneighfull = lmp->kokkos->neigh_count(m);
|
||||
} else if (!neighbor->lists[m] && lmp->kokkos)
|
||||
nneighfull = lmp->kokkos->neigh_count(m);
|
||||
|
||||
tmp = nneighfull;
|
||||
stats(1,&tmp,&ave,&max,&min,10,histo);
|
||||
@ -622,7 +711,7 @@ void Finish::end(int flag)
|
||||
MPI_Allreduce(&tmp,&nall,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
|
||||
int nspec;
|
||||
double nspec_all;
|
||||
double nspec_all = 0;
|
||||
if (atom->molecular == 1) {
|
||||
int **nspecial = atom->nspecial;
|
||||
int nlocal = atom->nlocal;
|
||||
@ -731,3 +820,95 @@ void Finish::stats(int n, double *data,
|
||||
*pmax = max;
|
||||
*pmin = min;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
static void mpi_timings(const char *label, Timer *t, enum Timer::ttype tt,
|
||||
MPI_Comm world, const int nprocs, const int nthreads,
|
||||
const int me, double time_loop, FILE *scr, FILE *log)
|
||||
{
|
||||
double tmp, time_max, time_min, time_sq;
|
||||
double time = t->get_wall(tt);
|
||||
|
||||
double time_cpu = t->get_cpu(tt);
|
||||
if (time/time_loop < 0.001) // insufficient timer resolution!
|
||||
time_cpu = 1.0;
|
||||
else
|
||||
time_cpu = time_cpu / time;
|
||||
if (time_cpu > nthreads) time_cpu = nthreads;
|
||||
|
||||
MPI_Allreduce(&time,&time_min,1,MPI_DOUBLE,MPI_MIN,world);
|
||||
MPI_Allreduce(&time,&time_max,1,MPI_DOUBLE,MPI_MAX,world);
|
||||
time_sq = time*time;
|
||||
MPI_Allreduce(&time,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time = tmp/nprocs;
|
||||
MPI_Allreduce(&time_sq,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time_sq = tmp/nprocs;
|
||||
MPI_Allreduce(&time_cpu,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
time_cpu = tmp/nprocs*100.0;
|
||||
|
||||
// % variance from the average as measure of load imbalance
|
||||
if (time > 1.0e-10)
|
||||
time_sq = sqrt(time_sq/time - time)*100.0;
|
||||
else
|
||||
time_sq = 0.0;
|
||||
|
||||
|
||||
if (me == 0) {
|
||||
tmp = time/time_loop*100.0;
|
||||
if (t->has_full()) {
|
||||
const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.1f |%6.2f\n";
|
||||
if (scr)
|
||||
fprintf(scr,fmt,label,time_min,time,time_max,time_sq,time_cpu,tmp);
|
||||
if (log)
|
||||
fprintf(log,fmt,label,time_min,time,time_max,time_sq,time_cpu,tmp);
|
||||
time_loop = 100.0/time_loop;
|
||||
} else {
|
||||
const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f\n";
|
||||
if (scr)
|
||||
fprintf(scr,fmt,label,time_min,time,time_max,time_sq,tmp);
|
||||
if (log)
|
||||
fprintf(log,fmt,label,time_min,time,time_max,time_sq,tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
#ifdef LMP_USER_OMP
|
||||
static void omp_times(FixOMP *fix, const char *label, enum Timer::ttype which,
|
||||
const int nthreads,FILE *scr, FILE *log)
|
||||
{
|
||||
const char fmt[] = "%-8s|%- 12.5g|%- 12.5g|%- 12.5g|%6.1f |%6.2f\n";
|
||||
double time_min, time_max, time_avg, time_total, time_std;
|
||||
|
||||
time_min = 1.0e100;
|
||||
time_max = -1.0e100;
|
||||
time_total = time_avg = time_std = 0.0;
|
||||
|
||||
for (int i=0; i < nthreads; ++i) {
|
||||
ThrData *thr = fix->get_thr(i);
|
||||
double tmp=thr->get_time(which);
|
||||
time_min = MIN(time_min,tmp);
|
||||
time_max = MAX(time_max,tmp);
|
||||
time_avg += tmp;
|
||||
time_std += tmp*tmp;
|
||||
time_total += thr->get_time(Timer::ALL);
|
||||
}
|
||||
|
||||
time_avg /= nthreads;
|
||||
time_std /= nthreads;
|
||||
time_total /= nthreads;
|
||||
|
||||
if (time_avg > 1.0e-10)
|
||||
time_std = sqrt(time_std/time_avg - time_avg)*100.0;
|
||||
else
|
||||
time_std = 0.0;
|
||||
|
||||
if (scr) fprintf(scr,fmt,label,time_min,time_avg,time_max,time_std,
|
||||
time_avg/time_total*100.0);
|
||||
if (log) fprintf(log,fmt,label,time_min,time_avg,time_max,time_std,
|
||||
time_avg/time_total*100.0);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user