also print atom-step/s performance metric
This commit is contained in:
@ -16,46 +16,47 @@ simulation. An example set of statistics is shown here:
|
||||
|
||||
.. parsed-literal::
|
||||
|
||||
Loop time of 2.81192 on 4 procs for 300 steps with 2004 atoms
|
||||
Loop time of 0.942801 on 4 procs for 300 steps with 2004 atoms
|
||||
|
||||
Performance: 18.436 ns/day 1.302 hours/ns 106.689 timesteps/s
|
||||
97.0% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
Performance: 54.985 ns/day, 0.436 hours/ns, 318.201 timesteps/s, 637.674 katom-step/s
|
||||
195.2% CPU use with 2 MPI tasks x 2 OpenMP threads
|
||||
|
||||
MPI task timings breakdown:
|
||||
MPI task timing breakdown:
|
||||
Section \| min time \| avg time \| max time \|%varavg\| %total
|
||||
---------------------------------------------------------------
|
||||
Pair \| 1.9808 \| 2.0134 \| 2.0318 \| 1.4 \| 71.60
|
||||
Bond \| 0.0021894 \| 0.0060319 \| 0.010058 \| 4.7 \| 0.21
|
||||
Kspace \| 0.3207 \| 0.3366 \| 0.36616 \| 3.1 \| 11.97
|
||||
Neigh \| 0.28411 \| 0.28464 \| 0.28516 \| 0.1 \| 10.12
|
||||
Comm \| 0.075732 \| 0.077018 \| 0.07883 \| 0.4 \| 2.74
|
||||
Output \| 0.00030518 \| 0.00042665 \| 0.00078821 \| 1.0 \| 0.02
|
||||
Modify \| 0.086606 \| 0.086631 \| 0.086668 \| 0.0 \| 3.08
|
||||
Other \| \| 0.007178 \| \| \| 0.26
|
||||
Pair \| 0.61419 \| 0.62872 \| 0.64325 \| 1.8 \| 66.69
|
||||
Bond \| 0.0028608 \| 0.0028899 \| 0.002919 \| 0.1 \| 0.31
|
||||
Kspace \| 0.12652 \| 0.14048 \| 0.15444 \| 3.7 \| 14.90
|
||||
Neigh \| 0.10242 \| 0.10242 \| 0.10242 \| 0.0 \| 10.86
|
||||
Comm \| 0.026753 \| 0.027593 \| 0.028434 \| 0.5 \| 2.93
|
||||
Output \| 0.00018341 \| 0.00030942 \| 0.00043542 \| 0.0 \| 0.03
|
||||
Modify \| 0.039117 \| 0.039348 \| 0.039579 \| 0.1 \| 4.17
|
||||
Other \| \| 0.001041 \| \| \| 0.11
|
||||
|
||||
Nlocal: 501 ave 508 max 490 min
|
||||
Histogram: 1 0 0 0 0 0 1 1 0 1
|
||||
Nghost: 6586.25 ave 6628 max 6548 min
|
||||
Histogram: 1 0 1 0 0 0 1 0 0 1
|
||||
Neighs: 177007 ave 180562 max 170212 min
|
||||
Histogram: 1 0 0 0 0 0 0 1 1 1
|
||||
Nlocal: 1002 ave 1006 max 998 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 1
|
||||
Nghost: 8670.5 ave 8691 max 8650 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 1
|
||||
Neighs: 354010 ave 357257 max 350763 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 708028
|
||||
Ave neighs/atom = 353.307
|
||||
Ave special neighs/atom = 2.34032
|
||||
Total # of neighbors = 708020
|
||||
Ave neighs/atom = 353.30339
|
||||
Ave special neighs/atom = 2.3403194
|
||||
Neighbor list builds = 26
|
||||
Dangerous builds = 0
|
||||
|
||||
----------
|
||||
|
||||
The first section provides a global loop timing summary. The *loop
|
||||
time* is the total wall-clock time for the simulation to run. The
|
||||
*Performance* line is provided for convenience to help predict how
|
||||
long it will take to run a desired physical simulation. The *CPU use*
|
||||
line provides the CPU utilization per MPI task; it should be close to
|
||||
100% times the number of OpenMP threads (or 1 if not using OpenMP).
|
||||
Lower numbers correspond to delays due to file I/O or insufficient
|
||||
thread utilization.
|
||||
The first section provides a global loop timing summary. The *loop time*
|
||||
is the total wall-clock time for the simulation to run. The
|
||||
*Performance* line is provided for convenience to help predict how long
|
||||
it will take to run a desired physical simulation and to have numbers
|
||||
useful for performance comparison between different simulation settings
|
||||
or system sizes. The *CPU use* line provides the CPU utilization per
|
||||
MPI task; it should be close to 100% times the number of OpenMP threads
|
||||
(or 1 if not using OpenMP). Lower numbers correspond to delays due to
|
||||
file I/O or insufficient thread utilization.
|
||||
|
||||
----------
|
||||
|
||||
|
||||
@ -141,21 +141,31 @@ void Finish::end(int flag)
|
||||
(strcmp(update->unit_style,"real") == 0))) {
|
||||
double one_fs = force->femtosecond;
|
||||
double t_step = ((double) time_loop) / ((double) update->nsteps);
|
||||
double step_t = 1.0/t_step;
|
||||
double step_t = 1.0 / t_step;
|
||||
double atomstep_s = (double)atom->natoms * step_t;
|
||||
std::string atomstep_u = "atom-step/s";
|
||||
if (atomstep_s > 1000000.0) {
|
||||
atomstep_u = "Matom-step/s";
|
||||
atomstep_s /= 1000000.0;
|
||||
} else if (atomstep_s > 1000.0) {
|
||||
atomstep_u = "katom-step/s";
|
||||
atomstep_s /= 1000.0;
|
||||
}
|
||||
|
||||
if (strcmp(update->unit_style,"lj") == 0) {
|
||||
double tau_day = 24.0*3600.0 / t_step * update->dt / one_fs;
|
||||
utils::logmesg(lmp,"Performance: {:.3f} tau/day, {:.3f} timesteps/s\n",tau_day,step_t);
|
||||
utils::logmesg(lmp, "Performance: {:.3f} tau/day, {:.3f} timesteps/s, {:.3f} {}\n",
|
||||
tau_day, step_t, atomstep_s, atomstep_u);
|
||||
} else if (strcmp(update->unit_style,"electron") == 0) {
|
||||
double hrs_fs = t_step / update->dt * one_fs / 3600.0;
|
||||
double fs_day = 24.0*3600.0 / t_step * update->dt / one_fs;
|
||||
utils::logmesg(lmp,"Performance: {:.3f} fs/day, {:.3f} hours/fs, "
|
||||
"{:.3f} timesteps/s\n",fs_day,hrs_fs,step_t);
|
||||
utils::logmesg(lmp,"Performance: {:.3f} fs/day, {:.3f} hours/fs, {:.3f} timesteps/s, "
|
||||
"{:.3f} {}\n", fs_day, hrs_fs, step_t, atomstep_s, atomstep_u);
|
||||
} else {
|
||||
double hrs_ns = t_step / update->dt * 1000000.0 * one_fs / 3600.0;
|
||||
double ns_day = 24.0*3600.0 / t_step * update->dt / one_fs/1000000.0;
|
||||
utils::logmesg(lmp,"Performance: {:.3f} ns/day, {:.3f} hours/ns, "
|
||||
"{:.3f} timesteps/s\n",ns_day,hrs_ns,step_t);
|
||||
utils::logmesg(lmp,"Performance: {:.3f} ns/day, {:.3f} hours/ns, {:.3f} timesteps/s, "
|
||||
"{:.3f} {}\n", ns_day, hrs_ns, step_t, atomstep_s, atomstep_u);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user