diff --git a/lib/gpu/lal_base_ellipsoid.cpp b/lib/gpu/lal_base_ellipsoid.cpp index 2e22b2f602..fa060bea5a 100644 --- a/lib/gpu/lal_base_ellipsoid.cpp +++ b/lib/gpu/lal_base_ellipsoid.cpp @@ -224,7 +224,9 @@ void BaseEllipsoidT::output_times() { #ifdef USE_OPENCL // Workaround for timing issue on Intel OpenCL + if (times[0] > 80e6) times[0]=0.0; if (times[3] > 80e6) times[3]=0.0; + if (times[6] > 80e6) times[6]=0.0; #endif if (device->replica_me()==0) @@ -237,17 +239,18 @@ void BaseEllipsoidT::output_times() { fprintf(screen,"\n-------------------------------------"); fprintf(screen,"--------------------------------\n"); - if (device->procs_per_gpu()==1 && times[3]>0) { - fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/replica_size); + if (device->procs_per_gpu()==1 && (times[3] > 0.0)) { + if (times[0] > 0.0) + fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/replica_size); fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/replica_size); - if (nbor->gpu_nbor()>0) + if (nbor->gpu_nbor() > 0.0) fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/replica_size); else fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/replica_size); fprintf(screen,"Force calc: %.4f s.\n",times[3]/replica_size); fprintf(screen,"LJ calc: %.4f s.\n",times[4]/replica_size); } - if (times[6]>0) + if (times[6] > 0.0) fprintf(screen,"Device Overhead: %.4f s.\n",times[6]/replica_size); fprintf(screen,"Average split: %.4f.\n",avg_split); fprintf(screen,"Lanes / atom: %d.\n",_threads_per_atom); diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index e2b5b9cdb5..50046d8bdd 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -777,28 +777,30 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer &ans, #ifdef USE_OPENCL // Workaround for timing issue on Intel OpenCL + if (times[0] > 80e6) times[0]=0.0; if (times[3] > 80e6) times[3]=0.0; if (times[5] > 80e6) times[5]=0.0; #endif if (replica_me()==0) - if (screen && times[6]>0.0) { + if (screen && (times[6] > 0.0)) { fprintf(screen,"\n\n-------------------------------------"); fprintf(screen,"--------------------------------\n"); fprintf(screen," Device Time Info (average): "); fprintf(screen,"\n-------------------------------------"); fprintf(screen,"--------------------------------\n"); - if (time_device() && times[3]>0) { - fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size); + if (time_device() && (times[3] > 0.0)) { + if (times[0] > 0.0) + fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size); fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/_replica_size); - if (nbor.gpu_nbor()>0) + if (nbor.gpu_nbor() > 0.0) fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/_replica_size); else fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/_replica_size); fprintf(screen,"Force calc: %.4f s.\n",times[3]/_replica_size); } - if (times[5]>0) + if (times[5] > 0.0) fprintf(screen,"Device Overhead: %.4f s.\n",times[5]/_replica_size); fprintf(screen,"Average split: %.4f.\n",avg_split); fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom);