From 9d081a5916b4baad9fdf516baf0959e77df13e47 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 19 Oct 2022 07:39:56 -0400 Subject: [PATCH] more adjustments for bogus timer results on Intel OpenCL --- lib/gpu/lal_device.cpp | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 9fbd988bc3..8a4784d309 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -794,6 +794,7 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer &ans, #ifdef USE_OPENCL // Workaround for timing issue on Intel OpenCL if (times[0] > 80e6) times[0]=0.0; + if (times[1] > 80e6) times[1]=0.0; if (times[3] > 80e6) times[3]=0.0; if (times[5] > 80e6) times[5]=0.0; #endif @@ -807,9 +808,8 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer &ans, fprintf(screen,"--------------------------------\n"); if (time_device() && (times[3] > 0.0)) { - if (times[0] > 0.0) - fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size); - fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/_replica_size); + if (times[0] > 0.0) fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size); + if (times[1] > 0.0) fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/_replica_size); if (nbor.gpu_nbor() > 0.0) fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/_replica_size); else @@ -863,32 +863,34 @@ void DeviceT::output_kspace_times(UCL_Timer &time_in, double max_mb=mpi_max_bytes/(1024.0*1024.0); #ifdef USE_OPENCL // Workaround for timing issue on Intel OpenCL + if (times[0] > 80e6) times[0]=0.0; + if (times[1] > 80e6) times[1]=0.0; if (times[3] > 80e6) times[3]=0.0; + if (times[5] > 80e6) times[5]=0.0; #endif if (replica_me()==0) - if (screen && times[6]>0.0) { + if (screen && (times[6] > 0.0)) { fprintf(screen,"\n\n-------------------------------------"); fprintf(screen,"--------------------------------\n"); fprintf(screen," Device Time Info (average) for kspace: "); fprintf(screen,"\n-------------------------------------"); fprintf(screen,"--------------------------------\n"); - if (time_device() && times[3]>0) { - fprintf(screen,"Data Out: %.4f s.\n",times[0]/_replica_size); - fprintf(screen,"Data In: %.4f s.\n",times[1]/_replica_size); + if (time_device() && (times[3] > 0.0)) { + if (times[0] > 0.0) fprintf(screen,"Data Out: %.4f s.\n",times[0]/_replica_size); + if (times[1] > 0.0) fprintf(screen,"Data In: %.4f s.\n",times[1]/_replica_size); fprintf(screen,"Kernel (map): %.4f s.\n",times[2]/_replica_size); fprintf(screen,"Kernel (rho): %.4f s.\n",times[3]/_replica_size); fprintf(screen,"Force interp: %.4f s.\n",times[4]/_replica_size); - fprintf(screen,"Total rho: %.4f s.\n", - (times[0]+times[2]+times[3])/_replica_size); - fprintf(screen,"Total interp: %.4f s.\n", - (times[1]+times[4])/_replica_size); - fprintf(screen,"Force copy: %.4f s.\n",times[5]/_replica_size); + if (times[0] > 0.0) + fprintf(screen,"Total rho: %.4f s.\n", (times[0]+times[2]+times[3])/_replica_size); + if (times[1] > 0.0) + fprintf(screen,"Total interp: %.4f s.\n", (times[1]+times[4])/_replica_size); + if (times[5] > 0.0) fprintf(screen,"Force copy: %.4f s.\n",times[5]/_replica_size); fprintf(screen,"Total: %.4f s.\n", - (times[0]+times[1]+times[2]+times[3]+times[4]+times[5])/ - _replica_size); + (times[0]+times[1]+times[2]+times[3]+times[4]+times[5])/_replica_size); } fprintf(screen,"CPU Poisson: %.4f s.\n",times[6]/_replica_size); fprintf(screen,"CPU Data Cast: %.4f s.\n",times[8]/_replica_size);