diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index cbf3f5f885..70ba373a65 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -29,7 +29,7 @@ const char *ocl_prefetch_test = " #if (NBOR_PREFETCH == 1) \n"\ " inline void ucl_prefetch(const __global int *p) { prefetch(p, 1); } \n"\ -" #else \n"\ +" #elif (NBOR_PREFETCH == 2) \n"\ " enum LSC_LDCC {LSC_LDCC_DEFAULT, LSC_LDCC_L1UC_L3UC, LSC_LDCC_L1UC_L3C, \n"\ " LSC_LDCC_L1C_L3UC, LSC_LDCC_L1C_L3C, LSC_LDCC_L1S_L3UC, \n"\ " LSC_LDCC_L1S_L3C, LSC_LDCC_L1IAR_L3C, }; \n"\ @@ -745,7 +745,14 @@ void DeviceT::estimate_gpu_overhead(const int kernel_calls, gpu_overhead=0.0; gpu_driver_overhead=0.0; - for (int z=0; z<11; z++) { + // TODO: XXX + // The following estimation currently fails with OpenCL error code -5 + // (CL_OUT_OF_RESOURCES) on Intel GPUs that do not support double precision. + // Until we have a better solution, we just skip this test in this case. + int zloops = 11; + if (!gpu->double_precision()) zloops = 0; + + for (int z=0; z < zloops; z++) { gpu->sync(); gpu_barrier(); over_timer.start(); diff --git a/lib/gpu/lal_pre_ocl_config.h b/lib/gpu/lal_pre_ocl_config.h index a854b223ba..d5cd66feca 100644 --- a/lib/gpu/lal_pre_ocl_config.h +++ b/lib/gpu/lal_pre_ocl_config.h @@ -43,7 +43,7 @@ const char * ocl_config_strings[] = "NVIDIA_GPU,203,32,32,1,1,4,8,2,256,256,128,64,128,8,128,11,128,8,0", "AMD_GPU,403,64,64,0,1,4,8,2,256,256,128,64,128,8,128,11,128,8,0", #ifdef _SINGLE_SINGLE - "INTEL_GPU,500,8,32,1,1,4,8,2,128,128,128,128,64,8,128,8,128,8,2", + "INTEL_GPU,500,8,32,1,1,4,8,2,128,128,128,128,64,8,128,8,128,8,0", "APPLE_GPU,600,16,16,0,1,4,8,1,64,64,64,64,64,8,128,8,128,8,0", #else "INTEL_GPU,500,8,32,1,1,2,8,2,128,128,128,128,64,8,128,8,128,8,2", diff --git a/lib/gpu/lal_preprocessor.h b/lib/gpu/lal_preprocessor.h index d3e2481646..93d6936f38 100644 --- a/lib/gpu/lal_preprocessor.h +++ b/lib/gpu/lal_preprocessor.h @@ -333,12 +333,12 @@ inline void ucl_prefetch(const __global int *p) { struct 
_lgpu_float3 { float x; float y; float z; }; -struct _lgpu_double3 { - double x; double y; double z; -}; #ifdef _SINGLE_SINGLE #define acctyp3 struct _lgpu_float3 #else +struct _lgpu_double3 { + double x; double y; double z; +}; #define acctyp3 struct _lgpu_double3 #endif