From d27ab47ed9a3b16adb1a7781ff2d9dc96cae9b71 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Jul 2023 18:32:18 -0400 Subject: [PATCH 1/4] for clarity --- lib/gpu/lal_device.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index cbf3f5f885..57c4b96b39 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -29,7 +29,7 @@ const char *ocl_prefetch_test = " #if (NBOR_PREFETCH == 1) \n"\ " inline void ucl_prefetch(const __global int *p) { prefetch(p, 1); } \n"\ -" #else \n"\ +" #elif (NBOR_PREFETCH == 2) \n"\ " enum LSC_LDCC {LSC_LDCC_DEFAULT, LSC_LDCC_L1UC_L3UC, LSC_LDCC_L1UC_L3C, \n"\ " LSC_LDCC_L1C_L3UC, LSC_LDCC_L1C_L3C, LSC_LDCC_L1S_L3UC, \n"\ " LSC_LDCC_L1S_L3C, LSC_LDCC_L1IAR_L3C, }; \n"\ From fa4a3a0622aeb00285a0a4299e97cc2a0465a992 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Jul 2023 18:33:21 -0400 Subject: [PATCH 2/4] using NBOR_PREFETCH=2 gives errors with Intel Xe GPUs, so set it to 0 --- lib/gpu/lal_pre_ocl_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gpu/lal_pre_ocl_config.h b/lib/gpu/lal_pre_ocl_config.h index a854b223ba..d5cd66feca 100644 --- a/lib/gpu/lal_pre_ocl_config.h +++ b/lib/gpu/lal_pre_ocl_config.h @@ -43,7 +43,7 @@ const char * ocl_config_strings[] = "NVIDIA_GPU,203,32,32,1,1,4,8,2,256,256,128,64,128,8,128,11,128,8,0", "AMD_GPU,403,64,64,0,1,4,8,2,256,256,128,64,128,8,128,11,128,8,0", #ifdef _SINGLE_SINGLE - "INTEL_GPU,500,8,32,1,1,4,8,2,128,128,128,128,64,8,128,8,128,8,2", + "INTEL_GPU,500,8,32,1,1,4,8,2,128,128,128,128,64,8,128,8,128,8,0", "APPLE_GPU,600,16,16,0,1,4,8,1,64,64,64,64,64,8,128,8,128,8,0", #else "INTEL_GPU,500,8,32,1,1,2,8,2,128,128,128,128,64,8,128,8,128,8,2", From 74c5ca0996ce14a5a1c8514a276761d1538ccc0a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Jul 2023 18:34:03 -0400 Subject: [PATCH 3/4] move definition of struct containing double to avoid failure on Intel Xe GPU --- 
lib/gpu/lal_preprocessor.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/gpu/lal_preprocessor.h b/lib/gpu/lal_preprocessor.h index d3e2481646..93d6936f38 100644 --- a/lib/gpu/lal_preprocessor.h +++ b/lib/gpu/lal_preprocessor.h @@ -333,12 +333,12 @@ inline void ucl_prefetch(const __global int *p) { struct _lgpu_float3 { float x; float y; float z; }; -struct _lgpu_double3 { - double x; double y; double z; -}; #ifdef _SINGLE_SINGLE #define acctyp3 struct _lgpu_float3 #else +struct _lgpu_double3 { + double x; double y; double z; +}; #define acctyp3 struct _lgpu_double3 #endif From 021eeae8606ac386461fffe52993d03400ebc0d6 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 18 Jul 2023 13:24:11 -0400 Subject: [PATCH 4/4] add workaround for failing overhead estimation test on (Intel) GPUs that only support single precision --- lib/gpu/lal_device.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 57c4b96b39..70ba373a65 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -745,7 +745,14 @@ void DeviceT::estimate_gpu_overhead(const int kernel_calls, gpu_overhead=0.0; gpu_driver_overhead=0.0; - for (int z=0; z<11; z++) { + // TODO: XXX + // The following estimation currently fails with OpenCL error code -5 + // on Intel GPUs that do not support double precision. + // Until we have a better solution, we just skip this test in this case. + int zloops = 11; + if (!gpu->double_precision()) zloops = 0; + + for (int z=0; z < zloops; z++) { gpu->sync(); gpu_barrier(); over_timer.start();