From d27ab47ed9a3b16adb1a7781ff2d9dc96cae9b71 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Mon, 17 Jul 2023 18:32:18 -0400
Subject: [PATCH 1/4] use explicit #elif (NBOR_PREFETCH == 2) for clarity

---
 lib/gpu/lal_device.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp
index cbf3f5f885..57c4b96b39 100644
--- a/lib/gpu/lal_device.cpp
+++ b/lib/gpu/lal_device.cpp
@@ -29,7 +29,7 @@
 const char *ocl_prefetch_test =
 "  #if (NBOR_PREFETCH == 1)                                                \n"\
 "  inline void ucl_prefetch(const __global int *p) { prefetch(p, 1); }     \n"\
-"  #else                                                                   \n"\
+"  #elif (NBOR_PREFETCH == 2)                                              \n"\
 "  enum LSC_LDCC {LSC_LDCC_DEFAULT, LSC_LDCC_L1UC_L3UC, LSC_LDCC_L1UC_L3C, \n"\
 "                 LSC_LDCC_L1C_L3UC, LSC_LDCC_L1C_L3C, LSC_LDCC_L1S_L3UC,  \n"\
 "                 LSC_LDCC_L1S_L3C, LSC_LDCC_L1IAR_L3C, };                 \n"\

From fa4a3a0622aeb00285a0a4299e97cc2a0465a992 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Mon, 17 Jul 2023 18:33:21 -0400
Subject: [PATCH 2/4] using NBOR_PREFETCH=2 gives errors with Intel Xe GPUs;
 set it to 0

---
 lib/gpu/lal_pre_ocl_config.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/gpu/lal_pre_ocl_config.h b/lib/gpu/lal_pre_ocl_config.h
index a854b223ba..d5cd66feca 100644
--- a/lib/gpu/lal_pre_ocl_config.h
+++ b/lib/gpu/lal_pre_ocl_config.h
@@ -43,7 +43,7 @@ const char * ocl_config_strings[] =
    "NVIDIA_GPU,203,32,32,1,1,4,8,2,256,256,128,64,128,8,128,11,128,8,0",
    "AMD_GPU,403,64,64,0,1,4,8,2,256,256,128,64,128,8,128,11,128,8,0",
 #ifdef _SINGLE_SINGLE
-   "INTEL_GPU,500,8,32,1,1,4,8,2,128,128,128,128,64,8,128,8,128,8,2",
+   "INTEL_GPU,500,8,32,1,1,4,8,2,128,128,128,128,64,8,128,8,128,8,0",
    "APPLE_GPU,600,16,16,0,1,4,8,1,64,64,64,64,64,8,128,8,128,8,0",
 #else
    "INTEL_GPU,500,8,32,1,1,2,8,2,128,128,128,128,64,8,128,8,128,8,2",

From 74c5ca0996ce14a5a1c8514a276761d1538ccc0a Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Mon, 17 Jul 2023 18:34:03 -0400
Subject: [PATCH 3/4] move definition of struct containing double to avoid
 failure on Intel Xe GPU

---
 lib/gpu/lal_preprocessor.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/gpu/lal_preprocessor.h b/lib/gpu/lal_preprocessor.h
index d3e2481646..93d6936f38 100644
--- a/lib/gpu/lal_preprocessor.h
+++ b/lib/gpu/lal_preprocessor.h
@@ -333,12 +333,12 @@ inline void ucl_prefetch(const __global int *p) {
 struct _lgpu_float3 {
   float x; float y; float z;
 };
-struct _lgpu_double3 {
-  double x; double y; double z;
-};
 #ifdef _SINGLE_SINGLE
 #define acctyp3 struct _lgpu_float3
 #else
+struct _lgpu_double3 {
+  double x; double y; double z;
+};
 #define acctyp3 struct _lgpu_double3
 #endif
 

From 021eeae8606ac386461fffe52993d03400ebc0d6 Mon Sep 17 00:00:00 2001
From: Axel Kohlmeyer <akohlmey@gmail.com>
Date: Tue, 18 Jul 2023 13:24:11 -0400
Subject: [PATCH 4/4] add workaround for failing overhead estimation test on
 (Intel) GPUs that only support single precision

---
 lib/gpu/lal_device.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp
index 57c4b96b39..70ba373a65 100644
--- a/lib/gpu/lal_device.cpp
+++ b/lib/gpu/lal_device.cpp
@@ -745,7 +745,14 @@ void DeviceT::estimate_gpu_overhead(const int kernel_calls,
   gpu_overhead=0.0;
   gpu_driver_overhead=0.0;
 
-  for (int z=0; z<11; z++) {
+  // TODO: find a proper fix for the failure described below.
+  // The following estimation currently fails with OpenCL error code -5
+  // (CL_OUT_OF_RESOURCES) on Intel GPUs that do not support double precision.
+  // Until we have a better solution, skip it if gpu->double_precision() is false.
+  int zloops = 11;
+  if (!gpu->double_precision()) zloops = 0;
+
+  for (int z=0; z < zloops; z++) {
     gpu->sync();
     gpu_barrier();
     over_timer.start();