GPU Package: Fixing logic in OpenCL backend that could result in unnecessary device allocations.

2022-09-28 22:30:09 -07:00
parent 6e34d21b24
commit 71464d8314
5 changed files with 49 additions and 22 deletions
--- a/lib/gpu/geryon/ocl_device.h
+++ b/lib/gpu/geryon/ocl_device.h
@@ -99,6 +99,7 @@ struct OCLProperties {
  int cl_device_version;
  bool has_subgroup_support;
  bool has_shuffle_support;
+  bool shared_main_memory;
 };

 /// Class for looking at data parallel device properties
@@ -226,7 +227,7 @@ class UCL_Device {
  inline bool shared_memory() { return shared_memory(_device); }
  /// Returns true if host memory is efficiently addressable from device
  inline bool shared_memory(const int i)
-    { return _shared_mem_device(_cl_devices[i]); }
+    { return _properties[i].shared_main_memory; }

  /// Returns preferred vector width
  inline int preferred_fp32_width() { return preferred_fp32_width(_device); }
@@ -582,8 +583,9 @@ void UCL_Device::add_properties(cl_device_id device_list) {
  op.preferred_vector_width64=double_width;

  // Determine if double precision is supported: All bits in the mask must be set.
-  cl_device_fp_config double_mask = (CL_FP_FMA|CL_FP_ROUND_TO_NEAREST|CL_FP_ROUND_TO_ZERO|
-                                     CL_FP_ROUND_TO_INF|CL_FP_INF_NAN|CL_FP_DENORM);
+  cl_device_fp_config double_mask = (CL_FP_FMA|CL_FP_ROUND_TO_NEAREST|
+                                     CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_INF|
+                                     CL_FP_INF_NAN|CL_FP_DENORM);
  cl_device_fp_config double_avail;
  CL_SAFE_CALL(clGetDeviceInfo(device_list,CL_DEVICE_DOUBLE_FP_CONFIG,
                               sizeof(double_avail),&double_avail,nullptr));
@@ -684,6 +686,7 @@ void UCL_Device::add_properties(cl_device_id device_list) {
    double arch = static_cast<double>(minor)/10+major;
    if (arch >= 3.0)
      op.has_shuffle_support=true;
+    op.shared_main_memory=_shared_mem_device(device_list);
  }
  delete[] buffer2;
  #endif