GPU Package: Fixing logic in OpenCL backend that could result in unnecessary device allocations.
This commit is contained in:
@ -99,6 +99,7 @@ struct OCLProperties {
|
||||
int cl_device_version;
|
||||
bool has_subgroup_support;
|
||||
bool has_shuffle_support;
|
||||
bool shared_main_memory;
|
||||
};
|
||||
|
||||
/// Class for looking at data parallel device properties
|
||||
@ -226,7 +227,7 @@ class UCL_Device {
|
||||
inline bool shared_memory() {
  // Forward to the indexed overload using the index stored in _device.
  return shared_memory(_device);
}
|
||||
/// Returns true if host memory is efficiently addressable from device
|
||||
inline bool shared_memory(const int i)
|
||||
{ return _shared_mem_device(_cl_devices[i]); }
|
||||
{ return _properties[i].shared_main_memory; }
|
||||
|
||||
/// Returns preferred vector width
|
||||
inline int preferred_fp32_width() { return preferred_fp32_width(_device); }
|
||||
@ -582,8 +583,9 @@ void UCL_Device::add_properties(cl_device_id device_list) {
|
||||
op.preferred_vector_width64=double_width;
|
||||
|
||||
// Determine if double precision is supported: All bits in the mask must be set.
|
||||
cl_device_fp_config double_mask = (CL_FP_FMA|CL_FP_ROUND_TO_NEAREST|CL_FP_ROUND_TO_ZERO|
|
||||
CL_FP_ROUND_TO_INF|CL_FP_INF_NAN|CL_FP_DENORM);
|
||||
cl_device_fp_config double_mask = (CL_FP_FMA|CL_FP_ROUND_TO_NEAREST|
|
||||
CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_INF|
|
||||
CL_FP_INF_NAN|CL_FP_DENORM);
|
||||
cl_device_fp_config double_avail;
|
||||
CL_SAFE_CALL(clGetDeviceInfo(device_list,CL_DEVICE_DOUBLE_FP_CONFIG,
|
||||
sizeof(double_avail),&double_avail,nullptr));
|
||||
@ -684,6 +686,7 @@ void UCL_Device::add_properties(cl_device_id device_list) {
|
||||
double arch = static_cast<double>(minor)/10+major;
|
||||
if (arch >= 3.0)
|
||||
op.has_shuffle_support=true;
|
||||
op.shared_main_memory=_shared_mem_device(device_list);
|
||||
}
|
||||
delete[] buffer2;
|
||||
#endif
|
||||
|
||||
@ -118,15 +118,19 @@ inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
|
||||
template <class mat_type, class copy_type>
|
||||
inline int _host_view(mat_type &mat, copy_type &cm, const size_t o,
|
||||
const size_t n) {
|
||||
cl_int error_flag;
|
||||
cl_buffer_region subbuffer;
|
||||
subbuffer.origin = o;
|
||||
subbuffer.size = n;
|
||||
mat.cbegin()=clCreateSubBuffer(cm.cbegin(), 0,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, &subbuffer,
|
||||
&error_flag);
|
||||
|
||||
CL_CHECK_ERR(error_flag);
|
||||
// When viewing outside host allocation with discrete main memory on accelerator,
|
||||
// no cl_buffer object is created to avoid unnecessary creation of device allocs
|
||||
if (cm.shared_mem_device()) {
|
||||
cl_int error_flag;
|
||||
cl_buffer_region subbuffer;
|
||||
subbuffer.origin = o;
|
||||
subbuffer.size = n;
|
||||
mat.cbegin()=clCreateSubBuffer(cm.cbegin(), 0,
|
||||
CL_BUFFER_CREATE_TYPE_REGION, &subbuffer,
|
||||
&error_flag);
|
||||
CL_CHECK_ERR(error_flag);
|
||||
} else
|
||||
mat.cbegin()=(cl_mem)0;
|
||||
CL_SAFE_CALL(clRetainCommandQueue(mat.cq()));
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
@ -170,10 +174,13 @@ inline int _host_alloc(mat_type &mat, UCL_Device &dev, const size_t n,
|
||||
|
||||
template <class mat_type>
|
||||
inline int _host_view(mat_type &mat, UCL_Device &dev, const size_t n) {
|
||||
cl_int error_flag;
|
||||
mat.cbegin()=clCreateBuffer(dev.context(), CL_MEM_USE_HOST_PTR,
|
||||
n,*mat.host_ptr(),&error_flag);
|
||||
CL_CHECK_ERR(error_flag);
|
||||
if (mat.shared_mem_device()) {
|
||||
cl_int error_flag;
|
||||
mat.cbegin()=clCreateBuffer(dev.context(), CL_MEM_USE_HOST_PTR,
|
||||
n,*mat.host_ptr(),&error_flag);
|
||||
CL_CHECK_ERR(error_flag);
|
||||
} else
|
||||
mat.cbegin()=(cl_mem)0;
|
||||
CL_SAFE_CALL(clRetainCommandQueue(mat.cq()));
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
@ -181,7 +188,10 @@ inline int _host_view(mat_type &mat, UCL_Device &dev, const size_t n) {
|
||||
template <class mat_type>
|
||||
inline void _host_free(mat_type &mat) {
|
||||
if (mat.cols()>0) {
|
||||
CL_DESTRUCT_CALL(clReleaseMemObject(mat.cbegin()));
|
||||
// When viewing outside host allocation with discrete main memory on accelerator,
|
||||
// no cl_buffer object is created to avoid unnecessary creation of device allocs
|
||||
if (mat.cbegin()!=(cl_mem)(0))
|
||||
CL_DESTRUCT_CALL(clReleaseMemObject(mat.cbegin()));
|
||||
CL_DESTRUCT_CALL(clReleaseCommandQueue(mat.cq()));
|
||||
}
|
||||
}
|
||||
|
||||
@ -75,13 +75,21 @@ class UCL_BaseMat {
|
||||
inline enum UCL_MEMOPT kind() const { return _kind; }
|
||||
|
||||
/// Returns true if the device behind this container's command queue shares main memory with the host
/** Resolution order:
  *  - without _OCL_MAT: always false
  *  - GERYON_FORCE_SHARED_MAIN_MEM_ON defined: always true
  *  - GERYON_FORCE_SHARED_MAIN_MEM_OFF defined: always false
  *  - otherwise: look up the device of _cq and ask _shared_mem_device() **/
inline bool shared_mem_device() {
  #ifndef _OCL_MAT
  return false;
  #else

  #if defined(GERYON_FORCE_SHARED_MAIN_MEM_ON)
  return true;
  #elif defined(GERYON_FORCE_SHARED_MAIN_MEM_OFF)
  return false;
  #else
  cl_device_id device;
  CL_SAFE_CALL(clGetCommandQueueInfo(_cq,CL_QUEUE_DEVICE,
                                     sizeof(cl_device_id),&device,NULL));
  return _shared_mem_device(device);
  #endif

  #endif
}
|
||||
|
||||
|
||||
@ -140,7 +140,10 @@ class UCL_H_Mat : public UCL_BaseMat {
|
||||
_end=_array+_cols;
|
||||
#ifdef _OCL_MAT
|
||||
_carray=input.cbegin();
|
||||
CL_SAFE_CALL(clRetainMemObject(input.cbegin()));
|
||||
// When viewing outside host allocation with discrete main memory on accelerator,
|
||||
// no cl_buffer object is created to avoid unnecessary creation of device allocs
|
||||
if (_carray!=(cl_mem)(0))
|
||||
CL_SAFE_CALL(clRetainMemObject(input.cbegin()));
|
||||
CL_SAFE_CALL(clRetainCommandQueue(input.cq()));
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -139,7 +139,10 @@ class UCL_H_Vec : public UCL_BaseMat {
|
||||
_end=_array+_cols;
|
||||
#ifdef _OCL_MAT
|
||||
_carray=input.cbegin();
|
||||
CL_SAFE_CALL(clRetainMemObject(input.cbegin()));
|
||||
// When viewing outside host allocation with discrete main memory on accelerator,
|
||||
// no cl_buffer object is created to avoid unnecessary creation of device allocs
|
||||
if (_carray!=(cl_mem)(0))
|
||||
CL_SAFE_CALL(clRetainMemObject(input.cbegin()));
|
||||
CL_SAFE_CALL(clRetainCommandQueue(input.cq()));
|
||||
#endif
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user