Updated UCL_Device built with OpenCL to use platforms that support accelerators by default

This commit is contained in:
Trung Nguyen
2018-05-23 16:11:55 -05:00
parent 6c42c9b378
commit 341fa160fe
2 changed files with 102 additions and 68 deletions

View File

@ -609,7 +609,7 @@ $(OBJ_DIR)/lal_lj_expand_coul_long.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_
$(OBJ_DIR)/lal_lj_expand_coul_long_ext.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long_ext.cpp lal_base_charge.h $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long_ext.cpp lal_base_charge.h
$(OCL) -o $@ -c lal_lj_expand_coul_long_ext.cpp -I$(OBJ_DIR) $(OCL) -o $@ -c lal_lj_expand_coul_long_ext.cpp -I$(OBJ_DIR)
$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp $(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp $(OCL_H)
$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK) $(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK)
$(OCL_LIB): $(OBJS) $(PTXS) $(OCL_LIB): $(OBJS) $(PTXS)

View File

@ -280,6 +280,9 @@ class UCL_Device {
/// Return the OpenCL type for the device /// Return the OpenCL type for the device
inline cl_device_id & cl_device() { return _cl_device; } inline cl_device_id & cl_device() { return _cl_device; }
/// Select the platform that has accelerators
inline void set_platform_accelerator(int pid=-1);
private: private:
int _num_platforms; // Number of platforms int _num_platforms; // Number of platforms
int _platform; // UCL_Device ID for current platform int _platform; // UCL_Device ID for current platform
@ -311,8 +314,8 @@ UCL_Device::UCL_Device() {
return; return;
} else } else
_num_platforms=static_cast<int>(nplatforms); _num_platforms=static_cast<int>(nplatforms);
// note that platform 0 may not necessarily be associated with accelerators
set_platform(0); set_platform_accelerator();
} }
UCL_Device::~UCL_Device() { UCL_Device::~UCL_Device() {
@ -320,6 +323,7 @@ UCL_Device::~UCL_Device() {
} }
void UCL_Device::clear() { void UCL_Device::clear() {
_properties.clear();
if (_device>-1) { if (_device>-1) {
for (size_t i=0; i<_cq.size(); i++) { for (size_t i=0; i<_cq.size(); i++) {
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq.back())); CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq.back()));
@ -529,75 +533,105 @@ int UCL_Device::set(int num) {
return create_context(); return create_context();
} }
// List all devices along with all properties // List all devices from all platforms along with all properties
void UCL_Device::print_all(std::ostream &out) { void UCL_Device::print_all(std::ostream &out) {
if (num_devices() == 0) // --- loop through the platforms
out << "There is no device supporting OpenCL\n"; for (int n=0; n<_num_platforms; n++) {
for (int i=0; i<num_devices(); ++i) {
out << "\nDevice " << i << ": \"" << name(i).c_str() << "\"\n"; set_platform(n);
out << " Type of device: "
<< device_type_name(i).c_str() << std::endl; out << "\nPlatform " << n << ":\n";
out << " Double precision support: ";
if (double_precision(i)) if (num_devices() == 0)
out << "Yes\n"; out << "There is no device supporting OpenCL\n";
else for (int i=0; i<num_devices(); ++i) {
out << "No\n"; out << "\nDevice " << i << ": \"" << name(i).c_str() << "\"\n";
out << " Total amount of global memory: " out << " Type of device: "
<< gigabytes(i) << " GB\n"; << device_type_name(i).c_str() << std::endl;
out << " Number of compute units/multiprocessors: " out << " Double precision support: ";
<< _properties[i].compute_units << std::endl; if (double_precision(i))
//out << " Number of cores: " out << "Yes\n";
// << cores(i) << std::endl; else
out << " Total amount of constant memory: " out << "No\n";
<< _properties[i].const_mem << " bytes\n"; out << " Total amount of global memory: "
out << " Total amount of local/shared memory per block: " << gigabytes(i) << " GB\n";
<< _properties[i].shared_mem << " bytes\n"; out << " Number of compute units/multiprocessors: "
//out << " Total number of registers available per block: " << _properties[i].compute_units << std::endl;
// << _properties[i].regsPerBlock << std::endl; //out << " Number of cores: "
//out << " Warp size: " // << cores(i) << std::endl;
// << _properties[i].warpSize << std::endl; out << " Total amount of constant memory: "
out << " Maximum group size (# of threads per block) " << _properties[i].const_mem << " bytes\n";
<< _properties[i].work_group_size << std::endl; out << " Total amount of local/shared memory per block: "
out << " Maximum item sizes (# threads for each dim) " << _properties[i].shared_mem << " bytes\n";
<< _properties[i].work_item_size[0] << " x " //out << " Total number of registers available per block: "
<< _properties[i].work_item_size[1] << " x " // << _properties[i].regsPerBlock << std::endl;
<< _properties[i].work_item_size[2] << std::endl; //out << " Warp size: "
//out << " Maximum sizes of each dimension of a grid: " // << _properties[i].warpSize << std::endl;
// << _properties[i].maxGridSize[0] << " x " out << " Maximum group size (# of threads per block) "
// << _properties[i].maxGridSize[1] << " x " << _properties[i].work_group_size << std::endl;
// << _properties[i].maxGridSize[2] << std::endl; out << " Maximum item sizes (# threads for each dim) "
//out << " Maximum memory pitch: " << _properties[i].work_item_size[0] << " x "
// << _properties[i].memPitch) << " bytes\n"; << _properties[i].work_item_size[1] << " x "
//out << " Texture alignment: " << _properties[i].work_item_size[2] << std::endl;
// << _properties[i].textureAlignment << " bytes\n"; //out << " Maximum sizes of each dimension of a grid: "
out << " Clock rate: " // << _properties[i].maxGridSize[0] << " x "
<< clock_rate(i) << " GHz\n"; // << _properties[i].maxGridSize[1] << " x "
//out << " Concurrent copy and execution: "; // << _properties[i].maxGridSize[2] << std::endl;
out << " ECC support: "; //out << " Maximum memory pitch: "
if (_properties[i].ecc_support) // << _properties[i].memPitch) << " bytes\n";
out << "Yes\n"; //out << " Texture alignment: "
else // << _properties[i].textureAlignment << " bytes\n";
out << "No\n"; out << " Clock rate: "
out << " Device fission into equal partitions: "; << clock_rate(i) << " GHz\n";
if (fission_equal(i)) //out << " Concurrent copy and execution: ";
out << "Yes\n"; out << " ECC support: ";
else if (_properties[i].ecc_support)
out << "No\n"; out << "Yes\n";
out << " Device fission by counts: "; else
if (fission_by_counts(i)) out << "No\n";
out << "Yes\n"; out << " Device fission into equal partitions: ";
else if (fission_equal(i))
out << "No\n"; out << "Yes\n";
out << " Device fission by affinity: "; else
if (fission_by_affinity(i)) out << "No\n";
out << "Yes\n"; out << " Device fission by counts: ";
else if (fission_by_counts(i))
out << "No\n"; out << "Yes\n";
out << " Maximum subdevices from fission: " else
<< max_sub_devices(i) << std::endl; out << "No\n";
out << " Device fission by affinity: ";
if (fission_by_affinity(i))
out << "Yes\n";
else
out << "No\n";
out << " Maximum subdevices from fission: "
<< max_sub_devices(i) << std::endl;
}
} }
} }
// Select the OpenCL platform to use.
//   pid < 0 : probe the platforms in index order (0 .. _num_platforms-1) and
//             stop at the first one that exposes at least one device whose
//             type is CPU, GPU or ACCELERATOR.
//   pid >= 0: select platform pid directly; no range check is done here.
// NOTE(review): CPU devices also satisfy the search, so "accelerator" is
// used loosely in the name of this function.
void UCL_Device::set_platform_accelerator(int pid) {
if (pid < 0) {
// Flag set once a platform with a qualifying device has been seen
int found = 0;
for (int n=0; n<_num_platforms; n++) {
// Make platform n the current one before inspecting its devices
// (presumably this refreshes num_devices() and _properties for that
// platform -- confirm against set_platform)
set_platform(n);
for (int i=0; i<num_devices(); i++) {
if (_properties[i].device_type==CL_DEVICE_TYPE_CPU ||
_properties[i].device_type==CL_DEVICE_TYPE_GPU ||
_properties[i].device_type==CL_DEVICE_TYPE_ACCELERATOR) {
found = 1;
break;
}
}
// Keep platform n selected and stop probing as soon as one device matched
if (found) break;
}
// NOTE(review): if no platform matched, nothing is done after the loop, so
// the last set_platform(n) call above is the final one made; there is no
// fallback to platform 0 and the caller is not told the search failed.
} else {
// Explicit request: any result of set_platform is not inspected here
set_platform(pid);
}
} }
} // namespace ucl_opencl
#endif #endif