Merge pull request #1008 from akohlmey/support-platform-select
OpenCL related fixes and improvements to the GPU package
This commit is contained in:
@ -165,8 +165,8 @@ class UCL_Device {
|
||||
/// Get the current OpenCL device name
|
||||
inline std::string name() { return name(_device); }
|
||||
/// Get the OpenCL device name
|
||||
inline std::string name(const int i)
|
||||
{ return std::string(_properties[i].name); }
|
||||
inline std::string name(const int i) {
|
||||
return std::string(_properties[i].name); }
|
||||
|
||||
/// Get a string telling the type of the current device
|
||||
inline std::string device_type_name() { return device_type_name(_device); }
|
||||
@ -281,7 +281,7 @@ class UCL_Device {
|
||||
inline cl_device_id & cl_device() { return _cl_device; }
|
||||
|
||||
/// Select the platform that has accelerators
|
||||
inline void set_platform_accelerator(int pid=-1);
|
||||
inline int set_platform_accelerator(int pid=-1);
|
||||
|
||||
private:
|
||||
int _num_platforms; // Number of platforms
|
||||
@ -324,6 +324,7 @@ UCL_Device::~UCL_Device() {
|
||||
|
||||
void UCL_Device::clear() {
|
||||
_properties.clear();
|
||||
_cl_devices.clear();
|
||||
if (_device>-1) {
|
||||
for (size_t i=0; i<_cq.size(); i++) {
|
||||
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq.back()));
|
||||
@ -520,8 +521,6 @@ int UCL_Device::device_type(const int i) {
|
||||
|
||||
// Set the OpenCL device to the specified device number
|
||||
int UCL_Device::set(int num) {
|
||||
clear();
|
||||
|
||||
cl_device_id *device_list = new cl_device_id[_num_devices];
|
||||
cl_uint n;
|
||||
CL_SAFE_CALL(clGetDeviceIDs(_cl_platform,CL_DEVICE_TYPE_ALL,_num_devices,
|
||||
@ -612,7 +611,7 @@ void UCL_Device::print_all(std::ostream &out) {
|
||||
|
||||
// Select the platform that is associated with accelerators
|
||||
// if pid < 0, select the first platform
|
||||
void UCL_Device::set_platform_accelerator(int pid) {
|
||||
int UCL_Device::set_platform_accelerator(int pid) {
|
||||
if (pid < 0) {
|
||||
int found = 0;
|
||||
for (int n=0; n<_num_platforms; n++) {
|
||||
@ -625,10 +624,11 @@ void UCL_Device::set_platform_accelerator(int pid) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found) break;
|
||||
if (found) return UCL_SUCCESS;
|
||||
}
|
||||
return UCL_ERROR;
|
||||
} else {
|
||||
set_platform(pid);
|
||||
return set_platform(pid);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -34,8 +34,8 @@ using namespace LAMMPS_AL;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
DeviceT::Device() : _init_count(0), _device_init(false),
|
||||
_gpu_mode(GPU_FORCE), _first_device(0),
|
||||
_last_device(0), _compiled(false) {
|
||||
_gpu_mode(GPU_FORCE), _first_device(0),
|
||||
_last_device(0), _platform_id(-1), _compiled(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
@ -67,6 +67,17 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
|
||||
_particle_split=p_split;
|
||||
_cell_size=cell_size;
|
||||
_block_pair=block_pair;
|
||||
// support selecting platform through "package device" keyword.
|
||||
// "0:generic" will select platform 0 and tune for generic device
|
||||
// "1:fermi" will select platform 1 and tune for Nvidia Fermi gpu
|
||||
if (ocl_vendor) {
|
||||
char *sep = NULL;
|
||||
if ((sep = strstr(ocl_vendor,":"))) {
|
||||
*sep = '\0';
|
||||
_platform_id = atoi(ocl_vendor);
|
||||
ocl_vendor = sep+1;
|
||||
}
|
||||
}
|
||||
|
||||
// Get the rank/size within the world
|
||||
MPI_Comm_rank(_comm_world,&_world_me);
|
||||
@ -135,6 +146,9 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
|
||||
return -7;
|
||||
#endif
|
||||
|
||||
if (gpu->set_platform_accelerator(_platform_id)!=UCL_SUCCESS)
|
||||
return -12;
|
||||
|
||||
if (gpu->set(my_gpu)!=UCL_SUCCESS)
|
||||
return -6;
|
||||
|
||||
@ -191,13 +205,15 @@ int DeviceT::set_ocl_params(char *ocl_vendor) {
|
||||
_ocl_vendor_string="-DUSE_OPENCL";
|
||||
int token_count=0;
|
||||
std::string params[13];
|
||||
char *pch = strtok(ocl_vendor,"\" ");
|
||||
char *pch = strtok(ocl_vendor,",");
|
||||
pch = strtok(NULL,",");
|
||||
if (pch == NULL) return -11;
|
||||
while (pch != NULL) {
|
||||
if (token_count==13)
|
||||
return -11;
|
||||
params[token_count]=pch;
|
||||
token_count++;
|
||||
pch = strtok(NULL,"\" ");
|
||||
pch = strtok(NULL,",");
|
||||
}
|
||||
_ocl_vendor_string+=" -DMEM_THREADS="+params[0]+
|
||||
" -DTHREADS_PER_ATOM="+params[1]+
|
||||
@ -656,7 +672,7 @@ int DeviceT::compile_kernels() {
|
||||
dev_program=new UCL_Program(*gpu);
|
||||
int success=dev_program->load_string(device,compile_string().c_str());
|
||||
if (success!=UCL_SUCCESS)
|
||||
return -4;
|
||||
return -6;
|
||||
k_zero.set_function(*dev_program,"kernel_zero");
|
||||
k_info.set_function(*dev_program,"kernel_info");
|
||||
_compiled=true;
|
||||
|
||||
@ -292,7 +292,7 @@ class Device {
|
||||
MPI_Comm _comm_world, _comm_replica, _comm_gpu;
|
||||
int _procs_per_gpu, _gpu_rank, _world_me, _world_size, _replica_me,
|
||||
_replica_size;
|
||||
int _gpu_mode, _first_device, _last_device, _nthreads;
|
||||
int _gpu_mode, _first_device, _last_device, _platform_id, _nthreads;
|
||||
double _particle_split;
|
||||
double _cpu_full;
|
||||
double _ptx_arch;
|
||||
|
||||
Reference in New Issue
Block a user