Use primary context in CUDA GPU code.
Since LAMMPS uses the low-level driver API of CUDA, it needs to ensure that the correct context is current whenever it invokes driver API functions. At the moment, LAMMPS creates and switches to its own context inside `UCL_Device::set`, but then assumes that this context is still current for all subsequent calls into CUDA. If another part of the program uses a different context (such as the CUDA runtime, which uses the "primary" context), this assumption breaks and causes failures inside LAMMPS. This patch changes the context creation to instead retain and use the primary context for the requested device. This is not a perfect fix, in that LAMMPS still doesn't ensure that the correct context is current before making driver API calls, but it at least allows LAMMPS to work alongside libraries that use the runtime API.
This commit is contained in:
@ -316,6 +316,7 @@ class UCL_Device {
|
||||
std::vector<CUstream> _cq;
|
||||
CUdevice _cu_device;
|
||||
CUcontext _context;
|
||||
CUcontext _old_context;
|
||||
};
|
||||
|
||||
// Grabs the properties for all devices
|
||||
@ -391,8 +392,9 @@ int UCL_Device::set_platform(const int pid) {
|
||||
int UCL_Device::set(int num) {
|
||||
clear();
|
||||
_device=_properties[num].device_id;
|
||||
CU_SAFE_CALL_NS(cuCtxGetCurrent(&_old_context));
|
||||
CU_SAFE_CALL_NS(cuDeviceGet(&_cu_device,_device));
|
||||
CUresult err=cuCtxCreate(&_context,0,_cu_device);
|
||||
CUresult err=cuDevicePrimaryCtxRetain(&_context,_cu_device);
|
||||
if (err!=CUDA_SUCCESS) {
|
||||
#ifndef UCL_NO_EXIT
|
||||
std::cerr << "UCL Error: Could not access accelerator number " << num
|
||||
@ -401,13 +403,17 @@ int UCL_Device::set(int num) {
|
||||
#endif
|
||||
return UCL_ERROR;
|
||||
}
|
||||
if (_context != _old_context) {
|
||||
CU_SAFE_CALL_NS(cuCtxSetCurrent(_context));
|
||||
}
|
||||
return UCL_SUCCESS;
|
||||
}
|
||||
|
||||
void UCL_Device::clear() {
|
||||
if (_device>-1) {
|
||||
for (int i=1; i<num_queues(); i++) pop_command_queue();
|
||||
cuCtxDestroy(_context);
|
||||
CU_SAFE_CALL_NS(cuCtxSetCurrent(_old_context));
|
||||
CU_SAFE_CALL_NS(cuDevicePrimaryCtxRelease(_cu_device));
|
||||
}
|
||||
_device=-1;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user