git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15248 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -17,7 +17,7 @@
|
||||
/* -----------------------------------------------------------------------
|
||||
Copyright (2009) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
certain rights in this software. This software is distributed under
|
||||
the Simplified BSD License.
|
||||
----------------------------------------------------------------------- */
|
||||
|
||||
@ -35,7 +35,7 @@ namespace ucl_cudadr {
|
||||
// --------------------------------------------------------------------------
|
||||
// - COMMAND QUEUE STUFF
|
||||
// --------------------------------------------------------------------------
|
||||
typedef CUstream command_queue;
|
||||
typedef CUstream command_queue;
|
||||
|
||||
inline void ucl_sync(CUstream &stream) {
|
||||
CU_SAFE_CALL(cuStreamSynchronize(stream));
|
||||
@ -59,21 +59,21 @@ struct NVDProperties {
|
||||
|
||||
/// Class for looking at device properties
|
||||
/** \note Calls to change the device outside of the class result in incorrect
|
||||
* behavior
|
||||
* behavior
|
||||
* \note There is no error checking for indexing past the number of devices **/
|
||||
class UCL_Device {
|
||||
public:
|
||||
/// Collect properties for every GPU on the node
|
||||
/** \note You must set the active GPU with set() before using the device **/
|
||||
inline UCL_Device();
|
||||
|
||||
|
||||
inline ~UCL_Device();
|
||||
|
||||
/// Returns 1 (For compatibility with OpenCL)
|
||||
inline int num_platforms() {
  // This backend exposes exactly one platform, so the count is a constant.
  return 1;
}
|
||||
|
||||
/// Return a string with name and info of the current platform
|
||||
inline std::string platform_name() {
  // The platform description is a fixed literal; nothing is queried here.
  return "NVIDIA Corporation NVIDIA CUDA Driver";
}
|
||||
|
||||
/// Delete any contexts/data and set the platform number to be used
|
||||
@ -97,24 +97,24 @@ class UCL_Device {
|
||||
|
||||
/// Returns the default stream for the current device
|
||||
inline command_queue & cq() {
  // Slot 0 of the queue list is the one handed out as the default stream.
  return cq(0);
}
|
||||
|
||||
|
||||
/// Returns the stream indexed by i
|
||||
inline command_queue & cq(const int i) {
  // Unchecked lookup: i is used directly as an index into _cq.
  return _cq[i];
}
|
||||
|
||||
|
||||
/// Block until all commands in the default stream have completed
|
||||
inline void sync() {
  // Wait on queue 0, the default stream.
  sync(0);
}
|
||||
|
||||
|
||||
/// Block until all commands in the specified stream have completed
|
||||
inline void sync(const int i) { ucl_sync(cq(i)); }
|
||||
|
||||
|
||||
/// Get the number of command queues currently available on device
|
||||
inline int num_queues()
|
||||
inline int num_queues()
|
||||
{ return _cq.size(); }
|
||||
|
||||
|
||||
/// Add a stream for device computations
|
||||
inline void push_command_queue() {
  // Exactly one stream is added per call: append an empty slot, then create
  // the stream directly in that new last element.
  _cq.push_back(CUstream());
  CU_SAFE_CALL(cuStreamCreate(&_cq.back(),0));
}
|
||||
|
||||
/// Remove a stream for device computations
|
||||
@ -124,19 +124,19 @@ class UCL_Device {
|
||||
CU_SAFE_CALL_NS(cuStreamDestroy(_cq.back()));
|
||||
_cq.pop_back();
|
||||
}
|
||||
|
||||
|
||||
/// Set the default command queue (by default this is the null stream)
|
||||
/** \param i index of the command queue (as added by push_command_queue())
|
||||
/** \param i index of the command queue (as added by push_command_queue())
|
||||
If i is 0, the default command queue is set to the null stream **/
|
||||
inline void set_command_queue(const int i) {
|
||||
if (i==0) _cq[0]=0;
|
||||
else _cq[0]=_cq[i];
|
||||
}
|
||||
|
||||
|
||||
/// Get the current CUDA device name
|
||||
inline std::string name() { return name(_device); }
|
||||
/// Get the CUDA device name
|
||||
inline std::string name(const int i)
|
||||
inline std::string name(const int i)
|
||||
{ return std::string(_properties[i].name); }
|
||||
|
||||
/// Get a string telling the type of the current device
|
||||
@ -148,38 +148,38 @@ class UCL_Device {
|
||||
inline int device_type() { return device_type(_device); }
|
||||
/// Get device type (UCL_CPU, UCL_GPU, UCL_ACCELERATOR, UCL_DEFAULT)
|
||||
inline int device_type(const int i) {
  // The index is not consulted: the result is always UCL_GPU.
  return UCL_GPU;
}
|
||||
|
||||
|
||||
/// Returns true if host memory is efficiently addressable from device
|
||||
inline bool shared_memory() { return shared_memory(_device); }
|
||||
/// Returns true if host memory is efficiently addressable from device
|
||||
inline bool shared_memory(const int i) {
  // True only when the device type reported for i is UCL_CPU.
  const int kind = device_type(i);
  return kind==UCL_CPU;
}
|
||||
|
||||
|
||||
/// Returns true if double precision is supported for the current device
|
||||
inline bool double_precision() { return double_precision(_device); }
|
||||
/// Returns true if double precision is supported for the device
|
||||
inline bool double_precision(const int i) {return arch(i)>=1.3;}
|
||||
|
||||
|
||||
/// Get the number of compute units on the current device
|
||||
inline unsigned cus() { return cus(_device); }
|
||||
/// Get the number of compute units
|
||||
inline unsigned cus(const int i)
|
||||
inline unsigned cus(const int i)
|
||||
{ return _properties[i].multiProcessorCount; }
|
||||
|
||||
/// Get the number of cores in the current device
|
||||
inline unsigned cores() { return cores(_device); }
|
||||
/// Get the number of cores
|
||||
inline unsigned cores(const int i)
|
||||
{ if (arch(i)<2.0) return _properties[i].multiProcessorCount*8;
|
||||
inline unsigned cores(const int i)
|
||||
{ if (arch(i)<2.0) return _properties[i].multiProcessorCount*8;
|
||||
else if (arch(i)<2.1) return _properties[i].multiProcessorCount*32;
|
||||
else if (arch(i)<3.0) return _properties[i].multiProcessorCount*48;
|
||||
else return _properties[i].multiProcessorCount*192; }
|
||||
|
||||
|
||||
/// Get the gigabytes of global memory in the current device
|
||||
inline double gigabytes() { return gigabytes(_device); }
|
||||
/// Get the gigabytes of global memory
|
||||
inline double gigabytes(const int i)
|
||||
inline double gigabytes(const int i)
|
||||
{ return static_cast<double>(_properties[i].totalGlobalMem)/1073741824; }
|
||||
|
||||
|
||||
/// Get the bytes of global memory in the current device
|
||||
inline size_t bytes() { return bytes(_device); }
|
||||
/// Get the bytes of global memory
|
||||
@ -188,13 +188,13 @@ class UCL_Device {
|
||||
/// Get the gigabytes of free memory in the current device
|
||||
inline double free_gigabytes() { return free_gigabytes(_device); }
|
||||
/// Get the gigabytes of free memory
|
||||
inline double free_gigabytes(const int i)
|
||||
inline double free_gigabytes(const int i)
|
||||
{ return static_cast<double>(free_bytes(i))/1073741824; }
|
||||
|
||||
|
||||
/// Get the bytes of free memory in the current device
|
||||
inline size_t free_bytes() { return free_bytes(_device); }
|
||||
/// Get the bytes of free memory
|
||||
inline size_t free_bytes(const int i) {
|
||||
inline size_t free_bytes(const int i) {
|
||||
CUDA_INT_TYPE dfree, dtotal;
|
||||
CU_SAFE_CALL_NS(cuMemGetInfo(&dfree, &dtotal));
|
||||
return static_cast<size_t>(dfree);
|
||||
@ -203,21 +203,21 @@ class UCL_Device {
|
||||
/// Return the GPGPU compute capability for current device
|
||||
inline double arch() { return arch(_device); }
|
||||
/// Return the GPGPU compute capability
|
||||
inline double arch(const int i)
|
||||
inline double arch(const int i)
|
||||
{ return static_cast<double>(_properties[i].minor)/10+_properties[i].major;}
|
||||
|
||||
|
||||
/// Clock rate in GHz for current device
|
||||
inline double clock_rate() { return clock_rate(_device); }
|
||||
/// Clock rate in GHz
|
||||
inline double clock_rate(const int i)
|
||||
inline double clock_rate(const int i)
|
||||
{ return _properties[i].p.clockRate*1e-6;}
|
||||
|
||||
|
||||
/// Get the maximum number of threads per block
|
||||
inline size_t group_size() { return group_size(_device); }
|
||||
/// Get the maximum number of threads per block
|
||||
inline size_t group_size(const int i)
|
||||
inline size_t group_size(const int i)
|
||||
{ return _properties[i].p.maxThreadsPerBlock; }
|
||||
|
||||
|
||||
/// Return the maximum memory pitch in bytes for current device
|
||||
inline size_t max_pitch() { return max_pitch(_device); }
|
||||
/// Return the maximum memory pitch in bytes
|
||||
@ -242,7 +242,7 @@ class UCL_Device {
|
||||
{ return fission_by_counts(_device); }
|
||||
/// True if splitting device into subdevices by specified counts supported
|
||||
inline bool fission_by_counts(const int i) {
  // The index is not consulted: the answer is false for every device.
  (void)i;
  return false;
}
|
||||
/// True if splitting device into subdevices by affinity domains supported
|
||||
inline bool fission_by_affinity()
|
||||
{ return fission_by_affinity(_device); }
|
||||
@ -259,7 +259,7 @@ class UCL_Device {
|
||||
|
||||
/// List all devices along with all properties
|
||||
inline void print_all(std::ostream &out);
|
||||
|
||||
|
||||
private:
|
||||
int _device, _num_devices;
|
||||
std::vector<NVDProperties> _properties;
|
||||
@ -279,16 +279,16 @@ UCL_Device::UCL_Device() {
|
||||
CU_SAFE_CALL_NS(cuDeviceComputeCapability(&major,&minor,m));
|
||||
if (major==9999)
|
||||
continue;
|
||||
|
||||
|
||||
_properties.push_back(NVDProperties());
|
||||
_properties.back().device_id=dev;
|
||||
_properties.back().major=major;
|
||||
_properties.back().minor=minor;
|
||||
|
||||
|
||||
char namecstr[1024];
|
||||
CU_SAFE_CALL_NS(cuDeviceGetName(namecstr,1024,m));
|
||||
_properties.back().name=namecstr;
|
||||
|
||||
|
||||
CU_SAFE_CALL_NS(cuDeviceTotalMem(&_properties.back().totalGlobalMem,m));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&_properties.back().multiProcessorCount,
|
||||
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
|
||||
@ -296,23 +296,23 @@ UCL_Device::UCL_Device() {
|
||||
CU_SAFE_CALL_NS(cuDeviceGetProperties(&_properties.back().p,m));
|
||||
#if CUDA_VERSION >= 2020
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
|
||||
&_properties.back().kernelExecTimeoutEnabled,
|
||||
&_properties.back().kernelExecTimeoutEnabled,
|
||||
CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
|
||||
&_properties.back().integrated,
|
||||
CU_DEVICE_ATTRIBUTE_INTEGRATED, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
|
||||
&_properties.back().canMapHostMemory,
|
||||
&_properties.back().canMapHostMemory,
|
||||
CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&_properties.back().computeMode,
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&_properties.back().computeMode,
|
||||
CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,dev));
|
||||
#endif
|
||||
#if CUDA_VERSION >= 3010
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
|
||||
&_properties.back().concurrentKernels,
|
||||
&_properties.back().concurrentKernels,
|
||||
CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
|
||||
&_properties.back().ECCEnabled,
|
||||
&_properties.back().ECCEnabled,
|
||||
CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
|
||||
#endif
|
||||
}
|
||||
@ -365,7 +365,7 @@ void UCL_Device::print_all(std::ostream &out) {
|
||||
cuDriverGetVersion(&driver_version);
|
||||
out << "CUDA Driver Version: "
|
||||
<< driver_version/1000 << "." << driver_version%100
|
||||
<< std::endl;
|
||||
<< std::endl;
|
||||
#endif
|
||||
|
||||
if (num_devices() == 0)
|
||||
|
||||
Reference in New Issue
Block a user