Merge pull request #2720 from rbberger/gpu_updates
Update GPU package to allow using CUDA 11.3 drivers
This commit is contained in:
@ -728,6 +728,9 @@ void UCL_Device::print_all(std::ostream &out) {
|
||||
out << "\nDevice " << i << ": \"" << name(i).c_str() << "\"\n";
|
||||
out << " Type of device: "
|
||||
<< device_type_name(i).c_str() << std::endl;
|
||||
out << " Supported OpenCL Version: "
|
||||
<< _properties[i].cl_device_version / 100 << "."
|
||||
<< _properties[i].cl_device_version % 100 << std::endl;
|
||||
out << " Is a subdevice: ";
|
||||
if (is_subdevice(i))
|
||||
out << "Yes\n";
|
||||
@ -796,6 +799,16 @@ void UCL_Device::print_all(std::ostream &out) {
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Subgroup support: ";
|
||||
if (_properties[i].has_subgroup_support)
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Shuffle support: ";
|
||||
if (_properties[i].has_shuffle_support)
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -335,7 +335,7 @@ void BaseAtomicT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||
_compiled=true;
|
||||
|
||||
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
|
||||
if (dev.cl_device_version() >= 210) {
|
||||
if (dev.has_subgroup_support()) {
|
||||
size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
|
||||
#if defined(LAL_OCL_EV_JIT)
|
||||
mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));
|
||||
|
||||
@ -348,7 +348,7 @@ void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||
_compiled=true;
|
||||
|
||||
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
|
||||
if (dev.cl_device_version() >= 210) {
|
||||
if (dev.has_subgroup_support()) {
|
||||
size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
|
||||
#if defined(LAL_OCL_EV_JIT)
|
||||
mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));
|
||||
|
||||
@ -356,7 +356,7 @@ void BaseDipoleT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||
_compiled=true;
|
||||
|
||||
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
|
||||
if (dev.cl_device_version() >= 210) {
|
||||
if (dev.has_subgroup_support()) {
|
||||
size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
|
||||
#if defined(LAL_OCL_EV_JIT)
|
||||
mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));
|
||||
|
||||
@ -356,7 +356,7 @@ void BaseDPDT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||
_compiled=true;
|
||||
|
||||
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
|
||||
if (dev.cl_device_version() >= 210) {
|
||||
if (dev.has_subgroup_support()) {
|
||||
size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
|
||||
#if defined(LAL_OCL_EV_JIT)
|
||||
mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));
|
||||
|
||||
@ -554,7 +554,7 @@ void BaseEllipsoidT::compile_kernels(UCL_Device &dev,
|
||||
_compiled=true;
|
||||
|
||||
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
|
||||
if (dev.cl_device_version() >= 210) {
|
||||
if (dev.has_subgroup_support()) {
|
||||
size_t mx_subgroup_sz = k_lj_fast.max_subgroup_size(_block_size);
|
||||
mx_subgroup_sz = std::min(mx_subgroup_sz, k_ellipsoid.max_subgroup_size(_block_size));
|
||||
mx_subgroup_sz = std::min(mx_subgroup_sz, k_sphere_ellipsoid.max_subgroup_size(_block_size));
|
||||
|
||||
@ -461,7 +461,7 @@ void BaseThreeT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||
_compiled=true;
|
||||
|
||||
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
|
||||
if (dev.cl_device_version() >= 210) {
|
||||
if (dev.has_subgroup_support()) {
|
||||
size_t mx_subgroup_sz = k_pair.max_subgroup_size(_block_size);
|
||||
mx_subgroup_sz = std::min(mx_subgroup_sz, k_three_center.max_subgroup_size(_block_size));
|
||||
mx_subgroup_sz = std::min(mx_subgroup_sz, k_three_end.max_subgroup_size(_block_size));
|
||||
|
||||
@ -26,8 +26,8 @@
|
||||
|
||||
#if !defined(USE_OPENCL) && !defined(USE_HIP)
|
||||
#ifndef LAL_USE_OLD_NEIGHBOR
|
||||
// Issue with incorrect results with CUDA 11.2
|
||||
#if (CUDA_VERSION > 11019) && (CUDA_VERSION < 11030)
|
||||
// Issue with incorrect results with CUDA >= 11.2
|
||||
#if (CUDA_VERSION > 11019)
|
||||
#define LAL_USE_OLD_NEIGHBOR
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -34,8 +34,8 @@ _texture_2d( pos_tex,int4);
|
||||
#endif
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#if (__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ == 2)
|
||||
// Issue with incorrect results in CUDA 11.2
|
||||
#if (__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 2)
|
||||
// Issue with incorrect results in CUDA 11.2 and later 11.x releases
|
||||
#define LAL_USE_OLD_NEIGHBOR
|
||||
#endif
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user