Merge pull request #2720 from rbberger/gpu_updates

Update GPU package to allow using CUDA 11.3 drivers
This commit is contained in:
Axel Kohlmeyer
2021-04-22 14:28:51 -04:00
committed by GitHub
9 changed files with 23 additions and 10 deletions

View File

@@ -728,6 +728,9 @@ void UCL_Device::print_all(std::ostream &out) {
out << "\nDevice " << i << ": \"" << name(i).c_str() << "\"\n";
out << " Type of device: "
<< device_type_name(i).c_str() << std::endl;
out << " Supported OpenCL Version: "
<< _properties[i].cl_device_version / 100 << "."
<< _properties[i].cl_device_version % 100 << std::endl;
out << " Is a subdevice: ";
if (is_subdevice(i))
out << "Yes\n";
@@ -796,6 +799,16 @@ void UCL_Device::print_all(std::ostream &out) {
out << "Yes\n";
else
out << "No\n";
out << " Subgroup support: ";
if (_properties[i].has_subgroup_support)
out << "Yes\n";
else
out << "No\n";
out << " Shuffle support: ";
if (_properties[i].has_shuffle_support)
out << "Yes\n";
else
out << "No\n";
}
}
}

View File

@@ -335,7 +335,7 @@ void BaseAtomicT::compile_kernels(UCL_Device &dev, const void *pair_str,
_compiled=true;
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
if (dev.cl_device_version() >= 210) {
if (dev.has_subgroup_support()) {
size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
#if defined(LAL_OCL_EV_JIT)
mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));

View File

@@ -348,7 +348,7 @@ void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str,
_compiled=true;
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
if (dev.cl_device_version() >= 210) {
if (dev.has_subgroup_support()) {
size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
#if defined(LAL_OCL_EV_JIT)
mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));

View File

@@ -356,7 +356,7 @@ void BaseDipoleT::compile_kernels(UCL_Device &dev, const void *pair_str,
_compiled=true;
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
if (dev.cl_device_version() >= 210) {
if (dev.has_subgroup_support()) {
size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
#if defined(LAL_OCL_EV_JIT)
mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));

View File

@@ -356,7 +356,7 @@ void BaseDPDT::compile_kernels(UCL_Device &dev, const void *pair_str,
_compiled=true;
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
if (dev.cl_device_version() >= 210) {
if (dev.has_subgroup_support()) {
size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
#if defined(LAL_OCL_EV_JIT)
mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));

View File

@@ -554,7 +554,7 @@ void BaseEllipsoidT::compile_kernels(UCL_Device &dev,
_compiled=true;
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
if (dev.cl_device_version() >= 210) {
if (dev.has_subgroup_support()) {
size_t mx_subgroup_sz = k_lj_fast.max_subgroup_size(_block_size);
mx_subgroup_sz = std::min(mx_subgroup_sz, k_ellipsoid.max_subgroup_size(_block_size));
mx_subgroup_sz = std::min(mx_subgroup_sz, k_sphere_ellipsoid.max_subgroup_size(_block_size));

View File

@@ -461,7 +461,7 @@ void BaseThreeT::compile_kernels(UCL_Device &dev, const void *pair_str,
_compiled=true;
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
if (dev.cl_device_version() >= 210) {
if (dev.has_subgroup_support()) {
size_t mx_subgroup_sz = k_pair.max_subgroup_size(_block_size);
mx_subgroup_sz = std::min(mx_subgroup_sz, k_three_center.max_subgroup_size(_block_size));
mx_subgroup_sz = std::min(mx_subgroup_sz, k_three_end.max_subgroup_size(_block_size));

View File

@@ -26,8 +26,8 @@
#if !defined(USE_OPENCL) && !defined(USE_HIP)
#ifndef LAL_USE_OLD_NEIGHBOR
// Issue with incorrect results with CUDA 11.2
#if (CUDA_VERSION > 11019) && (CUDA_VERSION < 11030)
// Issue with incorrect results with CUDA >= 11.2
#if (CUDA_VERSION > 11019)
#define LAL_USE_OLD_NEIGHBOR
#endif
#endif

View File

@@ -34,8 +34,8 @@ _texture_2d( pos_tex,int4);
#endif
#ifdef NV_KERNEL
#if (__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ == 2)
// Issue with incorrect results in CUDA 11.2
#if (__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 2)
// Issue with incorrect results in CUDA >= 11.2
#define LAL_USE_OLD_NEIGHBOR
#endif
#endif