From d405f2ec4b5faa4db404132be08e09df4a0b0422 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 22 Apr 2021 09:16:37 -0400 Subject: [PATCH 1/3] Update defines to use old neighbor code for CUDA >= 11.2 --- lib/gpu/lal_neighbor.h | 4 ++-- lib/gpu/lal_neighbor_gpu.cu | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/gpu/lal_neighbor.h b/lib/gpu/lal_neighbor.h index 5939567a41..fb854a706c 100644 --- a/lib/gpu/lal_neighbor.h +++ b/lib/gpu/lal_neighbor.h @@ -26,8 +26,8 @@ #if !defined(USE_OPENCL) && !defined(USE_HIP) #ifndef LAL_USE_OLD_NEIGHBOR -// Issue with incorrect results with CUDA 11.2 -#if (CUDA_VERSION > 11019) && (CUDA_VERSION < 11030) +// Issue with incorrect results with CUDA >= 11.2 +#if (CUDA_VERSION > 11019) #define LAL_USE_OLD_NEIGHBOR #endif #endif diff --git a/lib/gpu/lal_neighbor_gpu.cu b/lib/gpu/lal_neighbor_gpu.cu index 2aca505396..b6db97f68a 100644 --- a/lib/gpu/lal_neighbor_gpu.cu +++ b/lib/gpu/lal_neighbor_gpu.cu @@ -34,8 +34,8 @@ _texture_2d( pos_tex,int4); #endif #ifdef NV_KERNEL -#if (__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ == 2) -// Issue with incorrect results in CUDA 11.2 +#if (__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 2) +// Issue with incorrect results in CUDA >= 11.2 #define LAL_USE_OLD_NEIGHBOR #endif #endif From 0f1f49afa786c9694bbcdaca5206c386c7e5d53d Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 22 Apr 2021 12:52:30 -0400 Subject: [PATCH 2/3] Add more output to ocl_get_devices --- lib/gpu/geryon/ocl_device.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lib/gpu/geryon/ocl_device.h b/lib/gpu/geryon/ocl_device.h index 435ee24dd3..4e9e42a3cf 100644 --- a/lib/gpu/geryon/ocl_device.h +++ b/lib/gpu/geryon/ocl_device.h @@ -728,6 +728,9 @@ void UCL_Device::print_all(std::ostream &out) { out << "\nDevice " << i << ": \"" << name(i).c_str() << "\"\n"; out << " Type of device: " << device_type_name(i).c_str() << std::endl; + out << " Supported OpenCL Version: " + << _properties[i].cl_device_version / 100 << "." + << _properties[i].cl_device_version % 100 << std::endl; out << " Is a subdevice: "; if (is_subdevice(i)) out << "Yes\n"; @@ -796,6 +799,16 @@ void UCL_Device::print_all(std::ostream &out) { out << "Yes\n"; else out << "No\n"; + out << " Subgroup support: "; + if (_properties[i].has_subgroup_support) + out << "Yes\n"; + else + out << "No\n"; + out << " Shuffle support: "; + if (_properties[i].has_shuffle_support) + out << "Yes\n"; + else + out << "No\n"; } } } From 0632922a9bf93d71019017b4fde58cdd519e0891 Mon Sep 17 00:00:00 2001 From: Richard Berger Date: Thu, 22 Apr 2021 12:54:13 -0400 Subject: [PATCH 3/3] Explicitly check for subgroup support instead of CL version --- lib/gpu/lal_base_atomic.cpp | 2 +- lib/gpu/lal_base_charge.cpp | 2 +- lib/gpu/lal_base_dipole.cpp | 2 +- lib/gpu/lal_base_dpd.cpp | 2 +- lib/gpu/lal_base_ellipsoid.cpp | 2 +- lib/gpu/lal_base_three.cpp | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/gpu/lal_base_atomic.cpp b/lib/gpu/lal_base_atomic.cpp index d35919105d..6aad138aa1 100644 --- a/lib/gpu/lal_base_atomic.cpp +++ b/lib/gpu/lal_base_atomic.cpp @@ -335,7 +335,7 @@ void BaseAtomicT::compile_kernels(UCL_Device &dev, const void *pair_str, _compiled=true; #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0)) - if (dev.cl_device_version() >= 210) { + if (dev.has_subgroup_support()) { size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size); #if defined(LAL_OCL_EV_JIT) mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size)); diff --git a/lib/gpu/lal_base_charge.cpp b/lib/gpu/lal_base_charge.cpp index b0d08e4df7..9045420425 100644 --- a/lib/gpu/lal_base_charge.cpp +++ b/lib/gpu/lal_base_charge.cpp @@ -348,7 +348,7 @@ void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str, _compiled=true; #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0)) - if (dev.cl_device_version() >= 210) { + if (dev.has_subgroup_support()) { size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size); #if defined(LAL_OCL_EV_JIT) mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size)); diff --git a/lib/gpu/lal_base_dipole.cpp b/lib/gpu/lal_base_dipole.cpp index 9781065b13..439637cbde 100644 --- a/lib/gpu/lal_base_dipole.cpp +++ b/lib/gpu/lal_base_dipole.cpp @@ -356,7 +356,7 @@ void BaseDipoleT::compile_kernels(UCL_Device &dev, const void *pair_str, _compiled=true; #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0)) - if (dev.cl_device_version() >= 210) { + if (dev.has_subgroup_support()) { size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size); #if defined(LAL_OCL_EV_JIT) mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size)); diff --git a/lib/gpu/lal_base_dpd.cpp b/lib/gpu/lal_base_dpd.cpp index 4b6a964bfb..d3c3353415 100644 --- a/lib/gpu/lal_base_dpd.cpp +++ b/lib/gpu/lal_base_dpd.cpp @@ -356,7 +356,7 @@ void BaseDPDT::compile_kernels(UCL_Device &dev, const void *pair_str, _compiled=true; #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0)) - if (dev.cl_device_version() >= 210) { + if (dev.has_subgroup_support()) { size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size); #if defined(LAL_OCL_EV_JIT) mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size)); diff --git a/lib/gpu/lal_base_ellipsoid.cpp b/lib/gpu/lal_base_ellipsoid.cpp index 98411a8033..2e22b2f602 100644 --- a/lib/gpu/lal_base_ellipsoid.cpp +++ b/lib/gpu/lal_base_ellipsoid.cpp @@ -554,7 +554,7 @@ void BaseEllipsoidT::compile_kernels(UCL_Device &dev, _compiled=true; #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0)) - if (dev.cl_device_version() >= 210) { + if (dev.has_subgroup_support()) { size_t mx_subgroup_sz = k_lj_fast.max_subgroup_size(_block_size); mx_subgroup_sz = std::min(mx_subgroup_sz, k_ellipsoid.max_subgroup_size(_block_size)); mx_subgroup_sz = std::min(mx_subgroup_sz, k_sphere_ellipsoid.max_subgroup_size(_block_size)); diff --git a/lib/gpu/lal_base_three.cpp b/lib/gpu/lal_base_three.cpp index 660385eb56..15ef20230d 100644 --- a/lib/gpu/lal_base_three.cpp +++ b/lib/gpu/lal_base_three.cpp @@ -461,7 +461,7 @@ void BaseThreeT::compile_kernels(UCL_Device &dev, const void *pair_str, _compiled=true; #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0)) - if (dev.cl_device_version() >= 210) { + if (dev.has_subgroup_support()) { size_t mx_subgroup_sz = k_pair.max_subgroup_size(_block_size); mx_subgroup_sz = std::min(mx_subgroup_sz, k_three_center.max_subgroup_size(_block_size)); mx_subgroup_sz = std::min(mx_subgroup_sz, k_three_end.max_subgroup_size(_block_size));