diff --git a/lib/gpu/geryon/ocl_device.h b/lib/gpu/geryon/ocl_device.h
index 435ee24dd3..4e9e42a3cf 100644
--- a/lib/gpu/geryon/ocl_device.h
+++ b/lib/gpu/geryon/ocl_device.h
@@ -728,6 +728,11 @@ void UCL_Device::print_all(std::ostream &out) {
       out << "\nDevice " << i << ": \"" << name(i).c_str() << "\"\n";
       out << " Type of device: "
           << device_type_name(i).c_str() << std::endl;
+      // cl_device_version is expected to hold major*100 + minor*10 (e.g. 210
+      // for OpenCL 2.1), so the minor digit is (v % 100) / 10, not v % 100.
+      out << " Supported OpenCL Version: "
+          << _properties[i].cl_device_version / 100 << "."
+          << (_properties[i].cl_device_version % 100) / 10 << std::endl;
       out << " Is a subdevice: ";
       if (is_subdevice(i))
         out << "Yes\n";
@@ -796,6 +801,16 @@ void UCL_Device::print_all(std::ostream &out) {
         out << "Yes\n";
       else
         out << "No\n";
+      out << " Subgroup support: ";
+      if (_properties[i].has_subgroup_support)
+        out << "Yes\n";
+      else
+        out << "No\n";
+      out << " Shuffle support: ";
+      if (_properties[i].has_shuffle_support)
+        out << "Yes\n";
+      else
+        out << "No\n";
     }
   }
 }
diff --git a/lib/gpu/lal_base_atomic.cpp b/lib/gpu/lal_base_atomic.cpp
index d35919105d..6aad138aa1 100644
--- a/lib/gpu/lal_base_atomic.cpp
+++ b/lib/gpu/lal_base_atomic.cpp
@@ -335,7 +335,7 @@ void BaseAtomicT::compile_kernels(UCL_Device &dev, const void *pair_str,
   _compiled=true;
 
   #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
-  if (dev.cl_device_version() >= 210) {
+  if (dev.has_subgroup_support()) {  // ask the device for subgroup support instead of testing the OpenCL version
     size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
     #if defined(LAL_OCL_EV_JIT)
     mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));
diff --git a/lib/gpu/lal_base_charge.cpp b/lib/gpu/lal_base_charge.cpp
index b0d08e4df7..9045420425 100644
--- a/lib/gpu/lal_base_charge.cpp
+++ b/lib/gpu/lal_base_charge.cpp
@@ -348,7 +348,7 @@ void BaseChargeT::compile_kernels(UCL_Device &dev, const void *pair_str,
   _compiled=true;
 
   #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
-  if (dev.cl_device_version() >= 210) {
+  if (dev.has_subgroup_support()) {  // ask the device for subgroup support instead of testing the OpenCL version
     size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
     #if defined(LAL_OCL_EV_JIT)
     mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));
diff --git a/lib/gpu/lal_base_dipole.cpp b/lib/gpu/lal_base_dipole.cpp
index 9781065b13..439637cbde 100644
--- a/lib/gpu/lal_base_dipole.cpp
+++ b/lib/gpu/lal_base_dipole.cpp
@@ -356,7 +356,7 @@ void BaseDipoleT::compile_kernels(UCL_Device &dev, const void *pair_str,
   _compiled=true;
 
   #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
-  if (dev.cl_device_version() >= 210) {
+  if (dev.has_subgroup_support()) {  // ask the device for subgroup support instead of testing the OpenCL version
     size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
     #if defined(LAL_OCL_EV_JIT)
     mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));
diff --git a/lib/gpu/lal_base_dpd.cpp b/lib/gpu/lal_base_dpd.cpp
index 4b6a964bfb..d3c3353415 100644
--- a/lib/gpu/lal_base_dpd.cpp
+++ b/lib/gpu/lal_base_dpd.cpp
@@ -356,7 +356,7 @@ void BaseDPDT::compile_kernels(UCL_Device &dev, const void *pair_str,
   _compiled=true;
 
   #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
-  if (dev.cl_device_version() >= 210) {
+  if (dev.has_subgroup_support()) {  // ask the device for subgroup support instead of testing the OpenCL version
     size_t mx_subgroup_sz = k_pair_fast.max_subgroup_size(_block_size);
     #if defined(LAL_OCL_EV_JIT)
     mx_subgroup_sz = std::min(mx_subgroup_sz, k_pair_noev.max_subgroup_size(_block_size));
diff --git a/lib/gpu/lal_base_ellipsoid.cpp b/lib/gpu/lal_base_ellipsoid.cpp
index 98411a8033..2e22b2f602 100644
--- a/lib/gpu/lal_base_ellipsoid.cpp
+++ b/lib/gpu/lal_base_ellipsoid.cpp
@@ -554,7 +554,7 @@ void BaseEllipsoidT::compile_kernels(UCL_Device &dev,
   _compiled=true;
 
   #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
-  if (dev.cl_device_version() >= 210) {
+  if (dev.has_subgroup_support()) {  // ask the device for subgroup support instead of testing the OpenCL version
     size_t mx_subgroup_sz = k_lj_fast.max_subgroup_size(_block_size);
     mx_subgroup_sz = std::min(mx_subgroup_sz, k_ellipsoid.max_subgroup_size(_block_size));
mx_subgroup_sz = std::min(mx_subgroup_sz, k_sphere_ellipsoid.max_subgroup_size(_block_size)); diff --git a/lib/gpu/lal_base_three.cpp b/lib/gpu/lal_base_three.cpp index 660385eb56..15ef20230d 100644 --- a/lib/gpu/lal_base_three.cpp +++ b/lib/gpu/lal_base_three.cpp @@ -461,7 +461,7 @@ void BaseThreeT::compile_kernels(UCL_Device &dev, const void *pair_str, _compiled=true; #if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0)) - if (dev.cl_device_version() >= 210) { + if (dev.has_subgroup_support()) { size_t mx_subgroup_sz = k_pair.max_subgroup_size(_block_size); mx_subgroup_sz = std::min(mx_subgroup_sz, k_three_center.max_subgroup_size(_block_size)); mx_subgroup_sz = std::min(mx_subgroup_sz, k_three_end.max_subgroup_size(_block_size)); diff --git a/lib/gpu/lal_neighbor.h b/lib/gpu/lal_neighbor.h index 5939567a41..fb854a706c 100644 --- a/lib/gpu/lal_neighbor.h +++ b/lib/gpu/lal_neighbor.h @@ -26,8 +26,8 @@ #if !defined(USE_OPENCL) && !defined(USE_HIP) #ifndef LAL_USE_OLD_NEIGHBOR -// Issue with incorrect results with CUDA 11.2 -#if (CUDA_VERSION > 11019) && (CUDA_VERSION < 11030) +// Issue with incorrect results with CUDA >= 11.2 +#if (CUDA_VERSION > 11019) #define LAL_USE_OLD_NEIGHBOR #endif #endif diff --git a/lib/gpu/lal_neighbor_gpu.cu b/lib/gpu/lal_neighbor_gpu.cu index 2aca505396..b6db97f68a 100644 --- a/lib/gpu/lal_neighbor_gpu.cu +++ b/lib/gpu/lal_neighbor_gpu.cu @@ -34,8 +34,8 @@ _texture_2d( pos_tex,int4); #endif #ifdef NV_KERNEL -#if (__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ == 2) -// Issue with incorrect results in CUDA 11.2 +#if (__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 2) +// Issue with incorrect results in CUDA >= 11.2 #define LAL_USE_OLD_NEIGHBOR #endif #endif