From b16bb27184cb873ff9da8136d99f9043b6c19e91 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 10 Sep 2024 09:47:07 -0500 Subject: [PATCH] revert to using the max number of blocks on device for each pass, as the number of blocks (32767 or 65535) already saturates the number of SMs anyway --- lib/gpu/lal_neighbor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/gpu/lal_neighbor.cpp b/lib/gpu/lal_neighbor.cpp index 101e92953c..051f55f0a3 100644 --- a/lib/gpu/lal_neighbor.cpp +++ b/lib/gpu/lal_neighbor.cpp @@ -587,8 +587,8 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum, const int g2x=static_cast<int>(ceil(static_cast<double>(_maxspecial)/b2x)); const int g2y=static_cast<int>(ceil(static_cast<double>(nt)/b2y)); // the maximum number of blocks on the device is typically 65535 - // we can use a lower number to have more resource per block - const int max_num_blocks = 32768; + // in principle we can use a lower number to have more resource per block 32768 + const int max_num_blocks = 65535; int shift = 0; if (g2y < max_num_blocks) { _shared->k_transpose.set_size(g2x,g2y,b2x,b2y);