Revert to using the maximum number of blocks on the device for each pass, since either block count (32768 or 65535) already saturates the SMs anyway

This commit is contained in:
Trung Nguyen
2024-09-10 09:47:07 -05:00
parent c63c88f8b6
commit b16bb27184

View File

@ -587,8 +587,8 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
const int g2x=static_cast<int>(ceil(static_cast<double>(_maxspecial)/b2x));
const int g2y=static_cast<int>(ceil(static_cast<double>(nt)/b2y));
// the maximum number of blocks on the device is typically 65535
// we can use a lower number to have more resource per block
const int max_num_blocks = 32768;
// in principle we can use a lower number (e.g. 32768) to have more resources per block
const int max_num_blocks = 65535;
int shift = 0;
if (g2y < max_num_blocks) {
_shared->k_transpose.set_size(g2x,g2y,b2x,b2y);