Added timing for the induced dipole spreading part; computed the block size so that the fphi_uind and fphi_mpole kernels occupy all the CUs

This commit is contained in:
Trung Nguyen
2022-10-06 15:03:58 -05:00
parent 009ed36301
commit 6b9e83fe20
10 changed files with 106 additions and 87 deletions

View File

@@ -214,6 +214,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
}
}
_first_device = _last_device = best_device;
_max_cus = best_cus;
type = gpu->device_type(_first_device);
if (ndevices > 0) {