Added timing for the induced dipole spreading part; computed the block size to ensure all the CUs are occupied by the fphi_uind and fphi_mpole kernels.
This commit is contained in:
@@ -214,6 +214,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
|
||||
}
|
||||
}
|
||||
_first_device = _last_device = best_device;
|
||||
_max_cus = best_cus;
|
||||
type = gpu->device_type(_first_device);
|
||||
|
||||
if (ndevices > 0) {
|
||||
|
||||
Reference in New Issue
Block a user