Removed max_cus() from Device, used device->gpu->cus() instead

This commit is contained in:
Trung Nguyen
2022-10-07 15:50:30 -05:00
parent 6b9e83fe20
commit 00f46120c7
5 changed files with 9 additions and 12 deletions

View File

@@ -278,10 +278,10 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
int nbor_pitch=this->nbor->nbor_pitch();
// Compute the block size and grid size to keep all cores busy
const int max_cus = this->device->max_cus();
const int cus = this->device->gpu->cus();
int BX=this->block_size();
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
while (GX < max_cus) {
while (GX < cus) {
BX /= 2;
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
}

View File

@@ -727,10 +727,10 @@ int BaseAmoebaT::fphi_uind() {
return 0;
// Compute the block size and grid size to keep all cores busy
const int max_cus = device->max_cus();
const int cus = device->gpu->cus();
int BX=block_size();
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
while (GX < max_cus) {
while (GX < cus) {
BX /= 2;
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
}
@@ -793,10 +793,10 @@ int BaseAmoebaT::fphi_mpole() {
int nbor_pitch=nbor->nbor_pitch();
// Compute the block size and grid size to keep all cores busy
const int max_cus = device->max_cus();
const int cus = device->gpu->cus();
int BX=block_size();
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
while (GX < max_cus) {
while (GX < cus) {
BX /= 2;
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
}

View File

@@ -214,7 +214,6 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
}
}
_first_device = _last_device = best_device;
_max_cus = best_cus;
type = gpu->device_type(_first_device);
if (ndevices > 0) {

View File

@@ -241,8 +241,6 @@ class Device {
inline int shuffle_avail() const { return _shuffle_avail; }
/// For OpenCL, 0 if fast-math options disabled, 1 enabled
inline int fast_math() const { return _fast_math; }
/// return the max number of CUs among the devices
inline int max_cus() const { return _max_cus; }
/// Return the number of threads per atom for pair styles
inline int threads_per_atom() const { return _threads_per_atom; }
@@ -326,7 +324,7 @@ class Device {
private:
std::queue<Answer<numtyp,acctyp> *> ans_queue;
int _init_count, _max_cus;
int _init_count;
bool _device_init, _host_timer_started, _time_device;
MPI_Comm _comm_world, _comm_replica, _comm_gpu;
int _procs_per_gpu, _gpu_rank, _world_me, _world_size, _replica_me,

View File

@@ -619,10 +619,10 @@ int HippoT::polar_real(const int eflag, const int vflag) {
int nbor_pitch=this->nbor->nbor_pitch();
// Compute the block size and grid size to keep all cores busy
const int max_cus = this->device->max_cus();
const int cus = this->device->gpu->cus();
int BX=this->block_size();
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
while (GX < max_cus) {
while (GX < cus) {
BX /= 2;
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
}