Removed max_cus() from Device; callers now use device->gpu->cus() instead
This commit is contained in:
@ -278,10 +278,10 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
|
|||||||
int nbor_pitch=this->nbor->nbor_pitch();
|
int nbor_pitch=this->nbor->nbor_pitch();
|
||||||
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
// Compute the block size and grid size to keep all cores busy
|
||||||
const int max_cus = this->device->max_cus();
|
const int cus = this->device->gpu->cus();
|
||||||
int BX=this->block_size();
|
int BX=this->block_size();
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
||||||
while (GX < max_cus) {
|
while (GX < cus) {
|
||||||
BX /= 2;
|
BX /= 2;
|
||||||
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -727,10 +727,10 @@ int BaseAmoebaT::fphi_uind() {
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
// Compute the block size and grid size to keep all cores busy
|
||||||
const int max_cus = device->max_cus();
|
const int cus = device->gpu->cus();
|
||||||
int BX=block_size();
|
int BX=block_size();
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
||||||
while (GX < max_cus) {
|
while (GX < cus) {
|
||||||
BX /= 2;
|
BX /= 2;
|
||||||
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
||||||
}
|
}
|
||||||
@ -793,10 +793,10 @@ int BaseAmoebaT::fphi_mpole() {
|
|||||||
int nbor_pitch=nbor->nbor_pitch();
|
int nbor_pitch=nbor->nbor_pitch();
|
||||||
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
// Compute the block size and grid size to keep all cores busy
|
||||||
const int max_cus = device->max_cus();
|
const int cus = device->gpu->cus();
|
||||||
int BX=block_size();
|
int BX=block_size();
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
||||||
while (GX < max_cus) {
|
while (GX < cus) {
|
||||||
BX /= 2;
|
BX /= 2;
|
||||||
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -214,7 +214,6 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
_first_device = _last_device = best_device;
|
_first_device = _last_device = best_device;
|
||||||
_max_cus = best_cus;
|
|
||||||
type = gpu->device_type(_first_device);
|
type = gpu->device_type(_first_device);
|
||||||
|
|
||||||
if (ndevices > 0) {
|
if (ndevices > 0) {
|
||||||
|
|||||||
@ -241,8 +241,6 @@ class Device {
|
|||||||
inline int shuffle_avail() const { return _shuffle_avail; }
|
inline int shuffle_avail() const { return _shuffle_avail; }
|
||||||
/// For OpenCL, 0 if fast-math options disabled, 1 enabled
|
/// For OpenCL, 0 if fast-math options disabled, 1 enabled
|
||||||
inline int fast_math() const { return _fast_math; }
|
inline int fast_math() const { return _fast_math; }
|
||||||
/// return the max number of CUs among the devices
|
|
||||||
inline int max_cus() const { return _max_cus; }
|
|
||||||
|
|
||||||
/// Return the number of threads per atom for pair styles
|
/// Return the number of threads per atom for pair styles
|
||||||
inline int threads_per_atom() const { return _threads_per_atom; }
|
inline int threads_per_atom() const { return _threads_per_atom; }
|
||||||
@ -326,7 +324,7 @@ class Device {
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
std::queue<Answer<numtyp,acctyp> *> ans_queue;
|
std::queue<Answer<numtyp,acctyp> *> ans_queue;
|
||||||
int _init_count, _max_cus;
|
int _init_count;
|
||||||
bool _device_init, _host_timer_started, _time_device;
|
bool _device_init, _host_timer_started, _time_device;
|
||||||
MPI_Comm _comm_world, _comm_replica, _comm_gpu;
|
MPI_Comm _comm_world, _comm_replica, _comm_gpu;
|
||||||
int _procs_per_gpu, _gpu_rank, _world_me, _world_size, _replica_me,
|
int _procs_per_gpu, _gpu_rank, _world_me, _world_size, _replica_me,
|
||||||
|
|||||||
@ -619,10 +619,10 @@ int HippoT::polar_real(const int eflag, const int vflag) {
|
|||||||
int nbor_pitch=this->nbor->nbor_pitch();
|
int nbor_pitch=this->nbor->nbor_pitch();
|
||||||
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
// Compute the block size and grid size to keep all cores busy
|
||||||
const int max_cus = this->device->max_cus();
|
const int cus = this->device->gpu->cus();
|
||||||
int BX=this->block_size();
|
int BX=this->block_size();
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
||||||
while (GX < max_cus) {
|
while (GX < cus) {
|
||||||
BX /= 2;
|
BX /= 2;
|
||||||
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user