Reverted the block size tuning, which caused bugs for low atom counts (will revisit later)

This commit is contained in:
Trung Nguyen
2022-11-04 13:45:59 -05:00
parent 2f1f7ee0fa
commit a3cc0e8432
3 changed files with 26 additions and 18 deletions

View File

@ -278,14 +278,16 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
int nbor_pitch=this->nbor->nbor_pitch();
// Compute the block size and grid size to keep all cores busy
const int BX=this->block_size();
const int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
/*
const int cus = this->device->gpu->cus();
int BX=this->block_size();
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
while (GX < cus) {
while (GX < cus && GX > 1) {
BX /= 2;
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
}
*/
this->time_pair.start();
// Build the short neighbor list if not done yet

View File

@ -727,14 +727,16 @@ int BaseAmoebaT::fphi_uind() {
return 0;
// Compute the block size and grid size to keep all cores busy
const int cus = device->gpu->cus();
int BX=block_size();
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
while (GX < cus) {
const int BX=block_size();
const int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
/*
const int cus = this->device->gpu->cus();
while (GX < cus && GX > 1) {
BX /= 2;
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
}
*/
time_pair.start();
int ngridxy = _ngridx * _ngridy;
k_fphi_uind.set_size(GX,BX);
@ -793,14 +795,16 @@ int BaseAmoebaT::fphi_mpole() {
int nbor_pitch=nbor->nbor_pitch();
// Compute the block size and grid size to keep all cores busy
const int BX=block_size();
const int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
/*
const int cus = device->gpu->cus();
int BX=block_size();
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
while (GX < cus) {
while (GX < cus && GX > 1) {
BX /= 2;
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
}
*/
time_pair.start();
int ngridxy = _ngridx * _ngridy;
k_fphi_mpole.set_size(GX,BX);

View File

@ -619,14 +619,16 @@ int HippoT::polar_real(const int eflag, const int vflag) {
int nbor_pitch=this->nbor->nbor_pitch();
// Compute the block size and grid size to keep all cores busy
const int cus = this->device->gpu->cus();
int BX=this->block_size();
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
while (GX < cus) {
const int BX=this->block_size();
const int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
/*
const int cus = this->device->gpu->cus();
while (GX < cus && GX > 1) {
BX /= 2;
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
}
*/
this->time_pair.start();
// Build the short neighbor list if not done yet