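Reverted the block size tuning, which caused bugs at low atom counts: with fewer atoms than compute units, the `while (GX < cus)` loop keeps halving BX without GX ever reaching `cus` (to be revisited later)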
This commit is contained in:
@ -278,14 +278,16 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
|
|||||||
int nbor_pitch=this->nbor->nbor_pitch();
|
int nbor_pitch=this->nbor->nbor_pitch();
|
||||||
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
// Compute the block size and grid size to keep all cores busy
|
||||||
|
|
||||||
|
const int BX=this->block_size();
|
||||||
|
const int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
||||||
|
/*
|
||||||
const int cus = this->device->gpu->cus();
|
const int cus = this->device->gpu->cus();
|
||||||
int BX=this->block_size();
|
while (GX < cus && GX > 1) {
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
|
||||||
while (GX < cus) {
|
|
||||||
BX /= 2;
|
BX /= 2;
|
||||||
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
this->time_pair.start();
|
this->time_pair.start();
|
||||||
|
|
||||||
// Build the short neighbor list if not done yet
|
// Build the short neighbor list if not done yet
|
||||||
|
|||||||
@ -727,14 +727,16 @@ int BaseAmoebaT::fphi_uind() {
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
// Compute the block size and grid size to keep all cores busy
|
||||||
const int cus = device->gpu->cus();
|
|
||||||
int BX=block_size();
|
const int BX=block_size();
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
const int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
||||||
while (GX < cus) {
|
/*
|
||||||
|
const int cus = this->device->gpu->cus();
|
||||||
|
while (GX < cus && GX > 1) {
|
||||||
BX /= 2;
|
BX /= 2;
|
||||||
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
time_pair.start();
|
time_pair.start();
|
||||||
int ngridxy = _ngridx * _ngridy;
|
int ngridxy = _ngridx * _ngridy;
|
||||||
k_fphi_uind.set_size(GX,BX);
|
k_fphi_uind.set_size(GX,BX);
|
||||||
@ -793,14 +795,16 @@ int BaseAmoebaT::fphi_mpole() {
|
|||||||
int nbor_pitch=nbor->nbor_pitch();
|
int nbor_pitch=nbor->nbor_pitch();
|
||||||
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
// Compute the block size and grid size to keep all cores busy
|
||||||
|
|
||||||
|
const int BX=block_size();
|
||||||
|
const int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
||||||
|
/*
|
||||||
const int cus = device->gpu->cus();
|
const int cus = device->gpu->cus();
|
||||||
int BX=block_size();
|
while (GX < cus && GX > 1) {
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
|
||||||
while (GX < cus) {
|
|
||||||
BX /= 2;
|
BX /= 2;
|
||||||
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
GX=static_cast<int>(ceil(static_cast<double>(ainum)/BX));
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
time_pair.start();
|
time_pair.start();
|
||||||
int ngridxy = _ngridx * _ngridy;
|
int ngridxy = _ngridx * _ngridy;
|
||||||
k_fphi_mpole.set_size(GX,BX);
|
k_fphi_mpole.set_size(GX,BX);
|
||||||
|
|||||||
@ -619,14 +619,16 @@ int HippoT::polar_real(const int eflag, const int vflag) {
|
|||||||
int nbor_pitch=this->nbor->nbor_pitch();
|
int nbor_pitch=this->nbor->nbor_pitch();
|
||||||
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
// Compute the block size and grid size to keep all cores busy
|
||||||
const int cus = this->device->gpu->cus();
|
|
||||||
int BX=this->block_size();
|
const int BX=this->block_size();
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
const int GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
||||||
while (GX < cus) {
|
/*
|
||||||
|
const int cus = this->device->gpu->cus();
|
||||||
|
while (GX < cus && GX > 1) {
|
||||||
BX /= 2;
|
BX /= 2;
|
||||||
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
GX=static_cast<int>(ceil(static_cast<double>(ainum)/(BX/this->_threads_per_atom)));
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
this->time_pair.start();
|
this->time_pair.start();
|
||||||
|
|
||||||
// Build the short neighbor list if not done yet
|
// Build the short neighbor list if not done yet
|
||||||
|
|||||||
Reference in New Issue
Block a user