Refactored the API so that different off2 values are used for different kernels

This commit is contained in:
Trung Nguyen
2021-09-16 17:14:36 -05:00
parent a21095fded
commit 98c1a0178c
6 changed files with 92 additions and 75 deletions

View File

@ -250,7 +250,8 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom,
int &host_start, const double cpu_time,
bool &success, double *host_q, const int nlocal,
bool &success, const double off2_polar, const double felec,
double *host_q, const int nlocal,
double *boxlo, double *prd, void **tep_ptr) {
acc_timers();
int eflag, vflag;
@ -316,6 +317,8 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
device->precompute(f_ago,nlocal,nall,host_x,host_type,success,host_q,
boxlo, prd);
_off2_polar = off2_polar;
_felec = felec;
const int red_blocks=polar_real(eflag,vflag);
ans->copy_answers(eflag_in,vflag_in,eatom,vatom,ilist,red_blocks);
device->add_ans_object(ans);
@ -437,8 +440,8 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, const i
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd, void** fieldp_ptr) {
bool &success, const double off2_polar, double *host_q,
double *boxlo, double *prd, void** fieldp_ptr) {
acc_timers();
int eflag, vflag;
if (eatom) eflag=2;
@ -475,6 +478,7 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, const i
}
*fieldp_ptr=_fieldp.host.begin();
_off2_polar = off2_polar;
const int red_blocks=udirect2b(eflag,vflag);
// copy field and fieldp from device to host (_fieldp stores both arrays, one after another)
@ -506,8 +510,8 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, const i
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd, void** fieldp_ptr) {
bool &success, const double off2_polar, double *host_q,
double *boxlo, double *prd, void** fieldp_ptr) {
acc_timers();
int eflag, vflag;
if (eatom) eflag=2;
@ -544,6 +548,7 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, const i
}
*fieldp_ptr=_fieldp.host.begin();
_off2_polar = off2_polar;
const int red_blocks=umutual2b(eflag,vflag);
// copy field and fieldp from device to host (_fieldp stores both arrays, one after another)
@ -574,8 +579,8 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd, void **tep_ptr) {
bool &success, const double felec, const double off2_polar,
double *host_q, double *boxlo, double *prd, void **tep_ptr) {
acc_timers();
int eflag, vflag;
if (eatom) eflag=2;
@ -620,6 +625,8 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const
}
*tep_ptr=_tep.host.begin();
_off2_polar = off2_polar;
_felec = felec;
const int red_blocks=polar_real(eflag,vflag);
ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
device->add_ans_object(ans);