Refactored the API so that different off2 values are used for different kernels

This commit is contained in:
Trung Nguyen
2021-09-16 17:14:36 -05:00
parent a21095fded
commit 98c1a0178c
6 changed files with 92 additions and 75 deletions

View File

@ -52,8 +52,7 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const double *host_pda
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15, const int maxspecial, const int maxspecial15,
const double cell_size, const double gpu_split, FILE *_screen, const double cell_size, const double gpu_split, FILE *_screen,
const double aewald, const double felec, const double aewald, const double polar_dscale,
const double off2, const double polar_dscale,
const double polar_uscale) { const double polar_uscale) {
int success; int success;
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,maxspecial15, success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,maxspecial15,
@ -97,8 +96,6 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const double *host_pda
ucl_copy(sp_polar,dview,5,false); ucl_copy(sp_polar,dview,5,false);
_aewald = aewald; _aewald = aewald;
_felec = felec;
_off2 = off2;
_polar_dscale = polar_dscale; _polar_dscale = polar_dscale;
_polar_uscale = polar_uscale; _polar_uscale = polar_uscale;
@ -145,7 +142,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.set_size(GX,BX);
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
&this->_nbor_data->begin(), &this->_nbor_data->begin(),
&this->dev_short_nbor, &_off2, &ainum, &this->dev_short_nbor, &this->_off2_polar, &ainum,
&nbor_pitch, &this->_threads_per_atom); &nbor_pitch, &this->_threads_per_atom);
this->short_nbor_avail = true; this->short_nbor_avail = true;
} }
@ -155,7 +152,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->dev_short_nbor, &this->dev_short_nbor,
&this->_fieldp, &ainum, &_nall, &nbor_pitch, &this->_fieldp, &ainum, &_nall, &nbor_pitch,
&this->_threads_per_atom, &_aewald, &_off2, &this->_threads_per_atom, &_aewald, &this->_off2_polar,
&_polar_dscale, &_polar_uscale); &_polar_dscale, &_polar_uscale);
this->time_pair.stop(); this->time_pair.stop();
@ -181,19 +178,18 @@ int AmoebaT::umutual2b(const int eflag, const int vflag) {
if (!this->short_nbor_avail) { if (!this->short_nbor_avail) {
this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.set_size(GX,BX);
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
&this->_nbor_data->begin(), &this->_nbor_data->begin(), &this->dev_short_nbor,
&this->dev_short_nbor, &_off2, &ainum, &this->_off2_polar, &ainum, &nbor_pitch,
&nbor_pitch, &this->_threads_per_atom); &this->_threads_per_atom);
this->short_nbor_avail = true; this->short_nbor_avail = true;
} }
this->k_umutual2b.set_size(GX,BX); this->k_umutual2b.set_size(GX,BX);
this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar, this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar,
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->dev_short_nbor, &this->dev_short_nbor, &this->_fieldp, &ainum, &_nall,
&this->_fieldp, &ainum, &_nall, &nbor_pitch, &nbor_pitch, &this->_threads_per_atom, &_aewald,
&this->_threads_per_atom, &_aewald, &_off2, &this->_off2_polar, &_polar_dscale, &_polar_uscale);
&_polar_dscale, &_polar_uscale);
this->time_pair.stop(); this->time_pair.stop();
return GX; return GX;
@ -219,7 +215,7 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
this->k_short_nbor.set_size(GX,BX); this->k_short_nbor.set_size(GX,BX);
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor, this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
&this->_nbor_data->begin(), &this->_nbor_data->begin(),
&this->dev_short_nbor, &_off2, &ainum, &this->dev_short_nbor, &this->_off2_polar, &ainum,
&nbor_pitch, &this->_threads_per_atom); &nbor_pitch, &this->_threads_per_atom);
this->short_nbor_avail = true; this->short_nbor_avail = true;
} }
@ -230,8 +226,8 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
&this->dev_short_nbor, &this->dev_short_nbor,
&this->ans->force, &this->ans->engv, &this->_tep, &this->ans->force, &this->ans->engv, &this->_tep,
&eflag, &vflag, &ainum, &_nall, &nbor_pitch, &eflag, &vflag, &ainum, &_nall, &nbor_pitch,
&this->_threads_per_atom, &this->_threads_per_atom, &_aewald, &this->_felec,
&_aewald, &_felec, &_off2, &_polar_dscale, &_polar_uscale); &this->_off2_polar, &_polar_dscale, &_polar_uscale);
this->time_pair.stop(); this->time_pair.stop();
// Signal that short nbor list is not avail for the next time step // Signal that short nbor list is not avail for the next time step

View File

@ -45,8 +45,7 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15, const double cell_size, const int maxspecial, const int maxspecial15, const double cell_size,
const double gpu_split, FILE *_screen, const double gpu_split, FILE *_screen,
const double aewald, const double felec, const double aewald, const double polar_dscale,
const double off2, const double polar_dscale,
const double polar_uscale); const double polar_uscale);
/// Clear all host and device data /// Clear all host and device data
@ -75,7 +74,7 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
/// Number of atom types /// Number of atom types
int _lj_types; int _lj_types;
numtyp _aewald, _felec, _off2, _polar_dscale, _polar_uscale; numtyp _aewald, _polar_dscale, _polar_uscale;
numtyp _qqrd2e; numtyp _qqrd2e;
protected: protected:

View File

@ -36,8 +36,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15, const int maxspecial, const int maxspecial15,
const double cell_size, int &gpu_mode, FILE *screen, const double cell_size, int &gpu_mode, FILE *screen,
const double aewald, const double felec, const double aewald, const double polar_dscale,
const double off2, const double polar_dscale,
const double polar_uscale, int& tep_size) { const double polar_uscale, int& tep_size) {
AMOEBAMF.clear(); AMOEBAMF.clear();
gpu_mode=AMOEBAMF.device->gpu_mode(); gpu_mode=AMOEBAMF.device->gpu_mode();
@ -67,7 +66,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
host_special_polar_wscale, host_special_polar_piscale, host_special_polar_wscale, host_special_polar_piscale,
host_special_polar_pscale, nlocal, nall, max_nbors, host_special_polar_pscale, nlocal, nall, max_nbors,
maxspecial, maxspecial15, cell_size, gpu_split, screen, maxspecial, maxspecial15, cell_size, gpu_split, screen,
aewald, felec, off2, polar_dscale, polar_uscale); aewald, polar_dscale, polar_uscale);
AMOEBAMF.device->world_barrier(); AMOEBAMF.device->world_barrier();
if (message) if (message)
@ -87,7 +86,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
host_special_polar_wscale, host_special_polar_piscale, host_special_polar_wscale, host_special_polar_piscale,
host_special_polar_pscale, nlocal, nall, max_nbors, host_special_polar_pscale, nlocal, nall, max_nbors,
maxspecial, maxspecial15, cell_size, gpu_split, screen, maxspecial, maxspecial15, cell_size, gpu_split, screen,
aewald, felec, off2, polar_dscale, polar_uscale); aewald, polar_dscale, polar_uscale);
AMOEBAMF.device->gpu_barrier(); AMOEBAMF.device->gpu_barrier();
if (message) if (message)
@ -111,16 +110,16 @@ int** amoeba_gpu_compute_udirect2b(const int ago, const int inum_full,
double **host_uind, double **host_uinp, double **host_uind, double **host_uinp,
double *sublo, double *subhi, tagint *tag, int **nspecial, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, int *nspecial15, tagint** special15, tagint **special, int *nspecial15, tagint** special15,
const bool eflag, const bool vflag, const bool eflag, const bool vflag, const bool eatom,
const bool eatom, const bool vatom, int &host_start, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time, int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo, bool &success, const double off2, double *host_q,
double *prd, void **fieldp_ptr) { double *boxlo, double *prd, void **fieldp_ptr) {
return AMOEBAMF.compute_udirect2b(ago, inum_full, nall, host_x, host_type, return AMOEBAMF.compute_udirect2b(ago, inum_full, nall, host_x, host_type,
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
sublo, subhi, tag, nspecial, special, nspecial15, special15, sublo, subhi, tag, nspecial, special, nspecial15, special15,
eflag, vflag, eatom, vatom, host_start, ilist, jnum, eflag, vflag, eatom, vatom, host_start, ilist, jnum,
cpu_time, success, host_q, boxlo, prd, fieldp_ptr); cpu_time, success, off2, host_q, boxlo, prd, fieldp_ptr);
} }
int** amoeba_gpu_compute_umutual2b(const int ago, const int inum_full, int** amoeba_gpu_compute_umutual2b(const int ago, const int inum_full,
@ -132,13 +131,13 @@ int** amoeba_gpu_compute_umutual2b(const int ago, const int inum_full,
const bool eflag, const bool vflag, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time, int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo, bool &success, const double off2, double *host_q,
double *prd, void **fieldp_ptr) { double *boxlo, double *prd, void **fieldp_ptr) {
return AMOEBAMF.compute_umutual2b(ago, inum_full, nall, host_x, host_type, return AMOEBAMF.compute_umutual2b(ago, inum_full, nall, host_x, host_type,
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
sublo, subhi, tag, nspecial, special, nspecial15, special15, sublo, subhi, tag, nspecial, special, nspecial15, special15,
eflag, vflag, eatom, vatom, host_start, ilist, jnum, eflag, vflag, eatom, vatom, host_start, ilist, jnum,
cpu_time, success, host_q, boxlo, prd, fieldp_ptr); cpu_time, success, off2, host_q, boxlo, prd, fieldp_ptr);
} }
int** amoeba_gpu_compute_polar_real(const int ago, const int inum_full, int** amoeba_gpu_compute_polar_real(const int ago, const int inum_full,
@ -147,17 +146,16 @@ int** amoeba_gpu_compute_polar_real(const int ago, const int inum_full,
double **host_rpole, double **host_uind, double **host_uinp, double **host_rpole, double **host_uind, double **host_uinp,
double *sublo, double *subhi, tagint *tag, int **nspecial, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, int *nspecial15, tagint** special15, tagint **special, int *nspecial15, tagint** special15,
const bool eflag, const bool vflag, const bool eflag, const bool vflag, const bool eatom,
const bool eatom, const bool vatom, int &host_start, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time, int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo, bool &success, const double felec, const double off2,
double *prd, void **tep_ptr) { double *host_q, double *boxlo, double *prd, void **tep_ptr) {
return AMOEBAMF.compute_polar_real(ago, inum_full, nall, host_x, host_type, return AMOEBAMF.compute_polar_real(ago, inum_full, nall, host_x, host_type,
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
sublo, subhi, tag, nspecial, special, nspecial15, special15, sublo, subhi, tag, nspecial, special, nspecial15, special15,
eflag, vflag, eatom, eflag, vflag, eatom, vatom, host_start, ilist, jnum,
vatom, host_start, ilist, jnum, cpu_time, success, cpu_time, success, felec, off2, host_q, boxlo, prd, tep_ptr);
host_q, boxlo, prd, tep_ptr);
} }
double amoeba_gpu_bytes() { double amoeba_gpu_bytes() {

View File

@ -250,7 +250,8 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
const bool eflag_in, const bool vflag_in, const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, int &host_start, const double cpu_time,
bool &success, double *host_q, const int nlocal, bool &success, const double off2_polar, const double felec,
double *host_q, const int nlocal,
double *boxlo, double *prd, void **tep_ptr) { double *boxlo, double *prd, void **tep_ptr) {
acc_timers(); acc_timers();
int eflag, vflag; int eflag, vflag;
@ -316,6 +317,8 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
device->precompute(f_ago,nlocal,nall,host_x,host_type,success,host_q, device->precompute(f_ago,nlocal,nall,host_x,host_type,success,host_q,
boxlo, prd); boxlo, prd);
_off2_polar = off2_polar;
_felec = felec;
const int red_blocks=polar_real(eflag,vflag); const int red_blocks=polar_real(eflag,vflag);
ans->copy_answers(eflag_in,vflag_in,eatom,vatom,ilist,red_blocks); ans->copy_answers(eflag_in,vflag_in,eatom,vatom,ilist,red_blocks);
device->add_ans_object(ans); device->add_ans_object(ans);
@ -437,8 +440,8 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, const i
const bool eflag_in, const bool vflag_in, const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time, int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo, bool &success, const double off2_polar, double *host_q,
double *prd, void** fieldp_ptr) { double *boxlo, double *prd, void** fieldp_ptr) {
acc_timers(); acc_timers();
int eflag, vflag; int eflag, vflag;
if (eatom) eflag=2; if (eatom) eflag=2;
@ -475,6 +478,7 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, const i
} }
*fieldp_ptr=_fieldp.host.begin(); *fieldp_ptr=_fieldp.host.begin();
_off2_polar = off2_polar;
const int red_blocks=udirect2b(eflag,vflag); const int red_blocks=udirect2b(eflag,vflag);
// copy field and fieldp from device to host (_fieldp stores both arrays, one after another) // copy field and fieldp from device to host (_fieldp stores both arrays, one after another)
@ -506,8 +510,8 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, const i
const bool eflag_in, const bool vflag_in, const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time, int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo, bool &success, const double off2_polar, double *host_q,
double *prd, void** fieldp_ptr) { double *boxlo, double *prd, void** fieldp_ptr) {
acc_timers(); acc_timers();
int eflag, vflag; int eflag, vflag;
if (eatom) eflag=2; if (eatom) eflag=2;
@ -544,6 +548,7 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, const i
} }
*fieldp_ptr=_fieldp.host.begin(); *fieldp_ptr=_fieldp.host.begin();
_off2_polar = off2_polar;
const int red_blocks=umutual2b(eflag,vflag); const int red_blocks=umutual2b(eflag,vflag);
// copy field and fieldp from device to host (_fieldp stores both arrays, one after another) // copy field and fieldp from device to host (_fieldp stores both arrays, one after another)
@ -574,8 +579,8 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const
const bool eflag_in, const bool vflag_in, const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time, int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo, bool &success, const double felec, const double off2_polar,
double *prd, void **tep_ptr) { double *host_q, double *boxlo, double *prd, void **tep_ptr) {
acc_timers(); acc_timers();
int eflag, vflag; int eflag, vflag;
if (eatom) eflag=2; if (eatom) eflag=2;
@ -620,6 +625,8 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const
} }
*tep_ptr=_tep.host.begin(); *tep_ptr=_tep.host.begin();
_off2_polar = off2_polar;
_felec = felec;
const int red_blocks=polar_real(eflag,vflag); const int red_blocks=polar_real(eflag,vflag);
ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
device->add_ans_object(ans); device->add_ans_object(ans);

View File

@ -152,7 +152,7 @@ class BaseAmoeba {
const bool eflag, const bool vflag, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
int **ilist, int **numj, const double cpu_time, bool &success, int **ilist, int **numj, const double cpu_time, bool &success,
double *charge, double *boxlo, double *prd, void **fieldp_ptr); const double off2_polar, double *charge, double *boxlo, double *prd, void **fieldp_ptr);
/// Compute the real space part of the induced field (umutual2b) with device neighboring /// Compute the real space part of the induced field (umutual2b) with device neighboring
int** compute_umutual2b(const int ago, const int inum_full, const int nall, int** compute_umutual2b(const int ago, const int inum_full, const int nall,
@ -165,7 +165,7 @@ class BaseAmoeba {
const bool eflag, const bool vflag, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
int **ilist, int **numj, const double cpu_time, bool &success, int **ilist, int **numj, const double cpu_time, bool &success,
double *charge, double *boxlo, double *prd, void **fieldp_ptr); const double off2_polar, double *charge, double *boxlo, double *prd, void **fieldp_ptr);
/// Compute polar real-space with device neighboring /// Compute polar real-space with device neighboring
int** compute_polar_real(const int ago, const int inum_full, const int nall, int** compute_polar_real(const int ago, const int inum_full, const int nall,
@ -177,7 +177,8 @@ class BaseAmoeba {
const bool eflag, const bool vflag, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
int **ilist, int **numj, const double cpu_time, bool &success, int **ilist, int **numj, const double cpu_time, bool &success,
double *charge, double *boxlo, double *prd, void **tep_ptr); const double felec, const double off2_polar, double *charge,
double *boxlo, double *prd, void **tep_ptr);
/// Compute polar real-space with host neighboring (not active for now) /// Compute polar real-space with host neighboring (not active for now)
void compute_polar_real_host_nbor(const int f_ago, const int inum_full, const int nall, void compute_polar_real_host_nbor(const int f_ago, const int inum_full, const int nall,
@ -186,8 +187,8 @@ class BaseAmoeba {
double **host_uinp, int *ilist, int *numj, double **host_uinp, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *charge, const double cpu_time, bool &success, const double felec, const double off2_polar,
const int nlocal, double *boxlo, double *prd, void **tep_ptr); double *charge, const int nlocal, double *boxlo, double *prd, void **tep_ptr);
// -------------------------- DEVICE DATA ------------------------- // -------------------------- DEVICE DATA -------------------------
@ -258,6 +259,8 @@ class BaseAmoeba {
bool short_nbor_avail; bool short_nbor_avail;
UCL_D_Vec<int> *_nbor_data; UCL_D_Vec<int> *_nbor_data;
numtyp _felec,_off2_hal,_off2_repulse,_off2_dispersion,_off2_mpole,_off2_polar;
void compile_kernels(UCL_Device &dev, const void *pair_string, void compile_kernels(UCL_Device &dev, const void *pair_string,
const char *kname_polar, const char *kname_udirect2b, const char *kname_polar, const char *kname_udirect2b,
const char *kname_umutual2b, const char *kname_short_nbor); const char *kname_umutual2b, const char *kname_short_nbor);

View File

@ -59,8 +59,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15, const int maxspecial, const int maxspecial15,
const double cell_size, int &gpu_mode, FILE *screen, const double cell_size, int &gpu_mode, FILE *screen,
const double aewald, const double felec, const double aewald, const double polar_dscale,
const double off2, const double polar_dscale,
const double polar_uscale, int& tep_size); const double polar_uscale, int& tep_size);
void amoeba_gpu_clear(); void amoeba_gpu_clear();
@ -69,33 +68,30 @@ int ** amoeba_gpu_compute_udirect2b(const int ago, const int inum, const int nal
double **host_rpole, double **host_uind, double **host_uinp, double **host_rpole, double **host_uind, double **host_uinp,
double *sublo, double *subhi, tagint *tag, int **nspecial, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, int* nspecial15, tagint** special15, tagint **special, int* nspecial15, tagint** special15,
const bool eflag, const bool vflag, const bool eflag, const bool vflag, const bool eatom, const bool vatom,
const bool eatom, const bool vatom, int &host_start, int &host_start, int **ilist, int **jnum, const double cpu_time,
int **ilist, int **jnum, const double cpu_time, bool &success, const double off2, double *host_q,
bool &success, double *host_q, double *boxlo, double *prd, double *boxlo, double *prd, void **fieldp_ptr);
void **fieldp_ptr);
int ** amoeba_gpu_compute_umutual2b(const int ago, const int inum, const int nall, int ** amoeba_gpu_compute_umutual2b(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
double **host_rpole, double **host_uind, double **host_uinp, double **host_rpole, double **host_uind, double **host_uinp,
double *sublo, double *subhi, tagint *tag, int **nspecial, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, int* nspecial15, tagint** special15, tagint **special, int* nspecial15, tagint** special15,
const bool eflag, const bool vflag, const bool eflag, const bool vflag, const bool eatom, const bool vatom,
const bool eatom, const bool vatom, int &host_start, int &host_start, int **ilist, int **jnum, const double cpu_time,
int **ilist, int **jnum, const double cpu_time, bool &success, const double off2, double *host_q,
bool &success, double *host_q, double *boxlo, double *prd, double *boxlo, double *prd, void **fieldp_ptr);
void **fieldp_ptr);
int ** amoeba_gpu_compute_polar_real(const int ago, const int inum, const int nall, int ** amoeba_gpu_compute_polar_real(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *host_amtype, int *host_amgroup, double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
double **host_rpole, double **host_uind, double **host_uinp, double **host_rpole, double **host_uind, double **host_uinp,
double *sublo, double *subhi, tagint *tag, int **nspecial, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, int* nspecial15, tagint** special15, tagint **special, int* nspecial15, tagint** special15,
const bool eflag, const bool vflag, const bool eflag, const bool vflag, const bool eatom, const bool vatom,
const bool eatom, const bool vatom, int &host_start, int &host_start, int **ilist, int **jnum, const double cpu_time,
int **ilist, int **jnum, const double cpu_time, bool &success, const double off2, const double felec, double *host_q,
bool &success, double *host_q, double *boxlo, double *prd, double *boxlo, double *prd, void **tep_ptr);
void **tep_ptr);
double amoeba_gpu_bytes(); double amoeba_gpu_bytes();
@ -155,6 +151,15 @@ void PairAmoebaGPU::polar_real()
} }
inum = atom->nlocal; inum = atom->nlocal;
// select the correct cutoff for the term
if (use_ewald) choose(POLAR_LONG);
else choose(POLAR);
// set the energy unit conversion factor for polar real-space calculation
double felec = 0.5 * electric / am_dielectric;
firstneigh = amoeba_gpu_compute_polar_real(neighbor->ago, inum, nall, atom->x, firstneigh = amoeba_gpu_compute_polar_real(neighbor->ago, inum, nall, atom->x,
atom->type, amtype, amgroup, atom->type, amtype, amgroup,
rpole, uind, uinp, sublo, subhi, rpole, uind, uinp, sublo, subhi,
@ -162,7 +167,7 @@ void PairAmoebaGPU::polar_real()
atom->nspecial15, atom->special15, atom->nspecial15, atom->special15,
eflag, vflag, eflag_atom, vflag_atom, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, success, felec, off2, atom->q, domain->boxlo,
domain->prd, &tep_pinned); domain->prd, &tep_pinned);
@ -278,11 +283,11 @@ void PairAmoebaGPU::init_style()
// select the squared cutoff (off2) for neighbor list builds (the polar term for now) // select the squared cutoff (off2) for neighbor list builds (the polar term for now)
// NOTE: induce and polar terms are using the same flags here // NOTE: induce and polar terms are using the same flags here
/*
if (use_ewald) choose(POLAR_LONG); if (use_ewald) choose(POLAR_LONG);
else choose(POLAR); else choose(POLAR);
*/
double cell_size = sqrt(off2) + neighbor->skin; double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0; int maxspecial=0;
int maxspecial15=0; int maxspecial15=0;
@ -303,8 +308,7 @@ void PairAmoebaGPU::init_style()
special_polar_pscale, atom->nlocal, special_polar_pscale, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial, atom->nlocal+atom->nghost, mnf, maxspecial,
maxspecial15, cell_size, gpu_mode, screen, maxspecial15, cell_size, gpu_mode, screen,
aewald, felec, off2, polar_dscale, polar_uscale, aewald, polar_dscale, polar_uscale, tep_size);
tep_size);
GPU_EXTRA::check_flag(success,error,world); GPU_EXTRA::check_flag(success,error,world);
if (gpu_mode == GPU_FORCE) if (gpu_mode == GPU_FORCE)
@ -784,13 +788,18 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
} }
inum = atom->nlocal; inum = atom->nlocal;
// select the correct cutoff (off2) for the term
if (use_ewald) choose(POLAR_LONG);
else choose(POLAR);
firstneigh = amoeba_gpu_compute_udirect2b(neighbor->ago, inum, nall, atom->x, firstneigh = amoeba_gpu_compute_udirect2b(neighbor->ago, inum, nall, atom->x,
atom->type, amtype, amgroup, rpole, uind, uinp, atom->type, amtype, amgroup, rpole, uind, uinp,
sublo, subhi, atom->tag, atom->nspecial, atom->special, sublo, subhi, atom->tag, atom->nspecial, atom->special,
atom->nspecial15, atom->special15, atom->nspecial15, atom->special15,
eflag, vflag, eflag_atom, vflag_atom, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, success, off2, atom->q, domain->boxlo,
domain->prd, &fieldp_pinned); domain->prd, &fieldp_pinned);
if (!success) if (!success)
error->one(FLERR,"Insufficient memory on accelerator"); error->one(FLERR,"Insufficient memory on accelerator");
@ -1003,13 +1012,18 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp)
} }
inum = atom->nlocal; inum = atom->nlocal;
// select the correct cutoff (off2) for the term
if (use_ewald) choose(POLAR_LONG);
else choose(POLAR);
firstneigh = amoeba_gpu_compute_umutual2b(neighbor->ago, inum, nall, atom->x, firstneigh = amoeba_gpu_compute_umutual2b(neighbor->ago, inum, nall, atom->x,
atom->type, amtype, amgroup, rpole, uind, uinp, atom->type, amtype, amgroup, rpole, uind, uinp,
sublo, subhi, atom->tag, atom->nspecial, atom->special, sublo, subhi, atom->tag, atom->nspecial, atom->special,
atom->nspecial15, atom->special15, atom->nspecial15, atom->special15,
eflag, vflag, eflag_atom, vflag_atom, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, success, off2, atom->q, domain->boxlo,
domain->prd, &fieldp_pinned); domain->prd, &fieldp_pinned);
if (!success) if (!success)
error->one(FLERR,"Insufficient memory on accelerator"); error->one(FLERR,"Insufficient memory on accelerator");