Refactored the API so that different off2 values are used for different kernels
This commit is contained in:
@ -52,8 +52,7 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const double *host_pda
|
|||||||
const int nlocal, const int nall, const int max_nbors,
|
const int nlocal, const int nall, const int max_nbors,
|
||||||
const int maxspecial, const int maxspecial15,
|
const int maxspecial, const int maxspecial15,
|
||||||
const double cell_size, const double gpu_split, FILE *_screen,
|
const double cell_size, const double gpu_split, FILE *_screen,
|
||||||
const double aewald, const double felec,
|
const double aewald, const double polar_dscale,
|
||||||
const double off2, const double polar_dscale,
|
|
||||||
const double polar_uscale) {
|
const double polar_uscale) {
|
||||||
int success;
|
int success;
|
||||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,maxspecial15,
|
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,maxspecial15,
|
||||||
@ -97,8 +96,6 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const double *host_pda
|
|||||||
ucl_copy(sp_polar,dview,5,false);
|
ucl_copy(sp_polar,dview,5,false);
|
||||||
|
|
||||||
_aewald = aewald;
|
_aewald = aewald;
|
||||||
_felec = felec;
|
|
||||||
_off2 = off2;
|
|
||||||
_polar_dscale = polar_dscale;
|
_polar_dscale = polar_dscale;
|
||||||
_polar_uscale = polar_uscale;
|
_polar_uscale = polar_uscale;
|
||||||
|
|
||||||
@ -145,7 +142,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
|
|||||||
this->k_short_nbor.set_size(GX,BX);
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
||||||
&this->_nbor_data->begin(),
|
&this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor, &_off2, &ainum,
|
&this->dev_short_nbor, &this->_off2_polar, &ainum,
|
||||||
&nbor_pitch, &this->_threads_per_atom);
|
&nbor_pitch, &this->_threads_per_atom);
|
||||||
this->short_nbor_avail = true;
|
this->short_nbor_avail = true;
|
||||||
}
|
}
|
||||||
@ -155,7 +152,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
|
|||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor,
|
&this->dev_short_nbor,
|
||||||
&this->_fieldp, &ainum, &_nall, &nbor_pitch,
|
&this->_fieldp, &ainum, &_nall, &nbor_pitch,
|
||||||
&this->_threads_per_atom, &_aewald, &_off2,
|
&this->_threads_per_atom, &_aewald, &this->_off2_polar,
|
||||||
&_polar_dscale, &_polar_uscale);
|
&_polar_dscale, &_polar_uscale);
|
||||||
|
|
||||||
this->time_pair.stop();
|
this->time_pair.stop();
|
||||||
@ -181,19 +178,18 @@ int AmoebaT::umutual2b(const int eflag, const int vflag) {
|
|||||||
if (!this->short_nbor_avail) {
|
if (!this->short_nbor_avail) {
|
||||||
this->k_short_nbor.set_size(GX,BX);
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
||||||
&this->_nbor_data->begin(),
|
&this->_nbor_data->begin(), &this->dev_short_nbor,
|
||||||
&this->dev_short_nbor, &_off2, &ainum,
|
&this->_off2_polar, &ainum, &nbor_pitch,
|
||||||
&nbor_pitch, &this->_threads_per_atom);
|
&this->_threads_per_atom);
|
||||||
this->short_nbor_avail = true;
|
this->short_nbor_avail = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
this->k_umutual2b.set_size(GX,BX);
|
this->k_umutual2b.set_size(GX,BX);
|
||||||
this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar,
|
this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor,
|
&this->dev_short_nbor, &this->_fieldp, &ainum, &_nall,
|
||||||
&this->_fieldp, &ainum, &_nall, &nbor_pitch,
|
&nbor_pitch, &this->_threads_per_atom, &_aewald,
|
||||||
&this->_threads_per_atom, &_aewald, &_off2,
|
&this->_off2_polar, &_polar_dscale, &_polar_uscale);
|
||||||
&_polar_dscale, &_polar_uscale);
|
|
||||||
|
|
||||||
this->time_pair.stop();
|
this->time_pair.stop();
|
||||||
return GX;
|
return GX;
|
||||||
@ -219,7 +215,7 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
|
|||||||
this->k_short_nbor.set_size(GX,BX);
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
||||||
&this->_nbor_data->begin(),
|
&this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor, &_off2, &ainum,
|
&this->dev_short_nbor, &this->_off2_polar, &ainum,
|
||||||
&nbor_pitch, &this->_threads_per_atom);
|
&nbor_pitch, &this->_threads_per_atom);
|
||||||
this->short_nbor_avail = true;
|
this->short_nbor_avail = true;
|
||||||
}
|
}
|
||||||
@ -230,8 +226,8 @@ int AmoebaT::polar_real(const int eflag, const int vflag) {
|
|||||||
&this->dev_short_nbor,
|
&this->dev_short_nbor,
|
||||||
&this->ans->force, &this->ans->engv, &this->_tep,
|
&this->ans->force, &this->ans->engv, &this->_tep,
|
||||||
&eflag, &vflag, &ainum, &_nall, &nbor_pitch,
|
&eflag, &vflag, &ainum, &_nall, &nbor_pitch,
|
||||||
&this->_threads_per_atom,
|
&this->_threads_per_atom, &_aewald, &this->_felec,
|
||||||
&_aewald, &_felec, &_off2, &_polar_dscale, &_polar_uscale);
|
&this->_off2_polar, &_polar_dscale, &_polar_uscale);
|
||||||
this->time_pair.stop();
|
this->time_pair.stop();
|
||||||
|
|
||||||
// Signal that short nbor list is not avail for the next time step
|
// Signal that short nbor list is not avail for the next time step
|
||||||
|
|||||||
@ -45,8 +45,7 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
|
|||||||
const int nlocal, const int nall, const int max_nbors,
|
const int nlocal, const int nall, const int max_nbors,
|
||||||
const int maxspecial, const int maxspecial15, const double cell_size,
|
const int maxspecial, const int maxspecial15, const double cell_size,
|
||||||
const double gpu_split, FILE *_screen,
|
const double gpu_split, FILE *_screen,
|
||||||
const double aewald, const double felec,
|
const double aewald, const double polar_dscale,
|
||||||
const double off2, const double polar_dscale,
|
|
||||||
const double polar_uscale);
|
const double polar_uscale);
|
||||||
|
|
||||||
/// Clear all host and device data
|
/// Clear all host and device data
|
||||||
@ -75,7 +74,7 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
|
|||||||
/// Number of atom types
|
/// Number of atom types
|
||||||
int _lj_types;
|
int _lj_types;
|
||||||
|
|
||||||
numtyp _aewald, _felec, _off2, _polar_dscale, _polar_uscale;
|
numtyp _aewald, _polar_dscale, _polar_uscale;
|
||||||
numtyp _qqrd2e;
|
numtyp _qqrd2e;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|||||||
@ -36,8 +36,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
|
|||||||
const int nlocal, const int nall, const int max_nbors,
|
const int nlocal, const int nall, const int max_nbors,
|
||||||
const int maxspecial, const int maxspecial15,
|
const int maxspecial, const int maxspecial15,
|
||||||
const double cell_size, int &gpu_mode, FILE *screen,
|
const double cell_size, int &gpu_mode, FILE *screen,
|
||||||
const double aewald, const double felec,
|
const double aewald, const double polar_dscale,
|
||||||
const double off2, const double polar_dscale,
|
|
||||||
const double polar_uscale, int& tep_size) {
|
const double polar_uscale, int& tep_size) {
|
||||||
AMOEBAMF.clear();
|
AMOEBAMF.clear();
|
||||||
gpu_mode=AMOEBAMF.device->gpu_mode();
|
gpu_mode=AMOEBAMF.device->gpu_mode();
|
||||||
@ -67,7 +66,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
|
|||||||
host_special_polar_wscale, host_special_polar_piscale,
|
host_special_polar_wscale, host_special_polar_piscale,
|
||||||
host_special_polar_pscale, nlocal, nall, max_nbors,
|
host_special_polar_pscale, nlocal, nall, max_nbors,
|
||||||
maxspecial, maxspecial15, cell_size, gpu_split, screen,
|
maxspecial, maxspecial15, cell_size, gpu_split, screen,
|
||||||
aewald, felec, off2, polar_dscale, polar_uscale);
|
aewald, polar_dscale, polar_uscale);
|
||||||
|
|
||||||
AMOEBAMF.device->world_barrier();
|
AMOEBAMF.device->world_barrier();
|
||||||
if (message)
|
if (message)
|
||||||
@ -87,7 +86,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
|
|||||||
host_special_polar_wscale, host_special_polar_piscale,
|
host_special_polar_wscale, host_special_polar_piscale,
|
||||||
host_special_polar_pscale, nlocal, nall, max_nbors,
|
host_special_polar_pscale, nlocal, nall, max_nbors,
|
||||||
maxspecial, maxspecial15, cell_size, gpu_split, screen,
|
maxspecial, maxspecial15, cell_size, gpu_split, screen,
|
||||||
aewald, felec, off2, polar_dscale, polar_uscale);
|
aewald, polar_dscale, polar_uscale);
|
||||||
|
|
||||||
AMOEBAMF.device->gpu_barrier();
|
AMOEBAMF.device->gpu_barrier();
|
||||||
if (message)
|
if (message)
|
||||||
@ -111,16 +110,16 @@ int** amoeba_gpu_compute_udirect2b(const int ago, const int inum_full,
|
|||||||
double **host_uind, double **host_uinp,
|
double **host_uind, double **host_uinp,
|
||||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||||
tagint **special, int *nspecial15, tagint** special15,
|
tagint **special, int *nspecial15, tagint** special15,
|
||||||
const bool eflag, const bool vflag,
|
const bool eflag, const bool vflag, const bool eatom,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool vatom, int &host_start,
|
||||||
int **ilist, int **jnum, const double cpu_time,
|
int **ilist, int **jnum, const double cpu_time,
|
||||||
bool &success, double *host_q, double *boxlo,
|
bool &success, const double off2, double *host_q,
|
||||||
double *prd, void **fieldp_ptr) {
|
double *boxlo, double *prd, void **fieldp_ptr) {
|
||||||
return AMOEBAMF.compute_udirect2b(ago, inum_full, nall, host_x, host_type,
|
return AMOEBAMF.compute_udirect2b(ago, inum_full, nall, host_x, host_type,
|
||||||
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
|
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
|
||||||
sublo, subhi, tag, nspecial, special, nspecial15, special15,
|
sublo, subhi, tag, nspecial, special, nspecial15, special15,
|
||||||
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
|
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
|
||||||
cpu_time, success, host_q, boxlo, prd, fieldp_ptr);
|
cpu_time, success, off2, host_q, boxlo, prd, fieldp_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
int** amoeba_gpu_compute_umutual2b(const int ago, const int inum_full,
|
int** amoeba_gpu_compute_umutual2b(const int ago, const int inum_full,
|
||||||
@ -132,13 +131,13 @@ int** amoeba_gpu_compute_umutual2b(const int ago, const int inum_full,
|
|||||||
const bool eflag, const bool vflag,
|
const bool eflag, const bool vflag,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
int **ilist, int **jnum, const double cpu_time,
|
int **ilist, int **jnum, const double cpu_time,
|
||||||
bool &success, double *host_q, double *boxlo,
|
bool &success, const double off2, double *host_q,
|
||||||
double *prd, void **fieldp_ptr) {
|
double *boxlo, double *prd, void **fieldp_ptr) {
|
||||||
return AMOEBAMF.compute_umutual2b(ago, inum_full, nall, host_x, host_type,
|
return AMOEBAMF.compute_umutual2b(ago, inum_full, nall, host_x, host_type,
|
||||||
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
|
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
|
||||||
sublo, subhi, tag, nspecial, special, nspecial15, special15,
|
sublo, subhi, tag, nspecial, special, nspecial15, special15,
|
||||||
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
|
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
|
||||||
cpu_time, success, host_q, boxlo, prd, fieldp_ptr);
|
cpu_time, success, off2, host_q, boxlo, prd, fieldp_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
int** amoeba_gpu_compute_polar_real(const int ago, const int inum_full,
|
int** amoeba_gpu_compute_polar_real(const int ago, const int inum_full,
|
||||||
@ -147,17 +146,16 @@ int** amoeba_gpu_compute_polar_real(const int ago, const int inum_full,
|
|||||||
double **host_rpole, double **host_uind, double **host_uinp,
|
double **host_rpole, double **host_uind, double **host_uinp,
|
||||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||||
tagint **special, int *nspecial15, tagint** special15,
|
tagint **special, int *nspecial15, tagint** special15,
|
||||||
const bool eflag, const bool vflag,
|
const bool eflag, const bool vflag, const bool eatom,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool vatom, int &host_start,
|
||||||
int **ilist, int **jnum, const double cpu_time,
|
int **ilist, int **jnum, const double cpu_time,
|
||||||
bool &success, double *host_q, double *boxlo,
|
bool &success, const double felec, const double off2,
|
||||||
double *prd, void **tep_ptr) {
|
double *host_q, double *boxlo, double *prd, void **tep_ptr) {
|
||||||
return AMOEBAMF.compute_polar_real(ago, inum_full, nall, host_x, host_type,
|
return AMOEBAMF.compute_polar_real(ago, inum_full, nall, host_x, host_type,
|
||||||
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
|
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
|
||||||
sublo, subhi, tag, nspecial, special, nspecial15, special15,
|
sublo, subhi, tag, nspecial, special, nspecial15, special15,
|
||||||
eflag, vflag, eatom,
|
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
|
||||||
vatom, host_start, ilist, jnum, cpu_time, success,
|
cpu_time, success, felec, off2, host_q, boxlo, prd, tep_ptr);
|
||||||
host_q, boxlo, prd, tep_ptr);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
double amoeba_gpu_bytes() {
|
double amoeba_gpu_bytes() {
|
||||||
|
|||||||
@ -250,7 +250,8 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
|
|||||||
const bool eflag_in, const bool vflag_in,
|
const bool eflag_in, const bool vflag_in,
|
||||||
const bool eatom, const bool vatom,
|
const bool eatom, const bool vatom,
|
||||||
int &host_start, const double cpu_time,
|
int &host_start, const double cpu_time,
|
||||||
bool &success, double *host_q, const int nlocal,
|
bool &success, const double off2_polar, const double felec,
|
||||||
|
double *host_q, const int nlocal,
|
||||||
double *boxlo, double *prd, void **tep_ptr) {
|
double *boxlo, double *prd, void **tep_ptr) {
|
||||||
acc_timers();
|
acc_timers();
|
||||||
int eflag, vflag;
|
int eflag, vflag;
|
||||||
@ -316,6 +317,8 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
|
|||||||
device->precompute(f_ago,nlocal,nall,host_x,host_type,success,host_q,
|
device->precompute(f_ago,nlocal,nall,host_x,host_type,success,host_q,
|
||||||
boxlo, prd);
|
boxlo, prd);
|
||||||
|
|
||||||
|
_off2_polar = off2_polar;
|
||||||
|
_felec = felec;
|
||||||
const int red_blocks=polar_real(eflag,vflag);
|
const int red_blocks=polar_real(eflag,vflag);
|
||||||
ans->copy_answers(eflag_in,vflag_in,eatom,vatom,ilist,red_blocks);
|
ans->copy_answers(eflag_in,vflag_in,eatom,vatom,ilist,red_blocks);
|
||||||
device->add_ans_object(ans);
|
device->add_ans_object(ans);
|
||||||
@ -437,8 +440,8 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, const i
|
|||||||
const bool eflag_in, const bool vflag_in,
|
const bool eflag_in, const bool vflag_in,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
int **ilist, int **jnum, const double cpu_time,
|
int **ilist, int **jnum, const double cpu_time,
|
||||||
bool &success, double *host_q, double *boxlo,
|
bool &success, const double off2_polar, double *host_q,
|
||||||
double *prd, void** fieldp_ptr) {
|
double *boxlo, double *prd, void** fieldp_ptr) {
|
||||||
acc_timers();
|
acc_timers();
|
||||||
int eflag, vflag;
|
int eflag, vflag;
|
||||||
if (eatom) eflag=2;
|
if (eatom) eflag=2;
|
||||||
@ -475,6 +478,7 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full, const i
|
|||||||
}
|
}
|
||||||
*fieldp_ptr=_fieldp.host.begin();
|
*fieldp_ptr=_fieldp.host.begin();
|
||||||
|
|
||||||
|
_off2_polar = off2_polar;
|
||||||
const int red_blocks=udirect2b(eflag,vflag);
|
const int red_blocks=udirect2b(eflag,vflag);
|
||||||
|
|
||||||
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
||||||
@ -506,8 +510,8 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, const i
|
|||||||
const bool eflag_in, const bool vflag_in,
|
const bool eflag_in, const bool vflag_in,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
int **ilist, int **jnum, const double cpu_time,
|
int **ilist, int **jnum, const double cpu_time,
|
||||||
bool &success, double *host_q, double *boxlo,
|
bool &success, const double off2_polar, double *host_q,
|
||||||
double *prd, void** fieldp_ptr) {
|
double *boxlo, double *prd, void** fieldp_ptr) {
|
||||||
acc_timers();
|
acc_timers();
|
||||||
int eflag, vflag;
|
int eflag, vflag;
|
||||||
if (eatom) eflag=2;
|
if (eatom) eflag=2;
|
||||||
@ -544,6 +548,7 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full, const i
|
|||||||
}
|
}
|
||||||
*fieldp_ptr=_fieldp.host.begin();
|
*fieldp_ptr=_fieldp.host.begin();
|
||||||
|
|
||||||
|
_off2_polar = off2_polar;
|
||||||
const int red_blocks=umutual2b(eflag,vflag);
|
const int red_blocks=umutual2b(eflag,vflag);
|
||||||
|
|
||||||
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
||||||
@ -574,8 +579,8 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const
|
|||||||
const bool eflag_in, const bool vflag_in,
|
const bool eflag_in, const bool vflag_in,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
int **ilist, int **jnum, const double cpu_time,
|
int **ilist, int **jnum, const double cpu_time,
|
||||||
bool &success, double *host_q, double *boxlo,
|
bool &success, const double felec, const double off2_polar,
|
||||||
double *prd, void **tep_ptr) {
|
double *host_q, double *boxlo, double *prd, void **tep_ptr) {
|
||||||
acc_timers();
|
acc_timers();
|
||||||
int eflag, vflag;
|
int eflag, vflag;
|
||||||
if (eatom) eflag=2;
|
if (eatom) eflag=2;
|
||||||
@ -620,6 +625,8 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full, const
|
|||||||
}
|
}
|
||||||
*tep_ptr=_tep.host.begin();
|
*tep_ptr=_tep.host.begin();
|
||||||
|
|
||||||
|
_off2_polar = off2_polar;
|
||||||
|
_felec = felec;
|
||||||
const int red_blocks=polar_real(eflag,vflag);
|
const int red_blocks=polar_real(eflag,vflag);
|
||||||
ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
||||||
device->add_ans_object(ans);
|
device->add_ans_object(ans);
|
||||||
|
|||||||
@ -152,7 +152,7 @@ class BaseAmoeba {
|
|||||||
const bool eflag, const bool vflag,
|
const bool eflag, const bool vflag,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||||
double *charge, double *boxlo, double *prd, void **fieldp_ptr);
|
const double off2_polar, double *charge, double *boxlo, double *prd, void **fieldp_ptr);
|
||||||
|
|
||||||
/// Compute the real space part of the induced field (umutual2b) with device neighboring
|
/// Compute the real space part of the induced field (umutual2b) with device neighboring
|
||||||
int** compute_umutual2b(const int ago, const int inum_full, const int nall,
|
int** compute_umutual2b(const int ago, const int inum_full, const int nall,
|
||||||
@ -165,7 +165,7 @@ class BaseAmoeba {
|
|||||||
const bool eflag, const bool vflag,
|
const bool eflag, const bool vflag,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||||
double *charge, double *boxlo, double *prd, void **fieldp_ptr);
|
const double off2_polar, double *charge, double *boxlo, double *prd, void **fieldp_ptr);
|
||||||
|
|
||||||
/// Compute polar real-space with device neighboring
|
/// Compute polar real-space with device neighboring
|
||||||
int** compute_polar_real(const int ago, const int inum_full, const int nall,
|
int** compute_polar_real(const int ago, const int inum_full, const int nall,
|
||||||
@ -177,7 +177,8 @@ class BaseAmoeba {
|
|||||||
const bool eflag, const bool vflag,
|
const bool eflag, const bool vflag,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||||
double *charge, double *boxlo, double *prd, void **tep_ptr);
|
const double felec, const double off2_polar, double *charge,
|
||||||
|
double *boxlo, double *prd, void **tep_ptr);
|
||||||
|
|
||||||
/// Compute polar real-space with host neighboring (not active for now)
|
/// Compute polar real-space with host neighboring (not active for now)
|
||||||
void compute_polar_real_host_nbor(const int f_ago, const int inum_full, const int nall,
|
void compute_polar_real_host_nbor(const int f_ago, const int inum_full, const int nall,
|
||||||
@ -186,8 +187,8 @@ class BaseAmoeba {
|
|||||||
double **host_uinp, int *ilist, int *numj,
|
double **host_uinp, int *ilist, int *numj,
|
||||||
int **firstneigh, const bool eflag, const bool vflag,
|
int **firstneigh, const bool eflag, const bool vflag,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
const double cpu_time, bool &success, double *charge,
|
const double cpu_time, bool &success, const double felec, const double off2_polar,
|
||||||
const int nlocal, double *boxlo, double *prd, void **tep_ptr);
|
double *charge, const int nlocal, double *boxlo, double *prd, void **tep_ptr);
|
||||||
|
|
||||||
// -------------------------- DEVICE DATA -------------------------
|
// -------------------------- DEVICE DATA -------------------------
|
||||||
|
|
||||||
@ -258,6 +259,8 @@ class BaseAmoeba {
|
|||||||
bool short_nbor_avail;
|
bool short_nbor_avail;
|
||||||
UCL_D_Vec<int> *_nbor_data;
|
UCL_D_Vec<int> *_nbor_data;
|
||||||
|
|
||||||
|
numtyp _felec,_off2_hal,_off2_repulse,_off2_dispersion,_off2_mpole,_off2_polar;
|
||||||
|
|
||||||
void compile_kernels(UCL_Device &dev, const void *pair_string,
|
void compile_kernels(UCL_Device &dev, const void *pair_string,
|
||||||
const char *kname_polar, const char *kname_udirect2b,
|
const char *kname_polar, const char *kname_udirect2b,
|
||||||
const char *kname_umutual2b, const char *kname_short_nbor);
|
const char *kname_umutual2b, const char *kname_short_nbor);
|
||||||
|
|||||||
@ -59,8 +59,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
|
|||||||
const int nlocal, const int nall, const int max_nbors,
|
const int nlocal, const int nall, const int max_nbors,
|
||||||
const int maxspecial, const int maxspecial15,
|
const int maxspecial, const int maxspecial15,
|
||||||
const double cell_size, int &gpu_mode, FILE *screen,
|
const double cell_size, int &gpu_mode, FILE *screen,
|
||||||
const double aewald, const double felec,
|
const double aewald, const double polar_dscale,
|
||||||
const double off2, const double polar_dscale,
|
|
||||||
const double polar_uscale, int& tep_size);
|
const double polar_uscale, int& tep_size);
|
||||||
void amoeba_gpu_clear();
|
void amoeba_gpu_clear();
|
||||||
|
|
||||||
@ -69,33 +68,30 @@ int ** amoeba_gpu_compute_udirect2b(const int ago, const int inum, const int nal
|
|||||||
double **host_rpole, double **host_uind, double **host_uinp,
|
double **host_rpole, double **host_uind, double **host_uinp,
|
||||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||||
tagint **special, int* nspecial15, tagint** special15,
|
tagint **special, int* nspecial15, tagint** special15,
|
||||||
const bool eflag, const bool vflag,
|
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
int &host_start, int **ilist, int **jnum, const double cpu_time,
|
||||||
int **ilist, int **jnum, const double cpu_time,
|
bool &success, const double off2, double *host_q,
|
||||||
bool &success, double *host_q, double *boxlo, double *prd,
|
double *boxlo, double *prd, void **fieldp_ptr);
|
||||||
void **fieldp_ptr);
|
|
||||||
|
|
||||||
int ** amoeba_gpu_compute_umutual2b(const int ago, const int inum, const int nall,
|
int ** amoeba_gpu_compute_umutual2b(const int ago, const int inum, const int nall,
|
||||||
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
|
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
|
||||||
double **host_rpole, double **host_uind, double **host_uinp,
|
double **host_rpole, double **host_uind, double **host_uinp,
|
||||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||||
tagint **special, int* nspecial15, tagint** special15,
|
tagint **special, int* nspecial15, tagint** special15,
|
||||||
const bool eflag, const bool vflag,
|
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
int &host_start, int **ilist, int **jnum, const double cpu_time,
|
||||||
int **ilist, int **jnum, const double cpu_time,
|
bool &success, const double off2, double *host_q,
|
||||||
bool &success, double *host_q, double *boxlo, double *prd,
|
double *boxlo, double *prd, void **fieldp_ptr);
|
||||||
void **fieldp_ptr);
|
|
||||||
|
|
||||||
int ** amoeba_gpu_compute_polar_real(const int ago, const int inum, const int nall,
|
int ** amoeba_gpu_compute_polar_real(const int ago, const int inum, const int nall,
|
||||||
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
|
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
|
||||||
double **host_rpole, double **host_uind, double **host_uinp,
|
double **host_rpole, double **host_uind, double **host_uinp,
|
||||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||||
tagint **special, int* nspecial15, tagint** special15,
|
tagint **special, int* nspecial15, tagint** special15,
|
||||||
const bool eflag, const bool vflag,
|
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
int &host_start, int **ilist, int **jnum, const double cpu_time,
|
||||||
int **ilist, int **jnum, const double cpu_time,
|
bool &success, const double off2, const double felec, double *host_q,
|
||||||
bool &success, double *host_q, double *boxlo, double *prd,
|
double *boxlo, double *prd, void **tep_ptr);
|
||||||
void **tep_ptr);
|
|
||||||
|
|
||||||
double amoeba_gpu_bytes();
|
double amoeba_gpu_bytes();
|
||||||
|
|
||||||
@ -155,6 +151,15 @@ void PairAmoebaGPU::polar_real()
|
|||||||
}
|
}
|
||||||
inum = atom->nlocal;
|
inum = atom->nlocal;
|
||||||
|
|
||||||
|
// select the correct cutoff for the term
|
||||||
|
|
||||||
|
if (use_ewald) choose(POLAR_LONG);
|
||||||
|
else choose(POLAR);
|
||||||
|
|
||||||
|
// set the energy unit conversion factor for polar real-space calculation
|
||||||
|
|
||||||
|
double felec = 0.5 * electric / am_dielectric;
|
||||||
|
|
||||||
firstneigh = amoeba_gpu_compute_polar_real(neighbor->ago, inum, nall, atom->x,
|
firstneigh = amoeba_gpu_compute_polar_real(neighbor->ago, inum, nall, atom->x,
|
||||||
atom->type, amtype, amgroup,
|
atom->type, amtype, amgroup,
|
||||||
rpole, uind, uinp, sublo, subhi,
|
rpole, uind, uinp, sublo, subhi,
|
||||||
@ -162,7 +167,7 @@ void PairAmoebaGPU::polar_real()
|
|||||||
atom->nspecial15, atom->special15,
|
atom->nspecial15, atom->special15,
|
||||||
eflag, vflag, eflag_atom, vflag_atom,
|
eflag, vflag, eflag_atom, vflag_atom,
|
||||||
host_start, &ilist, &numneigh, cpu_time,
|
host_start, &ilist, &numneigh, cpu_time,
|
||||||
success, atom->q, domain->boxlo,
|
success, felec, off2, atom->q, domain->boxlo,
|
||||||
domain->prd, &tep_pinned);
|
domain->prd, &tep_pinned);
|
||||||
|
|
||||||
|
|
||||||
@ -278,11 +283,11 @@ void PairAmoebaGPU::init_style()
|
|||||||
|
|
||||||
// select the squared cutoff (off2) for neighbor list builds (the polar term for now)
|
// select the squared cutoff (off2) for neighbor list builds (the polar term for now)
|
||||||
// NOTE: induce and polar terms are using the same flags here
|
// NOTE: induce and polar terms are using the same flags here
|
||||||
|
/*
|
||||||
if (use_ewald) choose(POLAR_LONG);
|
if (use_ewald) choose(POLAR_LONG);
|
||||||
else choose(POLAR);
|
else choose(POLAR);
|
||||||
|
*/
|
||||||
double cell_size = sqrt(off2) + neighbor->skin;
|
double cell_size = sqrt(maxcut) + neighbor->skin;
|
||||||
|
|
||||||
int maxspecial=0;
|
int maxspecial=0;
|
||||||
int maxspecial15=0;
|
int maxspecial15=0;
|
||||||
@ -303,8 +308,7 @@ void PairAmoebaGPU::init_style()
|
|||||||
special_polar_pscale, atom->nlocal,
|
special_polar_pscale, atom->nlocal,
|
||||||
atom->nlocal+atom->nghost, mnf, maxspecial,
|
atom->nlocal+atom->nghost, mnf, maxspecial,
|
||||||
maxspecial15, cell_size, gpu_mode, screen,
|
maxspecial15, cell_size, gpu_mode, screen,
|
||||||
aewald, felec, off2, polar_dscale, polar_uscale,
|
aewald, polar_dscale, polar_uscale, tep_size);
|
||||||
tep_size);
|
|
||||||
GPU_EXTRA::check_flag(success,error,world);
|
GPU_EXTRA::check_flag(success,error,world);
|
||||||
|
|
||||||
if (gpu_mode == GPU_FORCE)
|
if (gpu_mode == GPU_FORCE)
|
||||||
@ -784,13 +788,18 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
|
|||||||
}
|
}
|
||||||
inum = atom->nlocal;
|
inum = atom->nlocal;
|
||||||
|
|
||||||
|
// select the correct cutoff (off2) for the term
|
||||||
|
|
||||||
|
if (use_ewald) choose(POLAR_LONG);
|
||||||
|
else choose(POLAR);
|
||||||
|
|
||||||
firstneigh = amoeba_gpu_compute_udirect2b(neighbor->ago, inum, nall, atom->x,
|
firstneigh = amoeba_gpu_compute_udirect2b(neighbor->ago, inum, nall, atom->x,
|
||||||
atom->type, amtype, amgroup, rpole, uind, uinp,
|
atom->type, amtype, amgroup, rpole, uind, uinp,
|
||||||
sublo, subhi, atom->tag, atom->nspecial, atom->special,
|
sublo, subhi, atom->tag, atom->nspecial, atom->special,
|
||||||
atom->nspecial15, atom->special15,
|
atom->nspecial15, atom->special15,
|
||||||
eflag, vflag, eflag_atom, vflag_atom,
|
eflag, vflag, eflag_atom, vflag_atom,
|
||||||
host_start, &ilist, &numneigh, cpu_time,
|
host_start, &ilist, &numneigh, cpu_time,
|
||||||
success, atom->q, domain->boxlo,
|
success, off2, atom->q, domain->boxlo,
|
||||||
domain->prd, &fieldp_pinned);
|
domain->prd, &fieldp_pinned);
|
||||||
if (!success)
|
if (!success)
|
||||||
error->one(FLERR,"Insufficient memory on accelerator");
|
error->one(FLERR,"Insufficient memory on accelerator");
|
||||||
@ -1003,13 +1012,18 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp)
|
|||||||
}
|
}
|
||||||
inum = atom->nlocal;
|
inum = atom->nlocal;
|
||||||
|
|
||||||
|
// select the correct cutoff (off2) for the term
|
||||||
|
|
||||||
|
if (use_ewald) choose(POLAR_LONG);
|
||||||
|
else choose(POLAR);
|
||||||
|
|
||||||
firstneigh = amoeba_gpu_compute_umutual2b(neighbor->ago, inum, nall, atom->x,
|
firstneigh = amoeba_gpu_compute_umutual2b(neighbor->ago, inum, nall, atom->x,
|
||||||
atom->type, amtype, amgroup, rpole, uind, uinp,
|
atom->type, amtype, amgroup, rpole, uind, uinp,
|
||||||
sublo, subhi, atom->tag, atom->nspecial, atom->special,
|
sublo, subhi, atom->tag, atom->nspecial, atom->special,
|
||||||
atom->nspecial15, atom->special15,
|
atom->nspecial15, atom->special15,
|
||||||
eflag, vflag, eflag_atom, vflag_atom,
|
eflag, vflag, eflag_atom, vflag_atom,
|
||||||
host_start, &ilist, &numneigh, cpu_time,
|
host_start, &ilist, &numneigh, cpu_time,
|
||||||
success, atom->q, domain->boxlo,
|
success, off2, atom->q, domain->boxlo,
|
||||||
domain->prd, &fieldp_pinned);
|
domain->prd, &fieldp_pinned);
|
||||||
if (!success)
|
if (!success)
|
||||||
error->one(FLERR,"Insufficient memory on accelerator");
|
error->one(FLERR,"Insufficient memory on accelerator");
|
||||||
|
|||||||
Reference in New Issue
Block a user