Added necessary arguments to the hippo repulsion kernel
This commit is contained in:
@ -57,6 +57,7 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
const double *host_special_polar_wscale,
|
||||
const double *host_special_polar_piscale,
|
||||
const double *host_special_polar_pscale,
|
||||
const double *host_sizpr, const double *host_dmppr, const double *host_elepr,
|
||||
const double *host_csix, const double *host_adisp,
|
||||
const double *host_pcore, const double *host_palpha,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
@ -99,6 +100,16 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
coeff_amtype.alloc(max_amtype,*(this->ucl_device), UCL_READ_ONLY);
|
||||
ucl_copy(coeff_amtype,host_write,false);
|
||||
|
||||
for (int i = 0; i < max_amtype; i++) {
|
||||
host_write[i].x = host_sizpr[i];
|
||||
host_write[i].y = host_dmppr[i];
|
||||
host_write[i].z = host_elepr[i];
|
||||
host_write[i].w = (numtyp)0;
|
||||
}
|
||||
|
||||
coeff_rep.alloc(max_amtype,*(this->ucl_device), UCL_READ_ONLY);
|
||||
ucl_copy(coeff_rep,host_write,false);
|
||||
|
||||
UCL_H_Vec<numtyp4> host_write2(max_amclass, *(this->ucl_device), UCL_WRITE_ONLY);
|
||||
for (int i = 0; i < max_amclass; i++) {
|
||||
host_write2[i].x = host_csix[i];
|
||||
@ -133,7 +144,7 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
_polar_uscale = polar_uscale;
|
||||
|
||||
_allocated=true;
|
||||
this->_max_bytes=coeff_amtype.row_bytes() + coeff_amclass.row_bytes()
|
||||
this->_max_bytes=coeff_amtype.row_bytes() + coeff_rep.row_bytes() + coeff_amclass.row_bytes() +
|
||||
+ sp_polar.row_bytes() + sp_nonpolar.row_bytes() + this->_tep.row_bytes();
|
||||
return 0;
|
||||
}
|
||||
@ -145,6 +156,7 @@ void HippoT::clear() {
|
||||
_allocated=false;
|
||||
|
||||
coeff_amtype.clear();
|
||||
coeff_rep.clear();
|
||||
coeff_amclass.clear();
|
||||
sp_polar.clear();
|
||||
sp_nonpolar.clear();
|
||||
@ -173,7 +185,9 @@ int** HippoT::compute_repulsion(const int ago, const int inum_full,
|
||||
int &host_start, int **ilist, int **jnum,
|
||||
const double cpu_time, bool &success,
|
||||
const double aewald, const double off2_repulse,
|
||||
double *host_q, double *boxlo, double *prd, void **tep_ptr) {
|
||||
double *host_q, double *boxlo, double *prd,
|
||||
double cut2, double c0, double c1, double c2,
|
||||
double c3, double c4, double c5, void **tep_ptr) {
|
||||
this->acc_timers();
|
||||
int eflag, vflag;
|
||||
if (eatom) eflag=2;
|
||||
@ -219,7 +233,13 @@ int** HippoT::compute_repulsion(const int ago, const int inum_full,
|
||||
*tep_ptr=this->_tep.host.begin();
|
||||
|
||||
this->_off2_repulse = off2_repulse;
|
||||
this->_aewald = aewald;
|
||||
_cut2 = cut2;
|
||||
_c0 = c0;
|
||||
_c1 = c1;
|
||||
_c2 = c2;
|
||||
_c3 = c3;
|
||||
_c4 = c4;
|
||||
_c5 = c5;
|
||||
const int red_blocks=repulsion(eflag,vflag);
|
||||
|
||||
// only copy them back if this is the last kernel
|
||||
@ -266,13 +286,13 @@ int HippoT::repulsion(const int eflag, const int vflag) {
|
||||
|
||||
k_repulsion.set_size(GX,BX);
|
||||
k_repulsion.run(&this->atom->x, &this->atom->extra,
|
||||
&coeff_amtype, &coeff_amclass, &sp_nonpolar,
|
||||
&coeff_rep, &sp_nonpolar,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->dev_short_nbor,
|
||||
&this->ans->force, &this->ans->engv, &this->_tep,
|
||||
&eflag, &vflag, &ainum, &_nall, &nbor_pitch,
|
||||
&this->_threads_per_atom, &this->_aewald,
|
||||
&this->_off2_repulse);
|
||||
&this->_off2_repulse, &_cut2, &_c0, &_c1, &_c2, &_c3, &_c4, &_c5);
|
||||
this->time_pair.stop();
|
||||
|
||||
return GX;
|
||||
|
||||
@ -411,7 +411,7 @@ _texture( q_tex,int2);
|
||||
|
||||
__kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict extra,
|
||||
const __global numtyp4 *restrict coeff,
|
||||
const __global numtyp4 *restrict coeff_rep,
|
||||
const __global numtyp4 *restrict sp_nonpolar,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
@ -480,9 +480,9 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
||||
numtyp qiyz = pol3i.x; // rpole[i][9];
|
||||
numtyp qizz = pol3i.y; // rpole[i][12];
|
||||
int itype = pol3i.z; // amtype[i];
|
||||
numtyp sizi = coeff[itype].x; // sizpr[itype];
|
||||
numtyp dmpi = coeff[itype].y; // dmppr[itype];
|
||||
numtyp vali = coeff[itype].z; // elepr[itype];
|
||||
numtyp sizi = coeff_rep[itype].x; // sizpr[itype];
|
||||
numtyp dmpi = coeff_rep[itype].y; // dmppr[itype];
|
||||
numtyp vali = coeff_rep[itype].z; // elepr[itype];
|
||||
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
|
||||
@ -515,9 +515,9 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
||||
numtyp qkzz = pol3j.y; // rpole[j][12];
|
||||
int jtype = pol3j.z; // amtype[j];
|
||||
|
||||
numtyp sizk = coeff[jtype].x; // sizpr[jtype];
|
||||
numtyp dmpk = coeff[jtype].y; // dmppr[jtype];
|
||||
numtyp valk = coeff[jtype].z; // elepr[jtype];
|
||||
numtyp sizk = coeff_rep[jtype].x; // sizpr[jtype];
|
||||
numtyp dmpk = coeff_rep[jtype].y; // dmppr[jtype];
|
||||
numtyp valk = coeff_rep[jtype].z; // elepr[jtype];
|
||||
|
||||
const numtyp4 sp_nonpol = sp_nonpolar[sbmask15(jextra)];
|
||||
numtyp factor_repel = sp_nonpol.y; // factor_repel = special_repel[sbmask15(j)];
|
||||
|
||||
@ -47,6 +47,7 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
||||
const double *host_special_polar_wscale,
|
||||
const double *host_special_polar_piscale,
|
||||
const double *host_special_polar_pscale,
|
||||
const double *host_sizpr, const double *host_dmppr, const double *host_elepr,
|
||||
const double *host_csix, const double *host_adisp,
|
||||
const double *host_pcore, const double *host_palpha,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
@ -56,18 +57,20 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
||||
|
||||
/// Compute repulsion with device neighboring
|
||||
int** compute_repulsion(const int ago, const int inum_full,
|
||||
const int nall, double **host_x,
|
||||
int *host_type, int *host_amtype,
|
||||
int *host_amgroup, double **host_rpole,
|
||||
double *sublo, double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special,
|
||||
int *nspecial15, tagint **special15,
|
||||
const bool eflag_in, const bool vflag_in,
|
||||
const bool eatom, const bool vatom,
|
||||
int &host_start, int **ilist, int **jnum,
|
||||
const double cpu_time, bool &success,
|
||||
const double aewald, const double off2_repulse,
|
||||
double *host_q, double *boxlo, double *prd, void** tep_ptr);
|
||||
const int nall, double **host_x,
|
||||
int *host_type, int *host_amtype,
|
||||
int *host_amgroup, double **host_rpole,
|
||||
double *sublo, double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special,
|
||||
int *nspecial15, tagint **special15,
|
||||
const bool eflag_in, const bool vflag_in,
|
||||
const bool eatom, const bool vatom,
|
||||
int &host_start, int **ilist, int **jnum,
|
||||
const double cpu_time, bool &success,
|
||||
const double aewald, const double off2_repulse,
|
||||
double *host_q, double *boxlo, double *prd,
|
||||
double cut2, double c0, double c1, double c2,
|
||||
double c3, double c4, double c5,void** tep_ptr);
|
||||
|
||||
/// Compute dispersion real-space with device neighboring
|
||||
int** compute_dispersion_real(const int ago, const int inum_full, const int nall,
|
||||
@ -157,6 +160,8 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
||||
UCL_D_Vec<numtyp4> coeff_amtype;
|
||||
/// csix = coeff_amclass.x; adisp = coeff_amclass.y;
|
||||
UCL_D_Vec<numtyp4> coeff_amclass;
|
||||
/// sizpr = coeff_rep.x; dmppr = coeff_rep.y; elepr = coeff_rep.z;
|
||||
UCL_D_Vec<numtyp4> coeff_rep;
|
||||
/// Special polar values [0-4]:
|
||||
/// sp_polar.x = special_polar_wscale
|
||||
/// sp_polar.y special_polar_pscale,
|
||||
@ -175,6 +180,7 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
||||
/// Number of atom types
|
||||
int _lj_types;
|
||||
|
||||
numtyp _cut2,_c0,_c1,_c2,_c3,_c4,_c5;
|
||||
numtyp _polar_dscale, _polar_uscale;
|
||||
numtyp _qqrd2e;
|
||||
|
||||
|
||||
@ -37,6 +37,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
|
||||
const double *host_special_polar_wscale,
|
||||
const double *host_special_polar_piscale,
|
||||
const double *host_special_polar_pscale,
|
||||
const double *host_sizpr, const double *host_dmppr, const double *host_elepr,
|
||||
const double *host_csix, const double *host_adisp,
|
||||
const double *host_pcore, const double *host_palpha,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
@ -74,6 +75,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
|
||||
host_special_repel, host_special_disp,
|
||||
host_special_mpole, host_special_polar_wscale,
|
||||
host_special_polar_piscale, host_special_polar_pscale,
|
||||
host_sizpr, host_dmppr, host_elepr,
|
||||
host_csix, host_adisp, host_pcore, host_palpha,
|
||||
nlocal, nall, max_nbors,
|
||||
maxspecial, maxspecial15, cell_size, gpu_split,
|
||||
@ -99,6 +101,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
|
||||
host_special_repel, host_special_disp,
|
||||
host_special_mpole, host_special_polar_wscale,
|
||||
host_special_polar_piscale, host_special_polar_pscale,
|
||||
host_sizpr, host_dmppr, host_elepr,
|
||||
host_csix, host_adisp, host_pcore, host_palpha,
|
||||
nlocal, nall, max_nbors,
|
||||
maxspecial, maxspecial15, cell_size, gpu_split,
|
||||
@ -129,12 +132,15 @@ int** hippo_gpu_compute_repulsion(const int ago, const int inum_full,
|
||||
const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, const double aewald, const double off2,
|
||||
double *host_q, double *boxlo, double *prd, void **tep_ptr) {
|
||||
double *host_q, double *boxlo, double *prd,
|
||||
double cut2, double c0, double c1, double c2,
|
||||
double c3, double c4, double c5, void **tep_ptr) {
|
||||
return HIPPOMF.compute_repulsion(ago, inum_full, nall, host_x, host_type,
|
||||
host_amtype, host_amgroup, host_rpole, sublo, subhi,
|
||||
tag, nspecial, special, nspecial15, special15,
|
||||
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
|
||||
cpu_time, success, aewald, off2, host_q, boxlo, prd, tep_ptr);
|
||||
cpu_time, success, aewald, off2, host_q, boxlo, prd,
|
||||
cut2, c0, c1, c2, c3, c4, c5, tep_ptr);
|
||||
}
|
||||
|
||||
int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full,
|
||||
|
||||
@ -58,6 +58,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
|
||||
const double *host_special_polar_wscale,
|
||||
const double *host_special_polar_piscale,
|
||||
const double *host_special_polar_pscale,
|
||||
const double *host_sizpr, const double *host_dmppr, const double *host_elepr,
|
||||
const double *host_csix, const double *host_adisp,
|
||||
const double *host_pcore, const double *host_palpha,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
@ -75,7 +76,9 @@ int** hippo_gpu_compute_repulsion(const int ago, const int inum_full,
|
||||
const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, const double aewald, const double off2,
|
||||
double *host_q, double *boxlo, double *prd, void **tep_ptr);
|
||||
double *host_q, double *boxlo, double *prd,
|
||||
double cut2, double c0, double c1, double c2,
|
||||
double c3, double c4, double c5, void **tep_ptr);
|
||||
|
||||
int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
@ -203,7 +206,8 @@ void PairHippoGPU::init_style()
|
||||
pdamp, thole, dirdamp, amtype2class, special_hal,
|
||||
special_repel, special_disp, special_mpole,
|
||||
special_polar_wscale, special_polar_piscale,
|
||||
special_polar_pscale, csix, adisp, pcore, palpha,
|
||||
special_polar_pscale, sizpr, dmppr, elepr,
|
||||
csix, adisp, pcore, palpha,
|
||||
atom->nlocal, atom->nlocal+atom->nghost, mnf,
|
||||
maxspecial, maxspecial15, cell_size, gpu_mode,
|
||||
screen, polar_dscale, polar_uscale, tq_size);
|
||||
@ -261,7 +265,8 @@ void PairHippoGPU::repulsion()
|
||||
eflag, vflag, eflag_atom, vflag_atom,
|
||||
host_start, &ilist, &numneigh, cpu_time,
|
||||
success, aewald, off2, atom->q,
|
||||
domain->boxlo, domain->prd, &tq_pinned);
|
||||
domain->boxlo, domain->prd, cut2,
|
||||
c0, c1, c2, c3, c4, c5, &tq_pinned);
|
||||
|
||||
if (!success)
|
||||
error->one(FLERR,"Insufficient memory on accelerator");
|
||||
|
||||
Reference in New Issue
Block a user