Added necessary arguments to the hippo repulsion kernel

This commit is contained in:
Trung Nguyen
2021-09-29 09:40:33 -05:00
parent 17edd797a7
commit 4be44c386f
5 changed files with 66 additions and 29 deletions

View File

@ -57,6 +57,7 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
const double *host_special_polar_wscale, const double *host_special_polar_wscale,
const double *host_special_polar_piscale, const double *host_special_polar_piscale,
const double *host_special_polar_pscale, const double *host_special_polar_pscale,
const double *host_sizpr, const double *host_dmppr, const double *host_elepr,
const double *host_csix, const double *host_adisp, const double *host_csix, const double *host_adisp,
const double *host_pcore, const double *host_palpha, const double *host_pcore, const double *host_palpha,
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
@ -99,6 +100,16 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
coeff_amtype.alloc(max_amtype,*(this->ucl_device), UCL_READ_ONLY); coeff_amtype.alloc(max_amtype,*(this->ucl_device), UCL_READ_ONLY);
ucl_copy(coeff_amtype,host_write,false); ucl_copy(coeff_amtype,host_write,false);
for (int i = 0; i < max_amtype; i++) {
host_write[i].x = host_sizpr[i];
host_write[i].y = host_dmppr[i];
host_write[i].z = host_elepr[i];
host_write[i].w = (numtyp)0;
}
coeff_rep.alloc(max_amtype,*(this->ucl_device), UCL_READ_ONLY);
ucl_copy(coeff_rep,host_write,false);
UCL_H_Vec<numtyp4> host_write2(max_amclass, *(this->ucl_device), UCL_WRITE_ONLY); UCL_H_Vec<numtyp4> host_write2(max_amclass, *(this->ucl_device), UCL_WRITE_ONLY);
for (int i = 0; i < max_amclass; i++) { for (int i = 0; i < max_amclass; i++) {
host_write2[i].x = host_csix[i]; host_write2[i].x = host_csix[i];
@ -133,7 +144,7 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
_polar_uscale = polar_uscale; _polar_uscale = polar_uscale;
_allocated=true; _allocated=true;
this->_max_bytes=coeff_amtype.row_bytes() + coeff_amclass.row_bytes() this->_max_bytes=coeff_amtype.row_bytes() + coeff_rep.row_bytes() + coeff_amclass.row_bytes() +
+ sp_polar.row_bytes() + sp_nonpolar.row_bytes() + this->_tep.row_bytes(); + sp_polar.row_bytes() + sp_nonpolar.row_bytes() + this->_tep.row_bytes();
return 0; return 0;
} }
@ -145,6 +156,7 @@ void HippoT::clear() {
_allocated=false; _allocated=false;
coeff_amtype.clear(); coeff_amtype.clear();
coeff_rep.clear();
coeff_amclass.clear(); coeff_amclass.clear();
sp_polar.clear(); sp_polar.clear();
sp_nonpolar.clear(); sp_nonpolar.clear();
@ -173,7 +185,9 @@ int** HippoT::compute_repulsion(const int ago, const int inum_full,
int &host_start, int **ilist, int **jnum, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, const double cpu_time, bool &success,
const double aewald, const double off2_repulse, const double aewald, const double off2_repulse,
double *host_q, double *boxlo, double *prd, void **tep_ptr) { double *host_q, double *boxlo, double *prd,
double cut2, double c0, double c1, double c2,
double c3, double c4, double c5, void **tep_ptr) {
this->acc_timers(); this->acc_timers();
int eflag, vflag; int eflag, vflag;
if (eatom) eflag=2; if (eatom) eflag=2;
@ -219,7 +233,13 @@ int** HippoT::compute_repulsion(const int ago, const int inum_full,
*tep_ptr=this->_tep.host.begin(); *tep_ptr=this->_tep.host.begin();
this->_off2_repulse = off2_repulse; this->_off2_repulse = off2_repulse;
this->_aewald = aewald; _cut2 = cut2;
_c0 = c0;
_c1 = c1;
_c2 = c2;
_c3 = c3;
_c4 = c4;
_c5 = c5;
const int red_blocks=repulsion(eflag,vflag); const int red_blocks=repulsion(eflag,vflag);
// only copy them back if this is the last kernel // only copy them back if this is the last kernel
@ -266,13 +286,13 @@ int HippoT::repulsion(const int eflag, const int vflag) {
k_repulsion.set_size(GX,BX); k_repulsion.set_size(GX,BX);
k_repulsion.run(&this->atom->x, &this->atom->extra, k_repulsion.run(&this->atom->x, &this->atom->extra,
&coeff_amtype, &coeff_amclass, &sp_nonpolar, &coeff_rep, &sp_nonpolar,
&this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->dev_short_nbor, &this->dev_short_nbor,
&this->ans->force, &this->ans->engv, &this->_tep, &this->ans->force, &this->ans->engv, &this->_tep,
&eflag, &vflag, &ainum, &_nall, &nbor_pitch, &eflag, &vflag, &ainum, &_nall, &nbor_pitch,
&this->_threads_per_atom, &this->_aewald, &this->_threads_per_atom, &this->_aewald,
&this->_off2_repulse); &this->_off2_repulse, &_cut2, &_c0, &_c1, &_c2, &_c3, &_c4, &_c5);
this->time_pair.stop(); this->time_pair.stop();
return GX; return GX;

View File

@ -411,7 +411,7 @@ _texture( q_tex,int2);
__kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_, __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
const __global numtyp *restrict extra, const __global numtyp *restrict extra,
const __global numtyp4 *restrict coeff, const __global numtyp4 *restrict coeff_rep,
const __global numtyp4 *restrict sp_nonpolar, const __global numtyp4 *restrict sp_nonpolar,
const __global int *dev_nbor, const __global int *dev_nbor,
const __global int *dev_packed, const __global int *dev_packed,
@ -480,9 +480,9 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
numtyp qiyz = pol3i.x; // rpole[i][9]; numtyp qiyz = pol3i.x; // rpole[i][9];
numtyp qizz = pol3i.y; // rpole[i][12]; numtyp qizz = pol3i.y; // rpole[i][12];
int itype = pol3i.z; // amtype[i]; int itype = pol3i.z; // amtype[i];
numtyp sizi = coeff[itype].x; // sizpr[itype]; numtyp sizi = coeff_rep[itype].x; // sizpr[itype];
numtyp dmpi = coeff[itype].y; // dmppr[itype]; numtyp dmpi = coeff_rep[itype].y; // dmppr[itype];
numtyp vali = coeff[itype].z; // elepr[itype]; numtyp vali = coeff_rep[itype].z; // elepr[itype];
for ( ; nbor<nbor_end; nbor+=n_stride) { for ( ; nbor<nbor_end; nbor+=n_stride) {
@ -515,9 +515,9 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
numtyp qkzz = pol3j.y; // rpole[j][12]; numtyp qkzz = pol3j.y; // rpole[j][12];
int jtype = pol3j.z; // amtype[j]; int jtype = pol3j.z; // amtype[j];
numtyp sizk = coeff[jtype].x; // sizpr[jtype]; numtyp sizk = coeff_rep[jtype].x; // sizpr[jtype];
numtyp dmpk = coeff[jtype].y; // dmppr[jtype]; numtyp dmpk = coeff_rep[jtype].y; // dmppr[jtype];
numtyp valk = coeff[jtype].z; // elepr[jtype]; numtyp valk = coeff_rep[jtype].z; // elepr[jtype];
const numtyp4 sp_nonpol = sp_nonpolar[sbmask15(jextra)]; const numtyp4 sp_nonpol = sp_nonpolar[sbmask15(jextra)];
numtyp factor_repel = sp_nonpol.y; // factor_repel = special_repel[sbmask15(j)]; numtyp factor_repel = sp_nonpol.y; // factor_repel = special_repel[sbmask15(j)];

View File

@ -47,6 +47,7 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
const double *host_special_polar_wscale, const double *host_special_polar_wscale,
const double *host_special_polar_piscale, const double *host_special_polar_piscale,
const double *host_special_polar_pscale, const double *host_special_polar_pscale,
const double *host_sizpr, const double *host_dmppr, const double *host_elepr,
const double *host_csix, const double *host_adisp, const double *host_csix, const double *host_adisp,
const double *host_pcore, const double *host_palpha, const double *host_pcore, const double *host_palpha,
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
@ -56,18 +57,20 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
/// Compute repulsion with device neighboring /// Compute repulsion with device neighboring
int** compute_repulsion(const int ago, const int inum_full, int** compute_repulsion(const int ago, const int inum_full,
const int nall, double **host_x, const int nall, double **host_x,
int *host_type, int *host_amtype, int *host_type, int *host_amtype,
int *host_amgroup, double **host_rpole, int *host_amgroup, double **host_rpole,
double *sublo, double *subhi, tagint *tag, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, int **nspecial, tagint **special,
int *nspecial15, tagint **special15, int *nspecial15, tagint **special15,
const bool eflag_in, const bool vflag_in, const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, const double cpu_time, bool &success,
const double aewald, const double off2_repulse, const double aewald, const double off2_repulse,
double *host_q, double *boxlo, double *prd, void** tep_ptr); double *host_q, double *boxlo, double *prd,
double cut2, double c0, double c1, double c2,
double c3, double c4, double c5,void** tep_ptr);
/// Compute dispersion real-space with device neighboring /// Compute dispersion real-space with device neighboring
int** compute_dispersion_real(const int ago, const int inum_full, const int nall, int** compute_dispersion_real(const int ago, const int inum_full, const int nall,
@ -157,6 +160,8 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
UCL_D_Vec<numtyp4> coeff_amtype; UCL_D_Vec<numtyp4> coeff_amtype;
/// csix = coeff_amclass.x; adisp = coeff_amclass.y; /// csix = coeff_amclass.x; adisp = coeff_amclass.y;
UCL_D_Vec<numtyp4> coeff_amclass; UCL_D_Vec<numtyp4> coeff_amclass;
/// sizpr = coeff_rep.x; dmppr = coeff_rep.y; elepr = coeff_rep.z;
UCL_D_Vec<numtyp4> coeff_rep;
/// Special polar values [0-4]: /// Special polar values [0-4]:
/// sp_polar.x = special_polar_wscale /// sp_polar.x = special_polar_wscale
/// sp_polar.y special_polar_pscale, /// sp_polar.y special_polar_pscale,
@ -175,6 +180,7 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
/// Number of atom types /// Number of atom types
int _lj_types; int _lj_types;
numtyp _cut2,_c0,_c1,_c2,_c3,_c4,_c5;
numtyp _polar_dscale, _polar_uscale; numtyp _polar_dscale, _polar_uscale;
numtyp _qqrd2e; numtyp _qqrd2e;

View File

@ -37,6 +37,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
const double *host_special_polar_wscale, const double *host_special_polar_wscale,
const double *host_special_polar_piscale, const double *host_special_polar_piscale,
const double *host_special_polar_pscale, const double *host_special_polar_pscale,
const double *host_sizpr, const double *host_dmppr, const double *host_elepr,
const double *host_csix, const double *host_adisp, const double *host_csix, const double *host_adisp,
const double *host_pcore, const double *host_palpha, const double *host_pcore, const double *host_palpha,
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
@ -74,6 +75,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
host_special_repel, host_special_disp, host_special_repel, host_special_disp,
host_special_mpole, host_special_polar_wscale, host_special_mpole, host_special_polar_wscale,
host_special_polar_piscale, host_special_polar_pscale, host_special_polar_piscale, host_special_polar_pscale,
host_sizpr, host_dmppr, host_elepr,
host_csix, host_adisp, host_pcore, host_palpha, host_csix, host_adisp, host_pcore, host_palpha,
nlocal, nall, max_nbors, nlocal, nall, max_nbors,
maxspecial, maxspecial15, cell_size, gpu_split, maxspecial, maxspecial15, cell_size, gpu_split,
@ -99,6 +101,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
host_special_repel, host_special_disp, host_special_repel, host_special_disp,
host_special_mpole, host_special_polar_wscale, host_special_mpole, host_special_polar_wscale,
host_special_polar_piscale, host_special_polar_pscale, host_special_polar_piscale, host_special_polar_pscale,
host_sizpr, host_dmppr, host_elepr,
host_csix, host_adisp, host_pcore, host_palpha, host_csix, host_adisp, host_pcore, host_palpha,
nlocal, nall, max_nbors, nlocal, nall, max_nbors,
maxspecial, maxspecial15, cell_size, gpu_split, maxspecial, maxspecial15, cell_size, gpu_split,
@ -129,12 +132,15 @@ int** hippo_gpu_compute_repulsion(const int ago, const int inum_full,
const bool vatom, int &host_start, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time, int **ilist, int **jnum, const double cpu_time,
bool &success, const double aewald, const double off2, bool &success, const double aewald, const double off2,
double *host_q, double *boxlo, double *prd, void **tep_ptr) { double *host_q, double *boxlo, double *prd,
double cut2, double c0, double c1, double c2,
double c3, double c4, double c5, void **tep_ptr) {
return HIPPOMF.compute_repulsion(ago, inum_full, nall, host_x, host_type, return HIPPOMF.compute_repulsion(ago, inum_full, nall, host_x, host_type,
host_amtype, host_amgroup, host_rpole, sublo, subhi, host_amtype, host_amgroup, host_rpole, sublo, subhi,
tag, nspecial, special, nspecial15, special15, tag, nspecial, special, nspecial15, special15,
eflag, vflag, eatom, vatom, host_start, ilist, jnum, eflag, vflag, eatom, vatom, host_start, ilist, jnum,
cpu_time, success, aewald, off2, host_q, boxlo, prd, tep_ptr); cpu_time, success, aewald, off2, host_q, boxlo, prd,
cut2, c0, c1, c2, c3, c4, c5, tep_ptr);
} }
int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full, int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full,

View File

@ -58,6 +58,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
const double *host_special_polar_wscale, const double *host_special_polar_wscale,
const double *host_special_polar_piscale, const double *host_special_polar_piscale,
const double *host_special_polar_pscale, const double *host_special_polar_pscale,
const double *host_sizpr, const double *host_dmppr, const double *host_elepr,
const double *host_csix, const double *host_adisp, const double *host_csix, const double *host_adisp,
const double *host_pcore, const double *host_palpha, const double *host_pcore, const double *host_palpha,
const int nlocal, const int nall, const int max_nbors, const int nlocal, const int nall, const int max_nbors,
@ -75,7 +76,9 @@ int** hippo_gpu_compute_repulsion(const int ago, const int inum_full,
const bool vatom, int &host_start, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time, int **ilist, int **jnum, const double cpu_time,
bool &success, const double aewald, const double off2, bool &success, const double aewald, const double off2,
double *host_q, double *boxlo, double *prd, void **tep_ptr); double *host_q, double *boxlo, double *prd,
double cut2, double c0, double c1, double c2,
double c3, double c4, double c5, void **tep_ptr);
int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full, int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type, const int nall, double **host_x, int *host_type,
@ -203,7 +206,8 @@ void PairHippoGPU::init_style()
pdamp, thole, dirdamp, amtype2class, special_hal, pdamp, thole, dirdamp, amtype2class, special_hal,
special_repel, special_disp, special_mpole, special_repel, special_disp, special_mpole,
special_polar_wscale, special_polar_piscale, special_polar_wscale, special_polar_piscale,
special_polar_pscale, csix, adisp, pcore, palpha, special_polar_pscale, sizpr, dmppr, elepr,
csix, adisp, pcore, palpha,
atom->nlocal, atom->nlocal+atom->nghost, mnf, atom->nlocal, atom->nlocal+atom->nghost, mnf,
maxspecial, maxspecial15, cell_size, gpu_mode, maxspecial, maxspecial15, cell_size, gpu_mode,
screen, polar_dscale, polar_uscale, tq_size); screen, polar_dscale, polar_uscale, tq_size);
@ -261,7 +265,8 @@ void PairHippoGPU::repulsion()
eflag, vflag, eflag_atom, vflag_atom, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, host_start, &ilist, &numneigh, cpu_time,
success, aewald, off2, atom->q, success, aewald, off2, atom->q,
domain->boxlo, domain->prd, &tq_pinned); domain->boxlo, domain->prd, cut2,
c0, c1, c2, c3, c4, c5, &tq_pinned);
if (!success) if (!success)
error->one(FLERR,"Insufficient memory on accelerator"); error->one(FLERR,"Insufficient memory on accelerator");