Fixed bugs in HippoT::compute_dispersion_real and HippoT::compute_multipole_real to ensure that answers are copied back from the device only by the last kernel activated.

This commit is contained in:
Trung Nguyen
2021-09-26 00:13:40 -05:00
parent f8bc091cb8
commit edbed9c9c9
5 changed files with 11 additions and 16 deletions

View File

@ -72,7 +72,6 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
// specific to HIPPO // specific to HIPPO
k_dispersion.set_function(*(this->pair_program),"k_hippo_dispersion"); k_dispersion.set_function(*(this->pair_program),"k_hippo_dispersion");
_pval.alloc(this->_max_tep_size,*(this->ucl_device),UCL_READ_ONLY,UCL_READ_ONLY);
// If atom type constants fit in shared memory use fast kernel // If atom type constants fit in shared memory use fast kernel
int lj_types=ntypes; int lj_types=ntypes;
@ -312,8 +311,8 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full,
// only copy them back if this is the last kernel // only copy them back if this is the last kernel
// otherwise, commenting out these two lines to leave the answers // otherwise, commenting out these two lines to leave the answers
// (forces, energies and virial) on the device until the last kernel // (forces, energies and virial) on the device until the last kernel
this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); //this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
this->device->add_ans_object(this->ans); //this->device->add_ans_object(this->ans);
this->hd_balancer.stop_timer(); this->hd_balancer.stop_timer();
@ -430,9 +429,9 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
const int red_blocks=multipole_real(eflag,vflag); const int red_blocks=multipole_real(eflag,vflag);
// leave the answers (forces, energies and virial) on the device, // leave the answers (forces, energies and virial) on the device,
// only copy them back in the last kernel (polar_real) // only copy them back in the last kernel (this one, or polar_real once done)
//ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks); this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
//device->add_ans_object(ans); this->device->add_ans_object(this->ans);
this->hd_balancer.stop_timer(); this->hd_balancer.stop_timer();

View File

@ -1032,9 +1032,6 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass]; numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
numtyp valk = polar6[j].x; numtyp valk = polar6[j].x;
if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n",
j, corei, corek, alphai, alphak, vali, valk);
// intermediates involving moments and separation distance // intermediates involving moments and separation distance
numtyp dir = dix*xr + diy*yr + diz*zr; numtyp dir = dix*xr + diy*yr + diz*zr;

View File

@ -130,8 +130,6 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
UCL_Kernel k_dispersion; UCL_Kernel k_dispersion;
UCL_Vector<acctyp,acctyp> _pval;
protected: protected:
bool _allocated; bool _allocated;
int dispersion_real(const int eflag, const int vflag); int dispersion_real(const int eflag, const int vflag);

View File

@ -379,8 +379,7 @@ void PairAmoeba::multipole_real()
corek = pcore[jclass]; corek = pcore[jclass];
alphak = palpha[jclass]; alphak = palpha[jclass];
valk = pval[j]; valk = pval[j];
if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n",
j, corei, corek, alphai, alphak, vali, valk);
/* /*
printf("HIPPO MPOLE ij %d %d: pcore/alpha/val I %g %g %g: J %g %g %g\n", printf("HIPPO MPOLE ij %d %d: pcore/alpha/val I %g %g %g: J %g %g %g\n",
atom->tag[i],atom->tag[j],corei,alphai,vali,corek,alphak,valk); atom->tag[i],atom->tag[j],corei,alphai,vali,corek,alphak,valk);
@ -421,6 +420,8 @@ void PairAmoeba::multipole_real()
term2i*rr3i + term2k*rr3k + term2ik*rr3ik + term2i*rr3i + term2k*rr3k + term2ik*rr3ik +
term3i*rr5i + term3k*rr5k + term3ik*rr5ik; term3i*rr5i + term3k*rr5k + term3ik*rr5ik;
// find damped multipole intermediates for force and torque // find damped multipole intermediates for force and torque
de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik + de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik +
@ -527,14 +528,14 @@ void PairAmoeba::multipole_real()
// increment force-based gradient and torque on second site // increment force-based gradient and torque on second site
// commenting out j parts for DEBUGGING // commenting out j parts for DEBUGGING
fmpole[j][0] -= frcx; fmpole[j][0] -= frcx;
fmpole[j][1] -= frcy; fmpole[j][1] -= frcy;
fmpole[j][2] -= frcz; fmpole[j][2] -= frcz;
tq[j][0] += ttmk[0]; tq[j][0] += ttmk[0];
tq[j][1] += ttmk[1]; tq[j][1] += ttmk[1];
tq[j][2] += ttmk[2]; tq[j][2] += ttmk[2];
// increment the virial due to pairwise Cartesian forces // increment the virial due to pairwise Cartesian forces
vxx = -xr * frcx; vxx = -xr * frcx;

View File

@ -292,7 +292,7 @@ void PairHippoGPU::multipole_real()
// set the energy unit conversion factor for multipolar real-space calculation // set the energy unit conversion factor for multipolar real-space calculation
double felec = electric / am_dielectric; double felec = electric / am_dielectric;
printf("hippo gpu multipole\n");
firstneigh = hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x, firstneigh = hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x,
atom->type, amtype, amgroup, rpole, pval, atom->type, amtype, amgroup, rpole, pval,
sublo, subhi, atom->tag, sublo, subhi, atom->tag,