Fixed bugs in HippoT::compute_dispersion_real and HippoT::compute_multipole_real to ensure that the answers are copied back from the device only by the last kernel launched.

This commit is contained in:
Trung Nguyen
2021-09-26 00:13:40 -05:00
parent f8bc091cb8
commit edbed9c9c9
5 changed files with 11 additions and 16 deletions

View File

@ -72,7 +72,6 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
// specific to HIPPO
k_dispersion.set_function(*(this->pair_program),"k_hippo_dispersion");
_pval.alloc(this->_max_tep_size,*(this->ucl_device),UCL_READ_ONLY,UCL_READ_ONLY);
// If atom type constants fit in shared memory use fast kernel
int lj_types=ntypes;
@ -312,8 +311,8 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full,
// only copy them back if this is the last kernel
// otherwise, commenting out these two lines to leave the answers
// (forces, energies and virial) on the device until the last kernel
this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
this->device->add_ans_object(this->ans);
//this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
//this->device->add_ans_object(this->ans);
this->hd_balancer.stop_timer();
@ -430,9 +429,9 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
const int red_blocks=multipole_real(eflag,vflag);
// leave the answers (forces, energies and virial) on the device,
// only copy them back in the last kernel (polar_real)
//ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
//device->add_ans_object(ans);
// only copy them back in the last kernel (this one, or polar_real once done)
this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
this->device->add_ans_object(this->ans);
this->hd_balancer.stop_timer();

View File

@ -1032,9 +1032,6 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
numtyp valk = polar6[j].x;
if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n",
j, corei, corek, alphai, alphak, vali, valk);
// intermediates involving moments and separation distance
numtyp dir = dix*xr + diy*yr + diz*zr;

View File

@ -130,8 +130,6 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
UCL_Kernel k_dispersion;
UCL_Vector<acctyp,acctyp> _pval;
protected:
bool _allocated;
int dispersion_real(const int eflag, const int vflag);

View File

@ -379,8 +379,7 @@ void PairAmoeba::multipole_real()
corek = pcore[jclass];
alphak = palpha[jclass];
valk = pval[j];
if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n",
j, corei, corek, alphai, alphak, vali, valk);
/*
printf("HIPPO MPOLE ij %d %d: pcore/alpha/val I %g %g %g: J %g %g %g\n",
atom->tag[i],atom->tag[j],corei,alphai,vali,corek,alphak,valk);
@ -421,6 +420,8 @@ void PairAmoeba::multipole_real()
term2i*rr3i + term2k*rr3k + term2ik*rr3ik +
term3i*rr5i + term3k*rr5k + term3ik*rr5ik;
// find damped multipole intermediates for force and torque
de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik +

View File

@ -292,7 +292,7 @@ void PairHippoGPU::multipole_real()
// set the energy unit conversion factor for multipolar real-space calculation
double felec = electric / am_dielectric;
printf("hippo gpu multipole\n");
firstneigh = hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x,
atom->type, amtype, amgroup, rpole, pval,
sublo, subhi, atom->tag,