Fixed bugs in HippoT::compute_dispersion_real and HippoT::compute_multipole_real to ensure that answers are copied back from the device only in the last activated kernel.
This commit is contained in:
@ -72,7 +72,6 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
|||||||
|
|
||||||
// specific to HIPPO
|
// specific to HIPPO
|
||||||
k_dispersion.set_function(*(this->pair_program),"k_hippo_dispersion");
|
k_dispersion.set_function(*(this->pair_program),"k_hippo_dispersion");
|
||||||
_pval.alloc(this->_max_tep_size,*(this->ucl_device),UCL_READ_ONLY,UCL_READ_ONLY);
|
|
||||||
|
|
||||||
// If atom type constants fit in shared memory use fast kernel
|
// If atom type constants fit in shared memory use fast kernel
|
||||||
int lj_types=ntypes;
|
int lj_types=ntypes;
|
||||||
@ -312,8 +311,8 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full,
|
|||||||
// only copy them back if this is the last kernel
|
// only copy them back if this is the last kernel
|
||||||
// otherwise, commenting out these two lines to leave the answers
|
// otherwise, commenting out these two lines to leave the answers
|
||||||
// (forces, energies and virial) on the device until the last kernel
|
// (forces, energies and virial) on the device until the last kernel
|
||||||
this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
//this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
||||||
this->device->add_ans_object(this->ans);
|
//this->device->add_ans_object(this->ans);
|
||||||
|
|
||||||
this->hd_balancer.stop_timer();
|
this->hd_balancer.stop_timer();
|
||||||
|
|
||||||
@ -430,9 +429,9 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
|
|||||||
const int red_blocks=multipole_real(eflag,vflag);
|
const int red_blocks=multipole_real(eflag,vflag);
|
||||||
|
|
||||||
// leave the answers (forces, energies and virial) on the device,
|
// leave the answers (forces, energies and virial) on the device,
|
||||||
// only copy them back in the last kernel (polar_real)
|
// only copy them back in the last kernel (this one, or polar_real once done)
|
||||||
//ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
||||||
//device->add_ans_object(ans);
|
this->device->add_ans_object(this->ans);
|
||||||
|
|
||||||
this->hd_balancer.stop_timer();
|
this->hd_balancer.stop_timer();
|
||||||
|
|
||||||
|
|||||||
@ -1032,9 +1032,6 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
|
numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
|
||||||
numtyp valk = polar6[j].x;
|
numtyp valk = polar6[j].x;
|
||||||
|
|
||||||
if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n",
|
|
||||||
j, corei, corek, alphai, alphak, vali, valk);
|
|
||||||
|
|
||||||
// intermediates involving moments and separation distance
|
// intermediates involving moments and separation distance
|
||||||
|
|
||||||
numtyp dir = dix*xr + diy*yr + diz*zr;
|
numtyp dir = dix*xr + diy*yr + diz*zr;
|
||||||
|
|||||||
@ -130,8 +130,6 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
|||||||
|
|
||||||
UCL_Kernel k_dispersion;
|
UCL_Kernel k_dispersion;
|
||||||
|
|
||||||
UCL_Vector<acctyp,acctyp> _pval;
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
bool _allocated;
|
bool _allocated;
|
||||||
int dispersion_real(const int eflag, const int vflag);
|
int dispersion_real(const int eflag, const int vflag);
|
||||||
|
|||||||
@ -379,8 +379,7 @@ void PairAmoeba::multipole_real()
|
|||||||
corek = pcore[jclass];
|
corek = pcore[jclass];
|
||||||
alphak = palpha[jclass];
|
alphak = palpha[jclass];
|
||||||
valk = pval[j];
|
valk = pval[j];
|
||||||
if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n",
|
|
||||||
j, corei, corek, alphai, alphak, vali, valk);
|
|
||||||
/*
|
/*
|
||||||
printf("HIPPO MPOLE ij %d %d: pcore/alpha/val I %g %g %g: J %g %g %g\n",
|
printf("HIPPO MPOLE ij %d %d: pcore/alpha/val I %g %g %g: J %g %g %g\n",
|
||||||
atom->tag[i],atom->tag[j],corei,alphai,vali,corek,alphak,valk);
|
atom->tag[i],atom->tag[j],corei,alphai,vali,corek,alphak,valk);
|
||||||
@ -421,6 +420,8 @@ void PairAmoeba::multipole_real()
|
|||||||
term2i*rr3i + term2k*rr3k + term2ik*rr3ik +
|
term2i*rr3i + term2k*rr3k + term2ik*rr3ik +
|
||||||
term3i*rr5i + term3k*rr5k + term3ik*rr5ik;
|
term3i*rr5i + term3k*rr5k + term3ik*rr5ik;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// find damped multipole intermediates for force and torque
|
// find damped multipole intermediates for force and torque
|
||||||
|
|
||||||
de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik +
|
de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik +
|
||||||
@ -527,14 +528,14 @@ void PairAmoeba::multipole_real()
|
|||||||
|
|
||||||
// increment force-based gradient and torque on second site
|
// increment force-based gradient and torque on second site
|
||||||
// commenting out j parts for DEBUGGING
|
// commenting out j parts for DEBUGGING
|
||||||
|
|
||||||
fmpole[j][0] -= frcx;
|
fmpole[j][0] -= frcx;
|
||||||
fmpole[j][1] -= frcy;
|
fmpole[j][1] -= frcy;
|
||||||
fmpole[j][2] -= frcz;
|
fmpole[j][2] -= frcz;
|
||||||
tq[j][0] += ttmk[0];
|
tq[j][0] += ttmk[0];
|
||||||
tq[j][1] += ttmk[1];
|
tq[j][1] += ttmk[1];
|
||||||
tq[j][2] += ttmk[2];
|
tq[j][2] += ttmk[2];
|
||||||
|
|
||||||
// increment the virial due to pairwise Cartesian forces
|
// increment the virial due to pairwise Cartesian forces
|
||||||
|
|
||||||
vxx = -xr * frcx;
|
vxx = -xr * frcx;
|
||||||
|
|||||||
@ -292,7 +292,7 @@ void PairHippoGPU::multipole_real()
|
|||||||
// set the energy unit conversion factor for multipolar real-space calculation
|
// set the energy unit conversion factor for multipolar real-space calculation
|
||||||
|
|
||||||
double felec = electric / am_dielectric;
|
double felec = electric / am_dielectric;
|
||||||
printf("hippo gpu multipole\n");
|
|
||||||
firstneigh = hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x,
|
firstneigh = hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x,
|
||||||
atom->type, amtype, amgroup, rpole, pval,
|
atom->type, amtype, amgroup, rpole, pval,
|
||||||
sublo, subhi, atom->tag,
|
sublo, subhi, atom->tag,
|
||||||
|
|||||||
Reference in New Issue
Block a user