Fixed bugs in HippoT::compute_dispersion_real and HippoT::compute_multipole_real to ensure that answers are copied back from the device only by the last kernel launched.
This commit is contained in:
@ -72,7 +72,6 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
|
||||
// specific to HIPPO
|
||||
k_dispersion.set_function(*(this->pair_program),"k_hippo_dispersion");
|
||||
_pval.alloc(this->_max_tep_size,*(this->ucl_device),UCL_READ_ONLY,UCL_READ_ONLY);
|
||||
|
||||
// If atom type constants fit in shared memory use fast kernel
|
||||
int lj_types=ntypes;
|
||||
@ -312,8 +311,8 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full,
|
||||
// only copy them back if this is the last kernel
|
||||
// otherwise, comment out these two lines to leave the answers
|
||||
// (forces, energies and virial) on the device until the last kernel
|
||||
this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
||||
this->device->add_ans_object(this->ans);
|
||||
//this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
||||
//this->device->add_ans_object(this->ans);
|
||||
|
||||
this->hd_balancer.stop_timer();
|
||||
|
||||
@ -430,9 +429,9 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
|
||||
const int red_blocks=multipole_real(eflag,vflag);
|
||||
|
||||
// leave the answers (forces, energies and virial) on the device,
|
||||
// only copy them back in the last kernel (polar_real)
|
||||
//ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
||||
//device->add_ans_object(ans);
|
||||
// only copy them back in the last kernel (this one, or polar_real once done)
|
||||
this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
||||
this->device->add_ans_object(this->ans);
|
||||
|
||||
this->hd_balancer.stop_timer();
|
||||
|
||||
|
||||
@ -1032,9 +1032,6 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
||||
numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
|
||||
numtyp valk = polar6[j].x;
|
||||
|
||||
if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n",
|
||||
j, corei, corek, alphai, alphak, vali, valk);
|
||||
|
||||
// intermediates involving moments and separation distance
|
||||
|
||||
numtyp dir = dix*xr + diy*yr + diz*zr;
|
||||
|
||||
@ -130,8 +130,6 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
||||
|
||||
UCL_Kernel k_dispersion;
|
||||
|
||||
UCL_Vector<acctyp,acctyp> _pval;
|
||||
|
||||
protected:
|
||||
bool _allocated;
|
||||
int dispersion_real(const int eflag, const int vflag);
|
||||
|
||||
Reference in New Issue
Block a user