Fixed bugs in HippoT::compute_dispersion_real and HippoT::compute_multipole_real so that answers are copied back from the device only in the last kernel activated.

2021-09-26 00:13:40 -05:00
parent f8bc091cb8
commit edbed9c9c9
5 changed files with 11 additions and 16 deletions
--- a/lib/gpu/lal_hippo.cpp
+++ b/lib/gpu/lal_hippo.cpp
@ -72,7 +72,6 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
  // specific to HIPPO
  k_dispersion.set_function(*(this->pair_program),"k_hippo_dispersion");
  _pval.alloc(this->_max_tep_size,*(this->ucl_device),UCL_READ_ONLY,UCL_READ_ONLY);
  // If atom type constants fit in shared memory use fast kernel
  int lj_types=ntypes;
@ -312,8 +311,8 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full,
  // only copy them back if this is the last kernel
  //   otherwise, commenting out these two lines to leave the answers
  //   (forces, energies and virial) on the device until the last kernel
-  this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
+  //this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
-  this->device->add_ans_object(this->ans);
+  //this->device->add_ans_object(this->ans);
  this->hd_balancer.stop_timer();
@ -430,9 +429,9 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
  const int red_blocks=multipole_real(eflag,vflag);
  // leave the answers (forces, energies and virial) on the device,
-  //   only copy them back in the last kernel (polar_real)
+  //   only copy them back in the last kernel (this one, or polar_real once done)
-  //ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
+  this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
-  //device->add_ans_object(ans);
+  this->device->add_ans_object(this->ans);
  this->hd_balancer.stop_timer();
--- a/lib/gpu/lal_hippo.cu
+++ b/lib/gpu/lal_hippo.cu
@ -1032,9 +1032,6 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
      numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
      numtyp valk = polar6[j].x;
      if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n",
        j, corei, corek, alphai, alphak, vali, valk);
      // intermediates involving moments and separation distance
      numtyp dir = dix*xr + diy*yr + diz*zr;
--- a/lib/gpu/lal_hippo.h
+++ b/lib/gpu/lal_hippo.h
@ -130,8 +130,6 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
  UCL_Kernel k_dispersion;
  UCL_Vector<acctyp,acctyp> _pval;
 protected:
  bool _allocated;
  int dispersion_real(const int eflag, const int vflag);
--- a/src/AMOEBA/amoeba_multipole.cpp
+++ b/src/AMOEBA/amoeba_multipole.cpp
@ -379,8 +379,7 @@ void PairAmoeba::multipole_real()
        corek = pcore[jclass];
        alphak = palpha[jclass];
        valk = pval[j];
-        if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n",
+
          j, corei, corek, alphai, alphak, vali, valk);
 	/*
 	printf("HIPPO MPOLE ij %d %d: pcore/alpha/val I %g %g %g: J %g %g %g\n",
 	       atom->tag[i],atom->tag[j],corei,alphai,vali,corek,alphak,valk);
@ -421,6 +420,8 @@ void PairAmoeba::multipole_real()
          term2i*rr3i + term2k*rr3k + term2ik*rr3ik + 
          term3i*rr5i + term3k*rr5k + term3ik*rr5ik;
        // find damped multipole intermediates for force and torque
        de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik + 
@ -527,14 +528,14 @@ void PairAmoeba::multipole_real()
      // increment force-based gradient and torque on second site
      // commenting out j parts for DEBUGGING
-      
+
      fmpole[j][0] -= frcx;
      fmpole[j][1] -= frcy;
      fmpole[j][2] -= frcz;
      tq[j][0] += ttmk[0];
      tq[j][1] += ttmk[1];
      tq[j][2] += ttmk[2];
-      
+
      // increment the virial due to pairwise Cartesian forces
      vxx = -xr * frcx;
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@ -292,7 +292,7 @@ void PairHippoGPU::multipole_real()
  // set the energy unit conversion factor for multipolar real-space calculation
  double felec = electric / am_dielectric;
-  printf("hippo gpu multipole\n");
+
  firstneigh = hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x,
                                                atom->type, amtype, amgroup, rpole, pval,
                                                sublo, subhi, atom->tag,