Fixed bugs in HippoT::compute_dispersion_real and compute_multipole_real to ensure that answers only get copied back from device in the last kernel activated.

2021-09-26 00:13:40 -05:00
parent f8bc091cb8
commit edbed9c9c9
5 changed files with 11 additions and 16 deletions
--- a/lib/gpu/lal_hippo.cpp
+++ b/lib/gpu/lal_hippo.cpp
@@ -72,7 +72,6 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,

  // specific to HIPPO
  k_dispersion.set_function(*(this->pair_program),"k_hippo_dispersion");
-  _pval.alloc(this->_max_tep_size,*(this->ucl_device),UCL_READ_ONLY,UCL_READ_ONLY);

  // If atom type constants fit in shared memory use fast kernel
  int lj_types=ntypes;
@@ -312,8 +311,8 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full,
  // only copy them back if this is the last kernel
  //   otherwise, commenting out these two lines to leave the answers
  //   (forces, energies and virial) on the device until the last kernel
-  this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
-  this->device->add_ans_object(this->ans);
+  //this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
+  //this->device->add_ans_object(this->ans);

  this->hd_balancer.stop_timer();

@@ -430,9 +429,9 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
  const int red_blocks=multipole_real(eflag,vflag);

  // leave the answers (forces, energies and virial) on the device,
-  //   only copy them back in the last kernel (polar_real)
-  //ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
-  //device->add_ans_object(ans);
+  //   only copy them back in the last kernel (this one, or polar_real once done)
+  this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
+  this->device->add_ans_object(this->ans);

  this->hd_balancer.stop_timer();

--- a/lib/gpu/lal_hippo.cu
+++ b/lib/gpu/lal_hippo.cu
@@ -1032,9 +1032,6 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
      numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
      numtyp valk = polar6[j].x;

-      if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n",
-        j, corei, corek, alphai, alphak, vali, valk);
-
      // intermediates involving moments and separation distance

      numtyp dir = dix*xr + diy*yr + diz*zr;
--- a/lib/gpu/lal_hippo.h
+++ b/lib/gpu/lal_hippo.h
@@ -130,8 +130,6 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {

  UCL_Kernel k_dispersion;

-  UCL_Vector<acctyp,acctyp> _pval;
-
 protected:
  bool _allocated;
  int dispersion_real(const int eflag, const int vflag);
--- a/src/AMOEBA/amoeba_multipole.cpp
+++ b/src/AMOEBA/amoeba_multipole.cpp
@@ -379,8 +379,7 @@ void PairAmoeba::multipole_real()
        corek = pcore[jclass];
        alphak = palpha[jclass];
        valk = pval[j];
-        if (i==0 && j < 10) printf("j = %d: corei = %f; corek = %f; alphai = %f; alphak = %f; vali = %f; valk = %f\n",
-          j, corei, corek, alphai, alphak, vali, valk);
+
 	/*
 	printf("HIPPO MPOLE ij %d %d: pcore/alpha/val I %g %g %g: J %g %g %g\n",
 	       atom->tag[i],atom->tag[j],corei,alphai,vali,corek,alphak,valk);
@@ -421,6 +420,8 @@ void PairAmoeba::multipole_real()
          term2i*rr3i + term2k*rr3k + term2ik*rr3ik + 
          term3i*rr5i + term3k*rr5k + term3ik*rr5ik;

+        
+
        // find damped multipole intermediates for force and torque

        de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik + 
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@@ -292,7 +292,7 @@ void PairHippoGPU::multipole_real()
  // set the energy unit conversion factor for multipolar real-space calculation

  double felec = electric / am_dielectric;
-  printf("hippo gpu multipole\n");
+
  firstneigh = hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x,
                                                atom->type, amtype, amgroup, rpole, pval,
                                                sublo, subhi, atom->tag,