Fixed bugs in the polar real kernel in hippo, getting closer..

2021-09-26 09:11:09 -05:00
parent 5193dcf8c5
commit 7437c98628
5 changed files with 169 additions and 117 deletions
--- a/lib/gpu/lal_hippo.cpp
+++ b/lib/gpu/lal_hippo.cpp
@ -430,8 +430,8 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,

  // leave the answers (forces, energies and virial) on the device,
  //   only copy them back in the last kernel (this one, or polar_real once done)
-  this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
-  this->device->add_ans_object(this->ans);
+  //this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
+  //this->device->add_ans_object(this->ans);

  this->hd_balancer.stop_timer();

@ -568,6 +568,94 @@ int HippoT::umutual2b(const int eflag, const int vflag) {
  return GX;
 }

+// ---------------------------------------------------------------------------
+// Reneighbor on GPU if necessary, and then compute polar real-space
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int** HippoT::compute_polar_real(const int ago, const int inum_full,
+                                 const int nall, double **host_x,
+                                 int *host_type, int *host_amtype,
+                                 int *host_amgroup, double **host_rpole,
+                                 double **host_uind, double **host_uinp,
+                                 double *host_pval, double *sublo, double *subhi,
+                                 tagint *tag, int **nspecial, tagint **special,
+                                 int *nspecial15, tagint **special15,
+                                 const bool eflag_in, const bool vflag_in,
+                                 const bool eatom, const bool vatom,
+                                 int &host_start, int **ilist, int **jnum,
+                                 const double cpu_time, bool &success,
+                                 const double aewald, const double felec,
+                                 const double off2_polar, double *host_q,
+                                 double *boxlo, double *prd, void **tep_ptr) {
+  this->acc_timers();
+  int eflag, vflag;
+  if (eatom) eflag=2;
+  else if (eflag_in) eflag=1;
+  else eflag=0;
+  if (vatom) vflag=2;
+  else if (vflag_in) vflag=1;
+  else vflag=0;
+
+  #ifdef LAL_NO_BLOCK_REDUCE
+  if (eflag) eflag=2;
+  if (vflag) vflag=2;
+  #endif
+
+  this->set_kernel(eflag,vflag);
+
+  // reallocate per-atom arrays, transfer data from the host
+  //   and build the neighbor lists if needed
+  // NOTE: 
+  //   For now we invoke precompute() again here,
+  //     to be able to turn on/off the udirect2b kernel (which comes before this)
+  //   Once all the kernels are ready, precompute() is needed only once
+  //     in the first kernel in a time step.
+  //   We only need to cast uind and uinp from host to device here
+  //     if the neighbor lists are rebuilt and other per-atom arrays
+  //     (x, type, amtype, amgroup, rpole) are ready on the device.
+
+  int** firstneigh = nullptr;
+  firstneigh = precompute(ago, inum_full, nall, host_x, host_type,
+                          host_amtype, host_amgroup, host_rpole,
+                          host_uind, host_uinp, host_pval, sublo, subhi, tag,
+                          nspecial, special, nspecial15, special15,
+                          eflag_in, vflag_in, eatom, vatom,
+                          host_start, ilist, jnum, cpu_time,
+                          success, host_q, boxlo, prd);
+
+  // ------------------- Resize _tep array ------------------------
+
+  if (inum_full>this->_max_tep_size) {
+    this->_max_tep_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
+    this->_tep.resize(this->_max_tep_size*4);
+  }
+  *tep_ptr=this->_tep.host.begin();
+
+  this->_off2_polar = off2_polar;
+  this->_felec = felec;
+  this->_aewald = aewald;
+  const int red_blocks=polar_real(eflag,vflag);
+
+  // only copy answers (forces, energies and virial) back from the device
+  //   in the last kernel (which is polar_real here)
+  this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
+  this->device->add_ans_object(this->ans);
+
+  this->hd_balancer.stop_timer();
+
+  // copy tep from device to host
+
+  this->_tep.update_host(this->_max_tep_size*4,false);
+/*
+  printf("GPU lib: tep size = %d: max tep size = %d\n", this->_tep.cols(), _max_tep_size);
+  for (int i = 0; i < 10; i++) {
+    numtyp4* p = (numtyp4*)(&this->_tep[4*i]);
+    printf("i = %d; tep = %f %f %f\n", i, p->x, p->y, p->z);
+  }
+*/  
+  return firstneigh; // nbor->host_jlist.begin()-host_start;
+}
+
 // ---------------------------------------------------------------------------
 // Calculate the polar real-space term, returning tep
 // ---------------------------------------------------------------------------
--- a/lib/gpu/lal_hippo.cu
+++ b/lib/gpu/lal_hippo.cu
@ -1873,25 +1873,25 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      numtyp dmpi[9],dmpk[9];
      numtyp dmpik[9];
      damppole(r,9,alphai,alphak,dmpi,dmpk,dmpik);
-      numtyp rr3core = bn[1] - (1.0-factor_dscale)*rr3;
-      numtyp rr5core = bn[2] - (1.0-factor_dscale)*rr5;
-      numtyp rr3i = bn[1] - (1.0-factor_dscale*dmpi[2])*rr3;
-      numtyp rr5i = bn[2] - (1.0-factor_dscale*dmpi[4])*rr5;
-      numtyp rr7i = bn[3] - (1.0-factor_dscale*dmpi[6])*rr7;
-      numtyp rr9i = bn[4] - (1.0-factor_dscale*dmpi[8])*rr9;
-      numtyp rr3k = bn[1] - (1.0-factor_dscale*dmpk[2])*rr3;
-      numtyp rr5k = bn[2] - (1.0-factor_dscale*dmpk[4])*rr5;
-      numtyp rr7k = bn[3] - (1.0-factor_dscale*dmpk[6])*rr7;
-      numtyp rr9k = bn[4] - (1.0-factor_dscale*dmpk[8])*rr9;
-      numtyp rr5ik = bn[2] - (1.0-factor_wscale*dmpik[4])*rr5;
-      numtyp rr7ik = bn[3] - (1.0-factor_wscale*dmpik[6])*rr7;
+      numtyp rr3core = bn[1] - ((numtyp)1.0-factor_dscale)*rr3;
+      numtyp rr5core = bn[2] - ((numtyp)1.0-factor_dscale)*rr5;
+      numtyp rr3i = bn[1] - ((numtyp)1.0-factor_dscale*dmpi[2])*rr3;
+      numtyp rr5i = bn[2] - ((numtyp)1.0-factor_dscale*dmpi[4])*rr5;
+      numtyp rr7i = bn[3] - ((numtyp)1.0-factor_dscale*dmpi[6])*rr7;
+      numtyp rr9i = bn[4] - ((numtyp)1.0-factor_dscale*dmpi[8])*rr9;
+      numtyp rr3k = bn[1] - ((numtyp)1.0-factor_dscale*dmpk[2])*rr3;
+      numtyp rr5k = bn[2] - ((numtyp)1.0-factor_dscale*dmpk[4])*rr5;
+      numtyp rr7k = bn[3] - ((numtyp)1.0-factor_dscale*dmpk[6])*rr7;
+      numtyp rr9k = bn[4] - ((numtyp)1.0-factor_dscale*dmpk[8])*rr9;
+      numtyp rr5ik = bn[2] - ((numtyp)1.0-factor_wscale*dmpik[4])*rr5;
+      numtyp rr7ik = bn[3] - ((numtyp)1.0-factor_wscale*dmpik[6])*rr7;

      // get the induced dipole field used for dipole torques

-      numtyp tix3 = 2.0*rr3i*ukx;
-      numtyp tiy3 = 2.0*rr3i*uky;
-      numtyp tiz3 = 2.0*rr3i*ukz;
-      numtyp tuir = -2.0*rr5i*ukr;
+      numtyp tix3 = (numtyp)2.0*rr3i*ukx;
+      numtyp tiy3 = (numtyp)2.0*rr3i*uky;
+      numtyp tiz3 = (numtyp)2.0*rr3i*ukz;
+      numtyp tuir = (numtyp)-2.0*rr5i*ukr;

      ufld[0] += tix3 + xr*tuir;
      ufld[1] += tiy3 + yr*tuir;
@ -1899,10 +1899,10 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,

      // get induced dipole field gradient used for quadrupole torques

-      numtyp tix5 = 4.0 * (rr5i*ukx);
-      numtyp tiy5 = 4.0 * (rr5i*uky);
-      numtyp tiz5 = 4.0 * (rr5i*ukz);
-      tuir = -2.0*rr7i*ukr;
+      numtyp tix5 = (numtyp)4.0 * (rr5i*ukx);
+      numtyp tiy5 = (numtyp)4.0 * (rr5i*uky);
+      numtyp tiz5 = (numtyp)4.0 * (rr5i*ukz);
+      tuir = (numtyp)-2.0*rr7i*ukr;

      dufld[0] += xr*tix5 + xr*xr*tuir;
      dufld[1] += xr*tiy5 + yr*tix5 + (numtyp)2.0*xr*yr*tuir;
@ -1911,7 +1911,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      dufld[4] += yr*tiz5 + zr*tiy5 + (numtyp)2.0*yr*zr*tuir;
      dufld[5] += zr*tiz5 + zr*zr*tuir;
      
-      // get the dEd/dR terms used for direct polarization force
+      // get the field gradient for direct polarization force
      
      numtyp term1i,term2i,term3i,term4i,term5i,term6i,term7i,term8i;
      numtyp term1k,term2k,term3k,term4k,term5k,term6k,term7k,term8k;
@ -1921,16 +1921,16 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,

      term1i = rr3i - rr5i*xr*xr;
      term1core = rr3core - rr5core*xr*xr;
-      term2i = 2.0*rr5i*xr ;
+      term2i = (numtyp)2.0*rr5i*xr ;
      term3i = rr7i*xr*xr - rr5i;
-      term4i = 2.0*rr5i;
-      term5i = 5.0*rr7i*xr;
+      term4i = (numtyp)2.0*rr5i;
+      term5i = (numtyp)5.0*rr7i*xr;
      term6i = rr9i*xr*xr;
      term1k = rr3k - rr5k*xr*xr;
-      term2k = 2.0*rr5k*xr;
+      term2k = (numtyp)2.0*rr5k*xr;
      term3k = rr7k*xr*xr - rr5k;
      term4k = 2.0*rr5k;
-      term5k = 5.0*rr7k*xr;
+      term5k = (numtyp)5.0*rr7k*xr;
      term6k = rr9k*xr*xr;
      tixx = vali*term1i + corei*term1core + dix*term2i - dir*term3i -
        qixx*term4i + qix*term5i - qir*term6i + (qiy*yr+qiz*zr)*rr7i;
@ -1939,16 +1939,16 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,

      term1i = rr3i - rr5i*yr*yr;
      term1core = rr3core - rr5core*yr*yr;
-      term2i = 2.0*rr5i*yr;
+      term2i = (numtyp)2.0*rr5i*yr;
      term3i = rr7i*yr*yr - rr5i;
-      term4i = 2.0*rr5i;
-      term5i = 5.0*rr7i*yr;
+      term4i = (numtyp)2.0*rr5i;
+      term5i = (numtyp)5.0*rr7i*yr;
      term6i = rr9i*yr*yr;
      term1k = rr3k - rr5k*yr*yr;
-      term2k = 2.0*rr5k*yr;
+      term2k = (numtyp)2.0*rr5k*yr;
      term3k = rr7k*yr*yr - rr5k;
-      term4k = 2.0*rr5k;
-      term5k = 5.0*rr7k*yr;
+      term4k = (numtyp)2.0*rr5k;
+      term5k = (numtyp)5.0*rr7k*yr;
      term6k = rr9k*yr*yr;
      tiyy = vali*term1i + corei*term1core + diy*term2i - dir*term3i -
        qiyy*term4i + qiy*term5i - qir*term6i + (qix*xr+qiz*zr)*rr7i;
@ -1957,16 +1957,16 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,

      term1i = rr3i - rr5i*zr*zr;
      term1core = rr3core - rr5core*zr*zr;
-      term2i = 2.0*rr5i*zr;
+      term2i = (numtyp)2.0*rr5i*zr;
      term3i = rr7i*zr*zr - rr5i;
-      term4i = 2.0*rr5i;
-      term5i = 5.0*rr7i*zr;
+      term4i = (numtyp)2.0*rr5i;
+      term5i = (numtyp)5.0*rr7i*zr;
      term6i = rr9i*zr*zr;
      term1k = rr3k - rr5k*zr*zr;
-      term2k = 2.0*rr5k*zr;
+      term2k = (numtyp)2.0*rr5k*zr;
      term3k = rr7k*zr*zr - rr5k;
-      term4k = 2.0*rr5k;
-      term5k = 5.0*rr7k*zr;
+      term4k = (numtyp)2.0*rr5k;
+      term5k = (numtyp)5.0*rr7k*zr;
      term6k = rr9k*zr*zr;
      tizz = vali*term1i + corei*term1core + diz*term2i - dir*term3i -
        qizz*term4i + qiz*term5i - qir*term6i + (qix*xr+qiy*yr)*rr7i;
@ -1978,17 +1978,17 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      term1core = rr5core*xr*yr;
      term3i = rr5i*yr;
      term4i = yr * (rr7i*xr);
-      term5i = 2.0*rr5i;
-      term6i = 2.0*rr7i*xr;
-      term7i = 2.0*rr7i*yr;
+      term5i = (numtyp)2.0*rr5i;
+      term6i = (numtyp)2.0*rr7i*xr;
+      term7i = (numtyp)2.0*rr7i*yr;
      term8i = yr*rr9i*xr;
      term2k = rr5k*xr;
      term1k = yr * term2k;
      term3k = rr5k*yr;
      term4k = yr * (rr7k*xr);
-      term5k = 2.0*rr5k;
-      term6k = 2.0*rr7k*xr;
-      term7k = 2.0*rr7k*yr;
+      term5k = (numtyp)2.0*rr5k;
+      term6k = (numtyp)2.0*rr7k*xr;
+      term7k = (numtyp)2.0*rr7k*yr;
      term8k = yr*rr9k*xr;
      tixy = -vali*term1i - corei*term1core + diy*term2i + dix*term3i -
        dir*term4i - qixy*term5i + qiy*term6i + qix*term7i - qir*term8i;
@ -2000,17 +2000,17 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      term1core = rr5core*xr*zr;
      term3i = rr5i*zr;
      term4i = zr * (rr7i*xr);
-      term5i = 2.0*rr5i;
-      term6i = 2.0*rr7i*xr;
-      term7i = 2.0*rr7i*zr;
+      term5i = (numtyp)2.0*rr5i;
+      term6i = (numtyp)2.0*rr7i*xr;
+      term7i = (numtyp)2.0*rr7i*zr;
      term8i = zr*rr9i*xr;
      term2k = rr5k*xr;
      term1k = zr * term2k;
      term3k = rr5k*zr;
      term4k = zr * (rr7k*xr);
-      term5k = 2.0*rr5k;
-      term6k = 2.0*rr7k*xr;
-      term7k = 2.0*rr7k*zr;
+      term5k = (numtyp)2.0*rr5k;
+      term6k = (numtyp)2.0*rr7k*xr;
+      term7k = (numtyp)2.0*rr7k*zr;
      term8k = zr*rr9k*xr;
      tixz = -vali*term1i - corei*term1core + diz*term2i + dix*term3i -
        dir*term4i - qixz*term5i + qiz*term6i + qix*term7i - qir*term8i;
@ -2022,17 +2022,17 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      term1core = rr5core*yr*zr;
      term3i = rr5i*zr;
      term4i = zr * (rr7i*yr);
-      term5i = 2.0*rr5i;
-      term6i = 2.0*rr7i*yr;
-      term7i = 2.0*rr7i*zr;
+      term5i = (numtyp)2.0*rr5i;
+      term6i = (numtyp)2.0*rr7i*yr;
+      term7i = (numtyp)2.0*rr7i*zr;
      term8i = zr*rr9i*yr;
      term2k = rr5k*yr;
      term1k = zr * term2k;
      term3k = rr5k*zr;
      term4k = zr * (rr7k*yr);
-      term5k = 2.0*rr5k;
-      term6k = 2.0*rr7k*yr;
-      term7k = 2.0*rr7k*zr;
+      term5k = (numtyp)2.0*rr5k;
+      term6k = (numtyp)2.0*rr7k*yr;
+      term7k = (numtyp)2.0*rr7k*zr;
      term8k = zr*rr9k*yr;
      tiyz = -vali*term1i - corei*term1core + diz*term2i + diy*term3i -
        dir*term4i - qiyz*term5i + qiz*term6i + qiy*term7i - qir*term8i;
@ -2043,14 +2043,14 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      numtyp depy = tixy*ukx + tiyy*uky + tiyz*ukz - tkxy*uix - tkyy*uiy - tkyz*uiz;
      numtyp depz = tixz*ukx + tiyz*uky + tizz*ukz - tkxz*uix - tkyz*uiy - tkzz*uiz;

-      numtyp frcx = -2.0 * depx;
-      numtyp frcy = -2.0 * depy;
-      numtyp frcz = -2.0 * depz;
+      numtyp frcx = (numtyp)-2.0 * depx;
+      numtyp frcy = (numtyp)-2.0 * depy;
+      numtyp frcz = (numtyp)-2.0 * depz;

      // get the dEp/dR terms used for direct polarization force
      // poltyp == MUTUAL && hippo
      // tixx and tkxx
-      term1 = 2.0 * rr5ik;
+      term1 = (numtyp)2.0 * rr5ik;
      term2 = term1*xr;
      term3 = rr5ik - rr7ik*xr*xr;
      tixx = uix*term2 + uir*term3;
@ -2095,55 +2095,6 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      frcy = frcy - depy;
      frcz = frcz - depz;

-      // get the dtau/dr terms used for mutual polarization force
-      // poltyp == MUTUAL  && hippo
-          
-      term1 = bn[2] - usc3*rr5;
-      term2 = bn[3] - usc5*rr7;
-      term3 = usr5 + term1;
-      term4 = rr3 * factor_uscale;
-      term5 = -xr*term3 + rc3[0]*term4;
-      term6 = -usr5 + xr*xr*term2 - rr5*xr*urc5[0];
-      tixx = uix*term5 + uir*term6;
-      tkxx = ukx*term5 + ukr*term6;
-
-      term5 = -yr*term3 + rc3[1]*term4;
-      term6 = -usr5 + yr*yr*term2 - rr5*yr*urc5[1];
-      tiyy = uiy*term5 + uir*term6;
-      tkyy = uky*term5 + ukr*term6;
-
-      term5 = -zr*term3 + rc3[2]*term4;
-      term6 = -usr5 + zr*zr*term2 - rr5*zr*urc5[2];
-      tizz = uiz*term5 + uir*term6;
-      tkzz = ukz*term5 + ukr*term6;
-
-      term4 = -usr5 * yr;
-      term5 = -xr*term1 + rr3*urc3[0];
-      term6 = xr*yr*term2 - rr5*yr*urc5[0];
-      tixy = uix*term4 + uiy*term5 + uir*term6;
-      tkxy = ukx*term4 + uky*term5 + ukr*term6;
-
-      term4 = -usr5 * zr;
-      term6 = xr*zr*term2 - rr5*zr*urc5[0];
-      tixz = uix*term4 + uiz*term5 + uir*term6;
-      tkxz = ukx*term4 + ukz*term5 + ukr*term6;
-
-      term5 = -yr*term1 + rr3*urc3[1];
-      term6 = yr*zr*term2 - rr5*zr*urc5[1];
-      tiyz = uiy*term4 + uiz*term5 + uir*term6;
-      tkyz = uky*term4 + ukz*term5 + ukr*term6;
-
-      depx = tixx*ukxp + tixy*ukyp + tixz*ukzp
-        + tkxx*uixp + tkxy*uiyp + tkxz*uizp;
-      depy = tixy*ukxp + tiyy*ukyp + tiyz*ukzp
-        + tkxy*uixp + tkyy*uiyp + tkyz*uizp;
-      depz = tixz*ukxp + tiyz*ukyp + tizz*ukzp
-        + tkxz*uixp + tkyz*uiyp + tkzz*uizp;
-
-      frcx = frcx + depx;
-      frcy = frcy + depy;
-      frcz = frcz + depz;
-
      f.x -= frcx;
      f.y -= frcy;
      f.z -= frcz;
--- a/lib/gpu/lal_hippo.h
+++ b/lib/gpu/lal_hippo.h
@ -90,6 +90,19 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
                const double aewald, const double felec, const double off2_mpole, double *charge,
                double *boxlo, double *prd, void **tep_ptr);

+  /// Compute polar real-space with device neighboring
+  virtual int** compute_polar_real(const int ago, const int inum_full, const int nall,
+                double **host_x, int *host_type, int *host_amtype,
+                int *host_amgroup, double **host_rpole, double **host_uind,
+                double **host_uinp, double *host_pval, double *sublo, double *subhi,
+                tagint *tag, int **nspecial, tagint **special,
+                int *nspecial15, tagint **special15,
+                const bool eflag, const bool vflag,
+                const bool eatom, const bool vatom, int &host_start,
+                int **ilist, int **numj, const double cpu_time, bool &success,
+                const double aewald, const double felec, const double off2_polar,
+                double *charge, double *boxlo, double *prd, void **tep_ptr);
+
  /// Clear all host and device data
  /** \note This is called at the beginning of the init() routine **/
  void clear();
--- a/lib/gpu/lal_hippo_ext.cpp
+++ b/lib/gpu/lal_hippo_ext.cpp
@ -194,7 +194,7 @@ int** hippo_gpu_compute_polar_real(const int ago, const int inum_full,
                           const int nall, double **host_x, int *host_type,
                           int *host_amtype, int *host_amgroup,
                           double **host_rpole, double **host_uind, double **host_uinp,
-                           double *sublo, double *subhi, tagint *tag, int **nspecial,
+                           double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial,
                           tagint **special, int *nspecial15, tagint** special15,
                           const bool eflag, const bool vflag, const bool eatom,
                           const bool vatom, int &host_start,
@ -202,7 +202,7 @@ int** hippo_gpu_compute_polar_real(const int ago, const int inum_full,
                           bool &success, const double aewald, const double felec, const double off2,
                           double *host_q, double *boxlo, double *prd, void **tep_ptr) {
  return HIPPOMF.compute_polar_real(ago, inum_full, nall, host_x, host_type,
-                          host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
+                          host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval,
                          sublo, subhi, tag, nspecial, special, nspecial15, special15,
                          eflag, vflag, eatom, vatom, host_start, ilist, jnum,
                          cpu_time, success, aewald, felec, off2, host_q, boxlo, prd, tep_ptr);
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@ -108,7 +108,7 @@ int ** hippo_gpu_compute_umutual2b(const int ago, const int inum, const int nall

 int ** hippo_gpu_compute_polar_real(const int ago, const int inum, const int nall,
              double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
-              double **host_rpole, double **host_uind, double **host_uinp,
+              double **host_rpole, double **host_uind, double **host_uinp, double *host_pval,
              double *sublo, double *subhi, tagint *tag, int **nspecial,
              tagint **special, int* nspecial15, tagint** special15,
              const bool eflag, const bool vflag, const bool eatom, const bool vatom,
@ -138,7 +138,7 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
  gpu_multipole_real_ready = true;
  gpu_udirect2b_ready = false;
  gpu_umutual2b_ready = false;
-  gpu_polar_real_ready = false;
+  gpu_polar_real_ready = true;

  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
@ -1089,7 +1089,7 @@ void PairHippoGPU::polar_real()

  firstneigh = hippo_gpu_compute_polar_real(neighbor->ago, inum, nall, atom->x,
                                             atom->type, amtype, amgroup,
-                                             rpole, uind, uinp, sublo, subhi,
+                                             rpole, uind, uinp, pval, sublo, subhi,
                                             atom->tag, atom->nspecial, atom->special,
                                             atom->nspecial15, atom->special15,
                                             eflag, vflag, eflag_atom, vflag_atom,