Fixed bugs in the polar real kernel in hippo, getting closer.

2021-09-26 09:11:09 -05:00
parent 5193dcf8c5
commit 7437c98628
5 changed files with 169 additions and 117 deletions
--- a/lib/gpu/lal_hippo.cpp
+++ b/lib/gpu/lal_hippo.cpp
@ -430,8 +430,8 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
  // leave the answers (forces, energies and virial) on the device,
  //   only copy them back in the last kernel (this one, or polar_real once done)
-  this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
+  //this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
-  this->device->add_ans_object(this->ans);
+  //this->device->add_ans_object(this->ans);
  this->hd_balancer.stop_timer();
@ -568,6 +568,94 @@ int HippoT::umutual2b(const int eflag, const int vflag) {
  return GX;
 }
 // ---------------------------------------------------------------------------
 // Reneighbor on GPU if necessary, and then compute polar real-space
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
 int** HippoT::compute_polar_real(const int ago, const int inum_full,
                                  const int nall, double **host_x,
                                  int *host_type, int *host_amtype,
                                  int *host_amgroup, double **host_rpole,
                                  double **host_uind, double **host_uinp,
                                  double *host_pval, double *sublo, double *subhi,
                                  tagint *tag, int **nspecial, tagint **special,
                                  int *nspecial15, tagint **special15,
                                  const bool eflag_in, const bool vflag_in,
                                  const bool eatom, const bool vatom,
                                  int &host_start, int **ilist, int **jnum,
                                  const double cpu_time, bool &success,
                                  const double aewald, const double felec,
                                  const double off2_polar, double *host_q,
                                  double *boxlo, double *prd, void **tep_ptr) {
  this->acc_timers();

  // Map the boolean requests onto the integer flag levels handed to the
  // kernels: 2 if eatom/vatom is set, otherwise 1 if eflag_in/vflag_in is
  // set, otherwise 0.
  int eflag = eatom ? 2 : (eflag_in ? 1 : 0);
  int vflag = vatom ? 2 : (vflag_in ? 1 : 0);
  #ifdef LAL_NO_BLOCK_REDUCE
  // With LAL_NO_BLOCK_REDUCE defined, every nonzero level is raised to 2.
  if (eflag != 0) eflag = 2;
  if (vflag != 0) vflag = 2;
  #endif
  this->set_kernel(eflag,vflag);

  // Reallocate the per-atom arrays, move host data over, and rebuild the
  // neighbor lists when required.
  // NOTE:
  //   precompute() is called again here for the time being so that the
  //     udirect2b kernel (which runs before this one) can be switched on/off.
  //   When every kernel is in place, precompute() only has to run once,
  //     in the first kernel of a time step.
  //   At this point only uind and uinp need to be cast from host to device,
  //     provided the neighbor lists are rebuilt and the remaining per-atom
  //     arrays (x, type, amtype, amgroup, rpole) are already on the device.
  int** neigh_lists = precompute(ago, inum_full, nall, host_x, host_type,
                                 host_amtype, host_amgroup, host_rpole,
                                 host_uind, host_uinp, host_pval, sublo, subhi,
                                 tag, nspecial, special, nspecial15, special15,
                                 eflag_in, vflag_in, eatom, vatom,
                                 host_start, ilist, jnum, cpu_time,
                                 success, host_q, boxlo, prd);

  // ------------------- Grow the _tep array if needed -------------------
  // Capacity is tracked in atoms (with 10% headroom); storage is 4 values
  // per atom.
  if (this->_max_tep_size < inum_full) {
    const double padded = static_cast<double>(inum_full)*1.10;
    this->_max_tep_size = static_cast<int>(padded);
    this->_tep.resize(this->_max_tep_size*4);
  }
  // Hand the caller the start of the host-side tep storage.
  *tep_ptr = this->_tep.host.begin();

  // Stash the scalar parameters as members before launching polar_real().
  this->_aewald = aewald;
  this->_felec = felec;
  this->_off2_polar = off2_polar;
  const int n_red_blocks = polar_real(eflag,vflag);

  // Answers (forces, energies and virial) are copied back from the device
  //   only in the last kernel of the sequence, which is polar_real here.
  this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,n_red_blocks);
  this->device->add_ans_object(this->ans);
  this->hd_balancer.stop_timer();

  // Bring tep back from the device to the host.
  this->_tep.update_host(this->_max_tep_size*4,false);
 /*
  // Debug aid: dump the first few tep entries.
  printf("GPU lib: tep size = %d: max tep size = %d\n", this->_tep.cols(), _max_tep_size);
  for (int i = 0; i < 10; i++) {
    numtyp4* p = (numtyp4*)(&this->_tep[4*i]);
    printf("i = %d; tep = %f %f %f\n", i, p->x, p->y, p->z);
  }
 */
  return neigh_lists; // nbor->host_jlist.begin()-host_start;
 }
 // ---------------------------------------------------------------------------
 // Calculate the polar real-space term, returning tep
 // ---------------------------------------------------------------------------
--- a/lib/gpu/lal_hippo.cu
+++ b/lib/gpu/lal_hippo.cu
@ -1753,7 +1753,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
    numtyp corei = coeff_amclass[itype].z;  // pcore[iclass];
    numtyp alphai = coeff_amclass[itype].w; // palpha[iclass];
-     numtyp vali = polar6[i].x;
+    numtyp vali = polar6[i].x;
    for ( ; nbor<nbor_end; nbor+=n_stride) {
@ -1873,25 +1873,25 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      numtyp dmpi[9],dmpk[9];
      numtyp dmpik[9];
      damppole(r,9,alphai,alphak,dmpi,dmpk,dmpik);
-      numtyp rr3core = bn[1] - (1.0-factor_dscale)*rr3;
+      numtyp rr3core = bn[1] - ((numtyp)1.0-factor_dscale)*rr3;
-      numtyp rr5core = bn[2] - (1.0-factor_dscale)*rr5;
+      numtyp rr5core = bn[2] - ((numtyp)1.0-factor_dscale)*rr5;
-      numtyp rr3i = bn[1] - (1.0-factor_dscale*dmpi[2])*rr3;
+      numtyp rr3i = bn[1] - ((numtyp)1.0-factor_dscale*dmpi[2])*rr3;
-      numtyp rr5i = bn[2] - (1.0-factor_dscale*dmpi[4])*rr5;
+      numtyp rr5i = bn[2] - ((numtyp)1.0-factor_dscale*dmpi[4])*rr5;
-      numtyp rr7i = bn[3] - (1.0-factor_dscale*dmpi[6])*rr7;
+      numtyp rr7i = bn[3] - ((numtyp)1.0-factor_dscale*dmpi[6])*rr7;
-      numtyp rr9i = bn[4] - (1.0-factor_dscale*dmpi[8])*rr9;
+      numtyp rr9i = bn[4] - ((numtyp)1.0-factor_dscale*dmpi[8])*rr9;
-      numtyp rr3k = bn[1] - (1.0-factor_dscale*dmpk[2])*rr3;
+      numtyp rr3k = bn[1] - ((numtyp)1.0-factor_dscale*dmpk[2])*rr3;
-      numtyp rr5k = bn[2] - (1.0-factor_dscale*dmpk[4])*rr5;
+      numtyp rr5k = bn[2] - ((numtyp)1.0-factor_dscale*dmpk[4])*rr5;
-      numtyp rr7k = bn[3] - (1.0-factor_dscale*dmpk[6])*rr7;
+      numtyp rr7k = bn[3] - ((numtyp)1.0-factor_dscale*dmpk[6])*rr7;
-      numtyp rr9k = bn[4] - (1.0-factor_dscale*dmpk[8])*rr9;
+      numtyp rr9k = bn[4] - ((numtyp)1.0-factor_dscale*dmpk[8])*rr9;
-      numtyp rr5ik = bn[2] - (1.0-factor_wscale*dmpik[4])*rr5;
+      numtyp rr5ik = bn[2] - ((numtyp)1.0-factor_wscale*dmpik[4])*rr5;
-      numtyp rr7ik = bn[3] - (1.0-factor_wscale*dmpik[6])*rr7;
+      numtyp rr7ik = bn[3] - ((numtyp)1.0-factor_wscale*dmpik[6])*rr7;
      // get the induced dipole field used for dipole torques
-      numtyp tix3 = 2.0*rr3i*ukx;
+      numtyp tix3 = (numtyp)2.0*rr3i*ukx;
-      numtyp tiy3 = 2.0*rr3i*uky;
+      numtyp tiy3 = (numtyp)2.0*rr3i*uky;
-      numtyp tiz3 = 2.0*rr3i*ukz;
+      numtyp tiz3 = (numtyp)2.0*rr3i*ukz;
-      numtyp tuir = -2.0*rr5i*ukr;
+      numtyp tuir = (numtyp)-2.0*rr5i*ukr;
      ufld[0] += tix3 + xr*tuir;
      ufld[1] += tiy3 + yr*tuir;
@ -1899,10 +1899,10 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      // get induced dipole field gradient used for quadrupole torques
-      numtyp tix5 = 4.0 * (rr5i*ukx);
+      numtyp tix5 = (numtyp)4.0 * (rr5i*ukx);
-      numtyp tiy5 = 4.0 * (rr5i*uky);
+      numtyp tiy5 = (numtyp)4.0 * (rr5i*uky);
-      numtyp tiz5 = 4.0 * (rr5i*ukz);
+      numtyp tiz5 = (numtyp)4.0 * (rr5i*ukz);
-      tuir = -2.0*rr7i*ukr;
+      tuir = (numtyp)-2.0*rr7i*ukr;
      dufld[0] += xr*tix5 + xr*xr*tuir;
      dufld[1] += xr*tiy5 + yr*tix5 + (numtyp)2.0*xr*yr*tuir;
@ -1911,7 +1911,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      dufld[4] += yr*tiz5 + zr*tiy5 + (numtyp)2.0*yr*zr*tuir;
      dufld[5] += zr*tiz5 + zr*zr*tuir;
-      // get the dEd/dR terms used for direct polarization force
+      // get the field gradient for direct polarization force
      numtyp term1i,term2i,term3i,term4i,term5i,term6i,term7i,term8i;
      numtyp term1k,term2k,term3k,term4k,term5k,term6k,term7k,term8k;
@ -1921,16 +1921,16 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      term1i = rr3i - rr5i*xr*xr;
      term1core = rr3core - rr5core*xr*xr;
-      term2i = 2.0*rr5i*xr ;
+      term2i = (numtyp)2.0*rr5i*xr ;
      term3i = rr7i*xr*xr - rr5i;
-      term4i = 2.0*rr5i;
+      term4i = (numtyp)2.0*rr5i;
-      term5i = 5.0*rr7i*xr;
+      term5i = (numtyp)5.0*rr7i*xr;
      term6i = rr9i*xr*xr;
      term1k = rr3k - rr5k*xr*xr;
-      term2k = 2.0*rr5k*xr;
+      term2k = (numtyp)2.0*rr5k*xr;
      term3k = rr7k*xr*xr - rr5k;
      term4k = 2.0*rr5k;
-      term5k = 5.0*rr7k*xr;
+      term5k = (numtyp)5.0*rr7k*xr;
      term6k = rr9k*xr*xr;
      tixx = vali*term1i + corei*term1core + dix*term2i - dir*term3i -
        qixx*term4i + qix*term5i - qir*term6i + (qiy*yr+qiz*zr)*rr7i;
@ -1939,16 +1939,16 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      term1i = rr3i - rr5i*yr*yr;
      term1core = rr3core - rr5core*yr*yr;
-      term2i = 2.0*rr5i*yr;
+      term2i = (numtyp)2.0*rr5i*yr;
      term3i = rr7i*yr*yr - rr5i;
-      term4i = 2.0*rr5i;
+      term4i = (numtyp)2.0*rr5i;
-      term5i = 5.0*rr7i*yr;
+      term5i = (numtyp)5.0*rr7i*yr;
      term6i = rr9i*yr*yr;
      term1k = rr3k - rr5k*yr*yr;
-      term2k = 2.0*rr5k*yr;
+      term2k = (numtyp)2.0*rr5k*yr;
      term3k = rr7k*yr*yr - rr5k;
-      term4k = 2.0*rr5k;
+      term4k = (numtyp)2.0*rr5k;
-      term5k = 5.0*rr7k*yr;
+      term5k = (numtyp)5.0*rr7k*yr;
      term6k = rr9k*yr*yr;
      tiyy = vali*term1i + corei*term1core + diy*term2i - dir*term3i -
        qiyy*term4i + qiy*term5i - qir*term6i + (qix*xr+qiz*zr)*rr7i;
@ -1957,16 +1957,16 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      term1i = rr3i - rr5i*zr*zr;
      term1core = rr3core - rr5core*zr*zr;
-      term2i = 2.0*rr5i*zr;
+      term2i = (numtyp)2.0*rr5i*zr;
      term3i = rr7i*zr*zr - rr5i;
-      term4i = 2.0*rr5i;
+      term4i = (numtyp)2.0*rr5i;
-      term5i = 5.0*rr7i*zr;
+      term5i = (numtyp)5.0*rr7i*zr;
      term6i = rr9i*zr*zr;
      term1k = rr3k - rr5k*zr*zr;
-      term2k = 2.0*rr5k*zr;
+      term2k = (numtyp)2.0*rr5k*zr;
      term3k = rr7k*zr*zr - rr5k;
-      term4k = 2.0*rr5k;
+      term4k = (numtyp)2.0*rr5k;
-      term5k = 5.0*rr7k*zr;
+      term5k = (numtyp)5.0*rr7k*zr;
      term6k = rr9k*zr*zr;
      tizz = vali*term1i + corei*term1core + diz*term2i - dir*term3i -
        qizz*term4i + qiz*term5i - qir*term6i + (qix*xr+qiy*yr)*rr7i;
@ -1978,17 +1978,17 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      term1core = rr5core*xr*yr;
      term3i = rr5i*yr;
      term4i = yr * (rr7i*xr);
-      term5i = 2.0*rr5i;
+      term5i = (numtyp)2.0*rr5i;
-      term6i = 2.0*rr7i*xr;
+      term6i = (numtyp)2.0*rr7i*xr;
-      term7i = 2.0*rr7i*yr;
+      term7i = (numtyp)2.0*rr7i*yr;
      term8i = yr*rr9i*xr;
      term2k = rr5k*xr;
      term1k = yr * term2k;
      term3k = rr5k*yr;
      term4k = yr * (rr7k*xr);
-      term5k = 2.0*rr5k;
+      term5k = (numtyp)2.0*rr5k;
-      term6k = 2.0*rr7k*xr;
+      term6k = (numtyp)2.0*rr7k*xr;
-      term7k = 2.0*rr7k*yr;
+      term7k = (numtyp)2.0*rr7k*yr;
      term8k = yr*rr9k*xr;
      tixy = -vali*term1i - corei*term1core + diy*term2i + dix*term3i -
        dir*term4i - qixy*term5i + qiy*term6i + qix*term7i - qir*term8i;
@ -2000,17 +2000,17 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      term1core = rr5core*xr*zr;
      term3i = rr5i*zr;
      term4i = zr * (rr7i*xr);
-      term5i = 2.0*rr5i;
+      term5i = (numtyp)2.0*rr5i;
-      term6i = 2.0*rr7i*xr;
+      term6i = (numtyp)2.0*rr7i*xr;
-      term7i = 2.0*rr7i*zr;
+      term7i = (numtyp)2.0*rr7i*zr;
      term8i = zr*rr9i*xr;
      term2k = rr5k*xr;
      term1k = zr * term2k;
      term3k = rr5k*zr;
      term4k = zr * (rr7k*xr);
-      term5k = 2.0*rr5k;
+      term5k = (numtyp)2.0*rr5k;
-      term6k = 2.0*rr7k*xr;
+      term6k = (numtyp)2.0*rr7k*xr;
-      term7k = 2.0*rr7k*zr;
+      term7k = (numtyp)2.0*rr7k*zr;
      term8k = zr*rr9k*xr;
      tixz = -vali*term1i - corei*term1core + diz*term2i + dix*term3i -
        dir*term4i - qixz*term5i + qiz*term6i + qix*term7i - qir*term8i;
@ -2022,17 +2022,17 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      term1core = rr5core*yr*zr;
      term3i = rr5i*zr;
      term4i = zr * (rr7i*yr);
-      term5i = 2.0*rr5i;
+      term5i = (numtyp)2.0*rr5i;
-      term6i = 2.0*rr7i*yr;
+      term6i = (numtyp)2.0*rr7i*yr;
-      term7i = 2.0*rr7i*zr;
+      term7i = (numtyp)2.0*rr7i*zr;
      term8i = zr*rr9i*yr;
      term2k = rr5k*yr;
      term1k = zr * term2k;
      term3k = rr5k*zr;
      term4k = zr * (rr7k*yr);
-      term5k = 2.0*rr5k;
+      term5k = (numtyp)2.0*rr5k;
-      term6k = 2.0*rr7k*yr;
+      term6k = (numtyp)2.0*rr7k*yr;
-      term7k = 2.0*rr7k*zr;
+      term7k = (numtyp)2.0*rr7k*zr;
      term8k = zr*rr9k*yr;
      tiyz = -vali*term1i - corei*term1core + diz*term2i + diy*term3i -
        dir*term4i - qiyz*term5i + qiz*term6i + qiy*term7i - qir*term8i;
@ -2043,14 +2043,14 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      numtyp depy = tixy*ukx + tiyy*uky + tiyz*ukz - tkxy*uix - tkyy*uiy - tkyz*uiz;
      numtyp depz = tixz*ukx + tiyz*uky + tizz*ukz - tkxz*uix - tkyz*uiy - tkzz*uiz;
-      numtyp frcx = -2.0 * depx;
+      numtyp frcx = (numtyp)-2.0 * depx;
-      numtyp frcy = -2.0 * depy;
+      numtyp frcy = (numtyp)-2.0 * depy;
-      numtyp frcz = -2.0 * depz;
+      numtyp frcz = (numtyp)-2.0 * depz;
      // get the dEp/dR terms used for direct polarization force
      // poltyp == MUTUAL && hippo
      // tixx and tkxx
-      term1 = 2.0 * rr5ik;
+      term1 = (numtyp)2.0 * rr5ik;
      term2 = term1*xr;
      term3 = rr5ik - rr7ik*xr*xr;
      tixx = uix*term2 + uir*term3;
@ -2095,55 +2095,6 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
      frcy = frcy - depy;
      frcz = frcz - depz;
      // get the dtau/dr terms used for mutual polarization force
      // poltyp == MUTUAL  && hippo
      term1 = bn[2] - usc3*rr5;
      term2 = bn[3] - usc5*rr7;
      term3 = usr5 + term1;
      term4 = rr3 * factor_uscale;
      term5 = -xr*term3 + rc3[0]*term4;
      term6 = -usr5 + xr*xr*term2 - rr5*xr*urc5[0];
      tixx = uix*term5 + uir*term6;
      tkxx = ukx*term5 + ukr*term6;
      term5 = -yr*term3 + rc3[1]*term4;
      term6 = -usr5 + yr*yr*term2 - rr5*yr*urc5[1];
      tiyy = uiy*term5 + uir*term6;
      tkyy = uky*term5 + ukr*term6;
      term5 = -zr*term3 + rc3[2]*term4;
      term6 = -usr5 + zr*zr*term2 - rr5*zr*urc5[2];
      tizz = uiz*term5 + uir*term6;
      tkzz = ukz*term5 + ukr*term6;
      term4 = -usr5 * yr;
      term5 = -xr*term1 + rr3*urc3[0];
      term6 = xr*yr*term2 - rr5*yr*urc5[0];
      tixy = uix*term4 + uiy*term5 + uir*term6;
      tkxy = ukx*term4 + uky*term5 + ukr*term6;
      term4 = -usr5 * zr;
      term6 = xr*zr*term2 - rr5*zr*urc5[0];
      tixz = uix*term4 + uiz*term5 + uir*term6;
      tkxz = ukx*term4 + ukz*term5 + ukr*term6;
      term5 = -yr*term1 + rr3*urc3[1];
      term6 = yr*zr*term2 - rr5*zr*urc5[1];
      tiyz = uiy*term4 + uiz*term5 + uir*term6;
      tkyz = uky*term4 + ukz*term5 + ukr*term6;
      depx = tixx*ukxp + tixy*ukyp + tixz*ukzp
        + tkxx*uixp + tkxy*uiyp + tkxz*uizp;
      depy = tixy*ukxp + tiyy*ukyp + tiyz*ukzp
        + tkxy*uixp + tkyy*uiyp + tkyz*uizp;
      depz = tixz*ukxp + tiyz*ukyp + tizz*ukzp
        + tkxz*uixp + tkyz*uiyp + tkzz*uizp;
      frcx = frcx + depx;
      frcy = frcy + depy;
      frcz = frcz + depz;
      f.x -= frcx;
      f.y -= frcy;
      f.z -= frcz;
--- a/lib/gpu/lal_hippo.h
+++ b/lib/gpu/lal_hippo.h
@ -90,6 +90,19 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
                const double aewald, const double felec, const double off2_mpole, double *charge,
                double *boxlo, double *prd, void **tep_ptr);
  /// Compute polar real-space with device neighboring
  /** The definition in lal_hippo.cpp calls precompute(), grows the tep
    * buffer if inum_full exceeds its current capacity, runs polar_real(),
    * copies the answers back from the device and then updates the host
    * copy of tep.
    * \param aewald, felec, off2_polar stored in _aewald, _felec and
    *        _off2_polar before polar_real() is invoked
    * \param tep_ptr on return, *tep_ptr is set to the beginning of the
    *        host-side tep storage
    * \return the pointer returned by precompute() **/
  virtual int** compute_polar_real(const int ago, const int inum_full, const int nall,
                double **host_x, int *host_type, int *host_amtype,
                int *host_amgroup, double **host_rpole, double **host_uind,
                double **host_uinp, double *host_pval, double *sublo, double *subhi,
                tagint *tag, int **nspecial, tagint **special,
                int *nspecial15, tagint **special15,
                const bool eflag, const bool vflag,
                const bool eatom, const bool vatom, int &host_start,
                int **ilist, int **numj, const double cpu_time, bool &success,
                const double aewald, const double felec, const double off2_polar,
                double *charge, double *boxlo, double *prd, void **tep_ptr);
  /// Clear all host and device data
  /** \note This is called at the beginning of the init() routine **/
  void clear();
--- a/lib/gpu/lal_hippo_ext.cpp
+++ b/lib/gpu/lal_hippo_ext.cpp
@ -194,7 +194,7 @@ int** hippo_gpu_compute_polar_real(const int ago, const int inum_full,
                           const int nall, double **host_x, int *host_type,
                           int *host_amtype, int *host_amgroup,
                           double **host_rpole, double **host_uind, double **host_uinp,
-                           double *sublo, double *subhi, tagint *tag, int **nspecial,
+                           double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial,
                           tagint **special, int *nspecial15, tagint** special15,
                           const bool eflag, const bool vflag, const bool eatom,
                           const bool vatom, int &host_start,
@ -202,7 +202,7 @@ int** hippo_gpu_compute_polar_real(const int ago, const int inum_full,
                           bool &success, const double aewald, const double felec, const double off2,
                           double *host_q, double *boxlo, double *prd, void **tep_ptr) {
  return HIPPOMF.compute_polar_real(ago, inum_full, nall, host_x, host_type,
-                          host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
+                          host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval,
                          sublo, subhi, tag, nspecial, special, nspecial15, special15,
                          eflag, vflag, eatom, vatom, host_start, ilist, jnum,
                          cpu_time, success, aewald, felec, off2, host_q, boxlo, prd, tep_ptr);
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@ -108,7 +108,7 @@ int ** hippo_gpu_compute_umutual2b(const int ago, const int inum, const int nall
 int ** hippo_gpu_compute_polar_real(const int ago, const int inum, const int nall,
              double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
-              double **host_rpole, double **host_uind, double **host_uinp,
+              double **host_rpole, double **host_uind, double **host_uinp, double *host_pval,
              double *sublo, double *subhi, tagint *tag, int **nspecial,
              tagint **special, int* nspecial15, tagint** special15,
              const bool eflag, const bool vflag, const bool eatom, const bool vatom,
@ -138,7 +138,7 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
  gpu_multipole_real_ready = true;
  gpu_udirect2b_ready = false;
  gpu_umutual2b_ready = false;
-  gpu_polar_real_ready = false;
+  gpu_polar_real_ready = true;
  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }
@ -1089,7 +1089,7 @@ void PairHippoGPU::polar_real()
  firstneigh = hippo_gpu_compute_polar_real(neighbor->ago, inum, nall, atom->x,
                                             atom->type, amtype, amgroup,
-                                             rpole, uind, uinp, sublo, subhi,
+                                             rpole, uind, uinp, pval, sublo, subhi,
                                             atom->tag, atom->nspecial, atom->special,
                                             atom->nspecial15, atom->special15,
                                             eflag, vflag, eflag_atom, vflag_atom,