diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp
index 7fa358e35a..07f8732bcb 100644
--- a/lib/gpu/lal_hippo.cpp
+++ b/lib/gpu/lal_hippo.cpp
@@ -210,10 +210,11 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full,
   this->_aewald = aewald;
   const int red_blocks=dispersion_real(eflag,vflag);
 
-  // leave the answers (forces, energies and virial) on the device,
-  //   only copy them back in the last kernel (polar_real)
-  //ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
-  //device->add_ans_object(ans);
+  // copy the answers back only if this is the last kernel;
+  //   otherwise, comment out these two lines to leave the answers
+  //   (forces, energies and virial) on the device until the last kernel
+  this->ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
+  this->device->add_ans_object(this->ans);
 
   this->hd_balancer.stop_timer();
 
@@ -238,7 +239,7 @@ int HippoT::dispersion_real(const int eflag, const int vflag) {
                                (BX/this->_threads_per_atom)));
   this->time_pair.start();
 
-  // Build the short neighbor list for the cutoff off2_mpole,
+  // Build the short neighbor list for the cutoff off2_disp,
   //   at this point mpole is the first kernel in a time step
   
   this->k_short_nbor.set_size(GX,BX);
diff --git a/lib/gpu/lal_hippo.cu b/lib/gpu/lal_hippo.cu
index 07df4c6ad0..f9020cf9a6 100644
--- a/lib/gpu/lal_hippo.cu
+++ b/lib/gpu/lal_hippo.cu
@@ -512,7 +512,8 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
       numtyp tk,tk2;
       numtyp damp3,damp5;
       numtyp ddamp;
-      numtyp factor_disp = (numtyp)1.0; // factor_disp = special_disp[sbmask15(j)];
+      const numtyp4 sp_nonpol = sp_nonpolar[sbmask15(jextra)];
+      numtyp factor_disp = sp_nonpol.z; // factor_disp = special_disp[sbmask15(j)];
 
       if (ai != ak) {
         ai2 = ai * ai;
@@ -547,7 +548,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
       // apply damping and scaling factors for this interaction
 
       numtyp scale = factor_disp * damp*damp;
-      scale = scale - (numtyp )1.0;
+      scale = scale - (numtyp)1.0;
       numtyp e = -ci * ck * (expa+scale) / r6;
       numtyp rterm = -ucl_powr(ralpha2,(numtyp)3.0) * expterm / r;
       numtyp de = (numtyp)-6.0*e/r2 - ci*ck*rterm/r7 - (numtyp)2.0*ci*ck*factor_disp*damp*ddamp/r7;
@@ -562,7 +563,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
       f.x += dedx;
       f.y += dedy;
       f.z += dedz;
-      
+
       // increment the internal virial tensor components
 
       numtyp vxx = xr * dedx;
diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp
index be5d4afc2b..a6e7b9edc6 100644
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@@ -133,11 +133,11 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
 
   gpu_hal_ready = false;               // always false for HIPPO
   gpu_repulsion_ready = false;         // true for HIPPO when ready
-  gpu_dispersion_real_ready = false;   // true for HIPPO when ready
+  gpu_dispersion_real_ready = true;   // true for HIPPO when ready
   gpu_multipole_real_ready = false;
   gpu_udirect2b_ready = false;
   gpu_umutual2b_ready = false;
-  gpu_polar_real_ready = true;
+  gpu_polar_real_ready = false;
 
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
 }