diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake
index 2b6977005d..cf5bcd2ea2 100644
--- a/cmake/Modules/Packages/GPU.cmake
+++ b/cmake/Modules/Packages/GPU.cmake
@@ -172,6 +172,7 @@ elseif(GPU_API STREQUAL "OPENCL")
     ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu
     ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu
     ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu
+    ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo.cu
   )
 
   foreach(GPU_KERNEL ${GPU_LIB_CU})
@@ -188,6 +189,7 @@ elseif(GPU_API STREQUAL "OPENCL")
   GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu)
   GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu)
   GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu)
+  GenerateOpenCLHeader(hippo ${CMAKE_CURRENT_BINARY_DIR}/gpu/hippo_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_hippo.cu)
 
   list(APPEND GPU_LIB_SOURCES
     ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h
@@ -197,6 +199,7 @@ elseif(GPU_API STREQUAL "OPENCL")
     ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h
     ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h
     ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h
+    ${CMAKE_CURRENT_BINARY_DIR}/gpu/hippo_cl.h
   )
 
   add_library(gpu STATIC ${GPU_LIB_SOURCES})
diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile
index 2ff98827d4..64a2161f85 100644
--- a/lib/gpu/Opencl.makefile
+++ b/lib/gpu/Opencl.makefile
@@ -74,6 +74,9 @@ $(OBJ_DIR)/tersoff_mod_cl.h: lal_tersoff_mod.cu $(PRE1_H) lal_tersoff_mod_extra.
 $(OBJ_DIR)/tersoff_zbl_cl.h: lal_tersoff_zbl.cu $(PRE1_H) lal_tersoff_zbl_extra.h
 	$(BSH) ./geryon/file_to_cstr.sh tersoff_zbl $(PRE1_H) lal_tersoff_zbl_extra.h lal_tersoff_zbl.cu $(OBJ_DIR)/tersoff_zbl_cl.h;
 
+$(OBJ_DIR)/hippo_cl.h: lal_hippo.cu $(PRE1_H) lal_hippo_extra.h
+	$(BSH) ./geryon/file_to_cstr.sh hippo $(PRE1_H) lal_hippo_extra.h lal_hippo.cu $(OBJ_DIR)/hippo_cl.h;
+
 $(OBJ_DIR)/%_cl.h: lal_%.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh $* $(PRE1_H) $< $@;
 
diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu
index e4d129214a..1deb3e3bb5 100644
--- a/lib/gpu/lal_amoeba.cu
+++ b/lib/gpu/lal_amoeba.cu
@@ -1064,7 +1064,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
       bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
       bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
 
-      numtyp tdipdip[6]; // the following tdipdip is incorrect!! needs work to store tdipdip
+      numtyp tdipdip[6];
       tdipdip[0] = -bcn[0] + bcn[1]*xr*xr;
       tdipdip[1] = bcn[1]*xr*yr;
       tdipdip[2] = bcn[1]*xr*zr;
@@ -1233,10 +1233,10 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
       numtyp r = ucl_sqrt(r2);
       
       const numtyp4 pol1j = polar1[j];
-      numtyp ck = polar1[j].x;   // rpole[j][0];
-      numtyp dkx = polar1[j].y;  // rpole[j][1];
-      numtyp dky = polar1[j].z;  // rpole[j][2];
-      numtyp dkz = polar1[j].w;  // rpole[j][3];
+      numtyp ck = pol1j.x;   // rpole[j][0];
+      numtyp dkx = pol1j.y;  // rpole[j][1];
+      numtyp dky = pol1j.z;  // rpole[j][2];
+      numtyp dkz = pol1j.w;  // rpole[j][3];
       const numtyp4 pol2j = polar2[j];
       numtyp qkxx = pol2j.x; // rpole[j][4];
       numtyp qkxy = pol2j.y; // rpole[j][5];
diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp
index d31370be73..caf910863f 100644
--- a/lib/gpu/lal_hippo.cpp
+++ b/lib/gpu/lal_hippo.cpp
@@ -489,6 +489,81 @@ int HippoT::multipole_real(const int eflag, const int vflag) {
   return GX;
 }
 
+// ---------------------------------------------------------------------------
+// Reneighbor on GPU if necessary, and then compute the direct real space part
+//    of the permanent field
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int** HippoT::compute_udirect2b(const int ago, const int inum_full,
+                                const int nall, double **host_x,
+                                int *host_type, int *host_amtype,
+                                int *host_amgroup, double **host_rpole,
+                                double **host_uind, double **host_uinp,
+                                double* host_pval,
+                                double *sublo, double *subhi, tagint *tag,
+                                int **nspecial, tagint **special,
+                                int *nspecial15, tagint **special15,
+                                const bool eflag_in, const bool vflag_in,
+                                const bool eatom, const bool vatom,
+                                int &host_start, int **ilist, int **jnum,
+                                const double cpu_time, bool &success,
+                                const double aewald, const double off2_polar,
+                                double *host_q, double *boxlo, double *prd,
+                                void** fieldp_ptr) {
+  this->acc_timers();
+  int eflag, vflag;
+  if (eatom) eflag=2;
+  else if (eflag_in) eflag=1;
+  else eflag=0;
+  if (vatom) vflag=2;
+  else if (vflag_in) vflag=1;
+  else vflag=0;
+
+  #ifdef LAL_NO_BLOCK_REDUCE
+  if (eflag) eflag=2;
+  if (vflag) vflag=2;
+  #endif
+
+  this->set_kernel(eflag,vflag);
+
+  // reallocate per-atom arrays, transfer data from the host
+  //   and build the neighbor lists if needed
+
+  int** firstneigh = nullptr;
+  firstneigh = precompute(ago, inum_full, nall, host_x, host_type,
+                          host_amtype, host_amgroup, host_rpole,
+                          host_uind, host_uinp, host_pval, sublo, subhi, tag,
+                          nspecial, special, nspecial15, special15,
+                          eflag_in, vflag_in, eatom, vatom,
+                          host_start, ilist, jnum, cpu_time,
+                          success, host_q, boxlo, prd);
+                         
+  // ------------------- Resize _fieldp array ------------------------
+
+  if (inum_full>this->_max_fieldp_size) {
+    this->_max_fieldp_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
+    this->_fieldp.resize(this->_max_fieldp_size*8);
+  }
+  *fieldp_ptr=this->_fieldp.host.begin();
+
+  this->_off2_polar = off2_polar;
+  this->_aewald = aewald;
+  const int red_blocks=udirect2b(eflag,vflag);
+
+  // copy field and fieldp from device to host (_fieldp store both arrays, one after another)
+
+  this->_fieldp.update_host(this->_max_fieldp_size*8,false);
+/*
+  printf("GPU lib: _fieldp size = %d: max fieldp size = %d\n",
+    this->_fieldp.cols(), _max_fieldp_size);
+  for (int i = 0; i < 10; i++) {
+    numtyp4* p = (numtyp4*)(&this->_fieldp[4*i]);
+    printf("i = %d; field = %f %f %f\n", i, p->x, p->y, p->z);
+  }
+*/
+  return firstneigh; //nbor->host_jlist.begin()-host_start;
+}
+
 // ---------------------------------------------------------------------------
 // Calculate the real-space permanent field, returning field and fieldp
 // ---------------------------------------------------------------------------
@@ -518,7 +593,8 @@ int HippoT::udirect2b(const int eflag, const int vflag) {
   }
   
   this->k_udirect2b.set_size(GX,BX);
-  this->k_udirect2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
+  this->k_udirect2b.run(&this->atom->x, &this->atom->extra,
+                        &coeff_amtype, &coeff_amclass, &sp_polar,
                         &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                         &this->dev_short_nbor,
                         &this->_fieldp, &ainum, &_nall, &nbor_pitch,
@@ -529,6 +605,80 @@ int HippoT::udirect2b(const int eflag, const int vflag) {
   return GX;
 }
 
+// ---------------------------------------------------------------------------
+// Reneighbor on GPU if necessary, and then compute the direct real space part
+//    of the induced field
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int** HippoT::compute_umutual2b(const int ago, const int inum_full,
+                                     const int nall, double **host_x,
+                                     int *host_type, int *host_amtype,
+                                     int *host_amgroup, double **host_rpole,
+                                     double **host_uind, double **host_uinp, double *host_pval,
+                                     double *sublo, double *subhi, tagint *tag,
+                                     int **nspecial, tagint **special,
+                                     int *nspecial15, tagint **special15,
+                                     const bool eflag_in, const bool vflag_in,
+                                     const bool eatom, const bool vatom,
+                                     int &host_start, int **ilist, int **jnum,
+                                     const double cpu_time, bool &success,
+                                     const double aewald, const double off2_polar,
+                                     double *host_q, double *boxlo, double *prd,
+                                     void** fieldp_ptr) {
+  this->acc_timers();
+  int eflag, vflag;
+  if (eatom) eflag=2;
+  else if (eflag_in) eflag=1;
+  else eflag=0;
+  if (vatom) vflag=2;
+  else if (vflag_in) vflag=1;
+  else vflag=0;
+
+  #ifdef LAL_NO_BLOCK_REDUCE
+  if (eflag) eflag=2;
+  if (vflag) vflag=2;
+  #endif
+
+  this->set_kernel(eflag,vflag);
+
+  // reallocate per-atom arrays, transfer extra data from the host
+  //   and build the neighbor lists if needed
+
+  int** firstneigh = nullptr;
+  firstneigh = precompute(ago, inum_full, nall, host_x, host_type,
+                          host_amtype, host_amgroup, host_rpole,
+                          host_uind, host_uinp, host_pval, sublo, subhi, tag,
+                          nspecial, special, nspecial15, special15,
+                          eflag_in, vflag_in, eatom, vatom,
+                          host_start, ilist, jnum, cpu_time,
+                          success, host_q, boxlo, prd);
+
+  // ------------------- Resize _fieldp array ------------------------
+
+  if (inum_full>this->_max_fieldp_size) {
+    this->_max_fieldp_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
+    this->_fieldp.resize(this->_max_fieldp_size*8);
+  }
+  *fieldp_ptr=this->_fieldp.host.begin();
+
+  this->_off2_polar = off2_polar;
+  this->_aewald = aewald;
+  const int red_blocks=umutual2b(eflag,vflag);
+
+  // copy field and fieldp from device to host (_fieldp store both arrays, one after another)
+
+  this->_fieldp.update_host(this->_max_fieldp_size*8,false);
+/*
+  printf("GPU lib: _fieldp size = %d: max fieldp size = %d\n",
+    this->_fieldp.cols(), _max_fieldp_size);
+  for (int i = 0; i < 10; i++) {
+    numtyp4* p = (numtyp4*)(&this->_fieldp[4*i]);
+    printf("i = %d; field = %f %f %f\n", i, p->x, p->y, p->z);
+  }
+*/
+  return firstneigh; //nbor->host_jlist.begin()-host_start;
+}
+
 // ---------------------------------------------------------------------------
 // Calculate the real-space induced field, returning field and fieldp
 // ---------------------------------------------------------------------------
@@ -558,7 +708,8 @@ int HippoT::umutual2b(const int eflag, const int vflag) {
   }
 
   this->k_umutual2b.set_size(GX,BX);
-  this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
+  this->k_umutual2b.run(&this->atom->x, &this->atom->extra,
+                        &coeff_amtype, &coeff_amclass, &sp_polar,
                         &this->nbor->dev_nbor, &this->_nbor_data->begin(),
                         &this->dev_short_nbor, &this->_fieldp, &ainum, &_nall,
                         &nbor_pitch, &this->_threads_per_atom, &this->_aewald,
diff --git a/lib/gpu/lal_hippo.cu b/lib/gpu/lal_hippo.cu
index 45361ed1fb..487e852baf 100644
--- a/lib/gpu/lal_hippo.cu
+++ b/lib/gpu/lal_hippo.cu
@@ -16,7 +16,6 @@
 #if defined(NV_KERNEL) || defined(USE_HIP)
 #include <stdio.h>
 #include "lal_hippo_extra.h"
-//#include "lal_aux_fun1.h"
 #ifdef LAMMPS_SMALLBIG
 #define tagint int
 #endif
@@ -985,10 +984,10 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
     numtyp qiyz = pol3i.x;   // rpole[i][9];
     numtyp qizz = pol3i.y;   // rpole[i][12];
     itype  = pol3i.z;        // amtype[i];
-    iclass = coeff_amtype[itype].w;  // amtype2class[itype];
 
-    numtyp corei = coeff_amclass[itype].z;  // pcore[iclass];
-    numtyp alphai = coeff_amclass[itype].w; // palpha[iclass];
+    iclass = coeff_amtype[itype].w;  // amtype2class[itype];
+    numtyp corei = coeff_amclass[iclass].z;  // pcore[iclass];
+    numtyp alphai = coeff_amclass[iclass].w; // palpha[iclass];
     numtyp vali = polar6[i].x;
 
     for ( ; nbor<nbor_end; nbor+=n_stride) {
@@ -1028,8 +1027,8 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
       const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
       numtyp factor_mpole = sp_pol.w; // sp_mpole[sbmask15(jextra)];
 
-      numtyp corek = coeff_amclass[jtype].z;  // pcore[jclass];
-      numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
+      numtyp corek = coeff_amclass[jclass].z;  // pcore[jclass];
+      numtyp alphak = coeff_amclass[jclass].w; // palpha[jclass];
       numtyp valk = polar6[j].x;
 
       // intermediates involving moments and separation distance
@@ -1249,10 +1248,11 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
 ------------------------------------------------------------------------- */
 
 __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
-                                 const __global numtyp *restrict extra,
-                                 const __global numtyp4 *restrict coeff,
-                                 const __global numtyp4 *restrict sp_polar,
-                                 const __global int *dev_nbor,
+                                const __global numtyp *restrict extra,
+                                const __global numtyp4 *restrict coeff_amtype,
+                                const __global numtyp4 *restrict coeff_amclass,
+                                const __global numtyp4 *restrict sp_polar,
+                                const __global int *dev_nbor,
                                  const __global int *dev_packed,
                                  const __global int *dev_short_nbor,
                                  __global acctyp4 *restrict fieldp,
@@ -1273,6 +1273,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
   numtyp4* polar1 = (numtyp4*)(&extra[0]);
   numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
   numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
+  numtyp4* polar6 = (numtyp4*)(&extra[20*nall]);
 
   if (ii<inum) {
     int numj, nbor, nbor_end;
@@ -1309,13 +1310,11 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
     numtyp qizz = pol3i.y;   // rpole[i][12];
     int itype  = pol3i.z;    // amtype[i];
     int igroup = pol3i.w;    // amgroup[i];
-    
-    // debug:
-    // xi__ = ix; xi__.w = itype;
+    int iclass = coeff_amtype[itype].w;  // amtype2class[itype];
 
-    numtyp pdi = coeff[itype].x;
-    numtyp pti = coeff[itype].y;
-    numtyp ddi = coeff[itype].z;
+    numtyp corei = coeff_amclass[iclass].z;  // pcore[iclass];
+    numtyp alphai = coeff_amclass[iclass].w; // palpha[iclass];
+    numtyp vali = polar6[i].x;
 
     numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
     numtyp aesq2n = (numtyp)0.0;
@@ -1360,15 +1359,21 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
       numtyp qkzz = pol3j.y; // rpole[j][12];
       int jtype = pol3j.z; // amtype[j];
       int jgroup =  pol3j.w; // amgroup[j];
+      int jclass = coeff_amtype[jtype].w;  // amtype2class[jtype];
 
-      numtyp factor_dscale, factor_pscale;
+      numtyp corek = coeff_amclass[jclass].z;  // pcore[jclass];
+      numtyp alphak = coeff_amclass[jclass].w; // palpha[jclass];
+      numtyp valk = polar6[j].x;
+
+      numtyp factor_wscale, factor_dscale, factor_pscale, factor_uscale;
       const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
+      factor_wscale = sp_pol.x; // special_polar_wscale[sbmask15(jextra)];
       if (igroup == jgroup) {
-        factor_pscale = sp_pol.y; // sp_polar_piscale[sbmask15(jextra)];
-        factor_dscale = polar_dscale;
+        factor_dscale = factor_pscale = sp_pol.y; // special_polar_piscale[sbmask15(jextra)];
+        factor_uscale = polar_uscale;
       } else {
-        factor_pscale = sp_pol.z; // sp_polar_pscale[sbmask15(jextra)];
-        factor_dscale = (numtyp)1.0;
+        factor_dscale = factor_pscale = sp_pol.z; // special_polar_pscale[sbmask15(jextra)];
+        factor_uscale = (numtyp)1.0;
       }
 
       // intermediates involving moments and separation distance
@@ -1400,50 +1405,42 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
         bn[m] = (bfac*bn[m-1]+aefac*exp2a) * r2inv;
       }
 
-      // find the field components for Thole polarization damping
-
-      numtyp scale3 = (numtyp)1.0;
-      numtyp scale5 = (numtyp)1.0;
-      numtyp scale7 = (numtyp)1.0;
-      numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype]
-      if (damp != (numtyp)0.0) {
-        numtyp pgamma = MIN(ddi,coeff[jtype].z); // dirdamp[jtype]
-        if (pgamma != (numtyp)0.0) {
-          damp = pgamma * ucl_powr(r/damp,(numtyp)1.5);
-          if (damp < (numtyp)50.0) {
-            numtyp expdamp = ucl_exp(-damp) ;
-            scale3 = (numtyp)1.0 - expdamp ;
-            scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+(numtyp)0.5*damp);
-            scale7 = (numtyp)1.0 - expdamp*((numtyp)1.0+(numtyp)0.65*damp + (numtyp)0.15*damp*damp);
-          }
-        } else {
-          pgamma = MIN(pti,coeff[jtype].y); // thole[jtype]
-          damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
-          if (damp < (numtyp)50.0) {
-            numtyp expdamp = ucl_exp(-damp);
-            scale3 = (numtyp)1.0 - expdamp;
-            scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp);
-            scale7 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp + (numtyp)0.6*damp*damp);
-          }
-        }
-      } else { // damp == 0: ???
-      }
-
+      // find the field components for charge penetration damping
+      numtyp dmpi[7],dmpk[7];
+      dampdir(r,alphai,alphak,dmpi,dmpk);
+          
       numtyp scalek = factor_dscale;
-      bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
-      bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
-      bcn[2] = bn[3] - ((numtyp)1.0-scalek*scale7)*rr7;
-      fid[0] = -xr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkx + (numtyp)2.0*bcn[1]*qkx;
-      fid[1] = -yr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dky + (numtyp)2.0*bcn[1]*qky;
-      fid[2] = -zr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkz + (numtyp)2.0*bcn[1]*qkz;
-        
+      numtyp rr3i = bn[1] - ((numtyp)1.0-scalek*dmpi[2])*rr3;
+      numtyp rr5i = bn[2] - ((numtyp)1.0-scalek*dmpi[4])*rr5;
+      numtyp rr7i = bn[3] - ((numtyp)1.0-scalek*dmpi[6])*rr7;
+      numtyp rr3k = bn[1] - ((numtyp)1.0-scalek*dmpk[2])*rr3;
+      numtyp rr5k = bn[2] - ((numtyp)1.0-scalek*dmpk[4])*rr5;
+      numtyp rr7k = bn[3] - ((numtyp)1.0-scalek*dmpk[6])*rr7;
+      rr3 = bn[1] - ((numtyp)1.0-scalek)*rr3;
+      fid[0] = -xr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
+        rr3k*dkx + (numtyp)2.0*rr5k*qkx;
+      fid[1] = -yr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
+        rr3k*dky + (numtyp)2.0*rr5k*qky;
+      fid[2] = -zr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
+        rr3k*dkz + (numtyp)2.0*rr5k*qkz;
+
       scalek = factor_pscale;
-      bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
-      bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
-      bcn[2] = bn[3] - ((numtyp)1.0-scalek*scale7)*rr7;
-      fip[0] = -xr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkx + (numtyp)2.0*bcn[1]*qkx;
-      fip[1] = -yr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dky + (numtyp)2.0*bcn[1]*qky;
-      fip[2] = -zr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkz + (numtyp)2.0*bcn[1]*qkz;
+      rr3 = r2inv * rr1;
+      rr3i = bn[1] - ((numtyp)1.0-scalek*dmpi[2])*rr3;
+      rr5i = bn[2] - ((numtyp)1.0-scalek*dmpi[4])*rr5;
+      rr7i = bn[3] - ((numtyp)1.0-scalek*dmpi[6])*rr7;
+      rr3k = bn[1] - ((numtyp)1.0-scalek*dmpk[2])*rr3;
+      rr5k = bn[2] - ((numtyp)1.0-scalek*dmpk[4])*rr5;
+      rr7k = bn[3] - ((numtyp)1.0-scalek*dmpk[6])*rr7;
+      rr3 = bn[1] - ((numtyp)1.0-scalek)*rr3;
+      fip[0] = -xr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
+        rr3k*dkx + (numtyp)2.0*rr5k*qkx;
+      fip[1] = -yr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
+        rr3k*dky + (numtyp)2.0*rr5k*qky;
+      fip[2] = -zr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
+        rr3k*dkz + (numtyp)2.0*rr5k*qkz;
+          
+      // find terms needed later to compute mutual polarization
 
       _fieldp[0] += fid[0];
       _fieldp[1] += fid[1];
@@ -1467,17 +1464,18 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
 ------------------------------------------------------------------------- */
 
 __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
-                                 const __global numtyp *restrict extra,
-                                 const __global numtyp4 *restrict coeff,
-                                 const __global numtyp4 *restrict sp_polar,
-                                 const __global int *dev_nbor,
-                                 const __global int *dev_packed,
-                                 const __global int *dev_short_nbor,
-                                 __global acctyp4 *restrict fieldp,
-                                 const int inum,  const int nall,
-                                 const int nbor_pitch, const int t_per_atom,
-                                 const numtyp aewald, const numtyp off2,
-                                 const numtyp polar_dscale, const numtyp polar_uscale)
+                                const __global numtyp *restrict extra,
+                                const __global numtyp4 *restrict coeff_amtype,
+                                const __global numtyp4 *restrict coeff_amclass,
+                                const __global numtyp4 *restrict sp_polar,
+                                const __global int *dev_nbor,
+                                const __global int *dev_packed,
+                                const __global int *dev_short_nbor,
+                                __global acctyp4 *restrict fieldp,
+                                const int inum,  const int nall,
+                                const int nbor_pitch, const int t_per_atom,
+                                const numtyp aewald, const numtyp off2,
+                                const numtyp polar_dscale, const numtyp polar_uscale)
 {
   int tid, ii, offset, i;
   atom_info(t_per_atom,ii,tid,offset);
@@ -1491,6 +1489,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
   numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
   numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
   numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);
+  numtyp4* polar6 = (numtyp4*)(&extra[20*nall]);
 
   //numtyp4 xi__;
 
@@ -1518,9 +1517,11 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
     
     itype  = polar3[i].z; // amtype[i];
     igroup = polar3[i].w; // amgroup[i];
-    
-    numtyp pdi = coeff[itype].x;
-    numtyp pti = coeff[itype].y;
+
+    int iclass = coeff_amtype[itype].w;  // amtype2class[itype];
+    numtyp corei = coeff_amclass[iclass].z;  // pcore[iclass];
+    numtyp alphai = coeff_amclass[iclass].w; // palpha[iclass];
+    numtyp vali = polar6[i].x;
 
     numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
     numtyp aesq2n = (numtyp)0.0;
@@ -1561,9 +1562,21 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
       numtyp ukyp = pol5j.y; // uinp[j][1];
       numtyp ukzp = pol5j.z; // uinp[j][2];
 
-      numtyp factor_uscale;
-      if (igroup == jgroup) factor_uscale = polar_uscale;
-      else factor_uscale = (numtyp)1.0;
+      int jclass = coeff_amtype[jtype].w;  // amtype2class[jtype];
+      numtyp corek = coeff_amclass[jclass].z;  // pcore[jclass];
+      numtyp alphak = coeff_amclass[jclass].w; // palpha[jclass];
+      numtyp valk = polar6[j].x;
+
+      numtyp factor_wscale, factor_dscale, factor_pscale, factor_uscale;
+      const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
+      factor_wscale = sp_pol.x; // special_polar_wscale[sbmask15(jextra)];
+      if (igroup == jgroup) {
+        factor_dscale = factor_pscale = sp_pol.y; // special_polar_piscale[sbmask15(jextra)];
+        factor_uscale = polar_uscale;
+      } else {
+        factor_dscale = factor_pscale = sp_pol.z; // special_polar_pscale[sbmask15(jextra)];
+        factor_uscale = (numtyp)1.0;
+      }
 
       // calculate the real space Ewald error function terms
 
@@ -1583,32 +1596,20 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
 
       // find terms needed later to compute mutual polarization
       // if (poltyp != DIRECT) 
-      numtyp scale3 = (numtyp)1.0;
-      numtyp scale5 = (numtyp)1.0;
-      numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype]
-      if (damp != (numtyp)0.0) {
-        numtyp pgamma = MIN(pti,coeff[jtype].y); // thole[jtype]
-        damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
-        if (damp < (numtyp)50.0) {
-          numtyp expdamp = ucl_exp(-damp);
-          scale3 = (numtyp)1.0 - expdamp;
-          scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp);
-        }
-        
-      } else { // damp == 0: ???
-      }
+      numtyp dmpik[5];
+      dampmut(r,alphai,alphak,dmpik);
+      numtyp scalek = factor_wscale;
+      rr3 = r2inv * rr1;
+      numtyp rr3ik = bn[1] - ((numtyp)1.0-scalek*dmpik[2])*rr3;
+      numtyp rr5ik = bn[2] - ((numtyp)1.0-scalek*dmpik[4])*rr5;
 
-      numtyp scalek = factor_uscale;
-      bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
-      bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
-
-      numtyp tdipdip[6]; // the following tdipdip is incorrect!! needs work to store tdipdip
-      tdipdip[0] = -bcn[0] + bcn[1]*xr*xr;
-      tdipdip[1] = bcn[1]*xr*yr;
-      tdipdip[2] = bcn[1]*xr*zr;
-      tdipdip[3] = -bcn[0] + bcn[1]*yr*yr;
-      tdipdip[4] = bcn[1]*yr*zr;
-      tdipdip[5] = -bcn[0] + bcn[1]*zr*zr;
+      numtyp tdipdip[6];
+      tdipdip[0] = -rr3ik + rr5ik*xr*xr;
+      tdipdip[1] = rr5ik*xr*yr;
+      tdipdip[2] = rr5ik*xr*zr;
+      tdipdip[3] = -rr3ik + rr5ik*yr*yr;
+      tdipdip[4] = rr5ik*yr*zr;
+      tdipdip[5] = -rr3ik + rr5ik*zr*zr;
       //if (i==0 && j == 10) 
       //  printf("i = %d: j = %d: tdipdip %f %f %f %f %f %f\n",
       //    i, j,tdipdip[0],tdipdip[1],tdipdip[2],tdipdip[3],tdipdip[4],tdipdip[5]);
diff --git a/lib/gpu/lal_hippo.h b/lib/gpu/lal_hippo.h
index 36e7a81237..a6742eee28 100644
--- a/lib/gpu/lal_hippo.h
+++ b/lib/gpu/lal_hippo.h
@@ -90,6 +90,40 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
                 const double aewald, const double felec, const double off2_mpole, double *charge,
                 double *boxlo, double *prd, void **tep_ptr);
 
+   /// Compute the real space part of the permanent field (udirect2b) with device neighboring
+   virtual int** compute_udirect2b(const int ago, const int inum_full,
+                  const int nall, double **host_x,
+                  int *host_type, int *host_amtype,
+                  int *host_amgroup, double **host_rpole,
+                  double **host_uind, double **host_uinp, double* host_pval,
+                  double *sublo, double *subhi, tagint *tag,
+                  int **nspecial, tagint **special,
+                  int *nspecial15, tagint **special15,
+                  const bool eflag_in, const bool vflag_in,
+                  const bool eatom, const bool vatom,
+                  int &host_start, int **ilist, int **jnum,
+                  const double cpu_time, bool &success,
+                  const double aewald, const double off2_polar,
+                  double *host_q, double *boxlo, double *prd,
+                  void** fieldp_ptr);
+
+   /// Compute the real space part of the induced field (umutual2b) with device neighboring
+   virtual int** compute_umutual2b(const int ago, const int inum_full,
+                                     const int nall, double **host_x,
+                                     int *host_type, int *host_amtype,
+                                     int *host_amgroup, double **host_rpole,
+                                     double **host_uind, double **host_uinp, double *host_pval,
+                                     double *sublo, double *subhi, tagint *tag,
+                                     int **nspecial, tagint **special,
+                                     int *nspecial15, tagint **special15,
+                                     const bool eflag_in, const bool vflag_in,
+                                     const bool eatom, const bool vatom,
+                                     int &host_start, int **ilist, int **jnum,
+                                     const double cpu_time, bool &success,
+                                     const double aewald, const double off2_polar,
+                                     double *host_q, double *boxlo, double *prd,
+                                     void** fieldp_ptr);
+
   /// Compute polar real-space with device neighboring
   virtual int** compute_polar_real(const int ago, const int inum_full, const int nall,
                 double **host_x, int *host_type, int *host_amtype,
diff --git a/lib/gpu/lal_hippo_ext.cpp b/lib/gpu/lal_hippo_ext.cpp
index 1851c3aba3..16b697d88f 100644
--- a/lib/gpu/lal_hippo_ext.cpp
+++ b/lib/gpu/lal_hippo_ext.cpp
@@ -157,7 +157,7 @@ int** hippo_gpu_compute_multipole_real(const int ago, const int inum_full,
 int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full,
                            const int nall, double **host_x, int *host_type,
                            int *host_amtype, int *host_amgroup, double **host_rpole,
-                           double **host_uind, double **host_uinp,
+                           double **host_uind, double **host_uinp, double *host_pval,
                            double *sublo, double *subhi, tagint *tag, int **nspecial,
                            tagint **special, int *nspecial15, tagint** special15,
                            const bool eflag, const bool vflag, const bool eatom,
@@ -166,7 +166,7 @@ int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full,
                            bool &success,  const double aewald, const double off2, double *host_q,
                            double *boxlo, double *prd, void **fieldp_ptr) {
   return HIPPOMF.compute_udirect2b(ago, inum_full, nall, host_x, host_type,
-                          host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
+                          host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval,
                           sublo, subhi, tag, nspecial, special, nspecial15, special15,
                           eflag, vflag, eatom, vatom, host_start, ilist, jnum,
                           cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr);
@@ -175,7 +175,7 @@ int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full,
 int** hippo_gpu_compute_umutual2b(const int ago, const int inum_full,
                            const int nall, double **host_x, int *host_type,
                            int *host_amtype, int *host_amgroup, double **host_rpole,
-                           double **host_uind, double **host_uinp,
+                           double **host_uind, double **host_uinp, double *host_pval, 
                            double *sublo, double *subhi, tagint *tag, int **nspecial,
                            tagint **special, int *nspecial15, tagint** special15,
                            const bool eflag, const bool vflag,
@@ -184,7 +184,7 @@ int** hippo_gpu_compute_umutual2b(const int ago, const int inum_full,
                            bool &success, const double aewald, const double off2, double *host_q,
                            double *boxlo, double *prd, void **fieldp_ptr) {
   return HIPPOMF.compute_umutual2b(ago, inum_full, nall, host_x, host_type,
-                          host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
+                          host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval,
                           sublo, subhi, tag, nspecial, special, nspecial15, special15,
                           eflag, vflag, eatom, vatom, host_start, ilist, jnum,
                           cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr);
diff --git a/lib/gpu/lal_hippo_extra.h b/lib/gpu/lal_hippo_extra.h
index a06ac4425c..cacee4ae72 100644
--- a/lib/gpu/lal_hippo_extra.h
+++ b/lib/gpu/lal_hippo_extra.h
@@ -59,7 +59,7 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
   // compute tolerance value for damping exponents
 
   eps = (numtyp)0.001;
-  diff = dmpi-dmpk; 
+  diff = dmpi-dmpk; // fabs(dmpi-dmpk)
   if (diff < (numtyp)0) diff = -diff;
 
   // treat the case where alpha damping exponents are equal
@@ -322,6 +322,109 @@ ucl_inline void damppole(const numtyp r, const int rorder,
   }
 }
 
+/* ----------------------------------------------------------------------
+   dampdir = direct field damping coefficents
+   dampdir generates coefficients for the direct field damping
+   function for powers of the interatomic distance
+------------------------------------------------------------------------- */
 
+ucl_inline void dampdir(numtyp r, numtyp alphai, numtyp alphak, numtyp *dmpi, numtyp *dmpk)
+{
+  numtyp eps,diff;
+  numtyp expi,expk;
+  numtyp dampi,dampk;
+  numtyp dampi2,dampk2;
+  numtyp dampi3,dampk3;
+  numtyp dampi4,dampk4;
+
+  // compute tolerance and exponential damping factors
+
+  eps = (numtyp)0.001;
+  diff = alphai-alphak; // fabs(alphai-alphak);
+  if (diff < (numtyp)0) diff = -diff;
+  dampi = alphai * r;
+  dampk = alphak * r;
+  expi = ucl_exp(-dampi);
+  expk = ucl_exp(-dampk);
+
+  // core-valence charge penetration damping for Gordon f1 (HIPPO)
+
+  dampi2 = dampi * dampi;
+  dampi3 = dampi * dampi2;
+  dampi4 = dampi2 * dampi2;
+  dmpi[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2)*expi;
+  dmpi[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi;
+  dmpi[6] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 + dampi4/(numtyp)30.0)*expi;
+  if (diff < eps) {
+    dmpk[2] = dmpi[2];
+    dmpk[4] = dmpi[4];
+    dmpk[6] = dmpi[6];
+  } else {
+    dampk2 = dampk * dampk;
+    dampk3 = dampk * dampk2;
+    dampk4 = dampk2 * dampk2;
+    dmpk[2] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2)*expk;
+    dmpk[4] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0)*expk;
+    dmpk[6] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 + dampk4/30.0)*expk;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   dampmut = mutual field damping coefficents
+   dampmut generates coefficients for the mutual field damping
+   function for powers of the interatomic distance
+------------------------------------------------------------------------- */
+
+ucl_inline void dampmut(numtyp r, numtyp alphai, numtyp alphak, numtyp dmpik[5])
+{
+  numtyp termi,termk;
+  numtyp termi2,termk2;
+  numtyp alphai2,alphak2;
+  numtyp eps,diff;
+  numtyp expi,expk;
+  numtyp dampi,dampk;
+  numtyp dampi2,dampi3;
+  numtyp dampi4,dampi5;
+  numtyp dampk2,dampk3;
+
+  // compute tolerance and exponential damping factors
+
+  eps = (numtyp)0.001;
+  diff = alphai-alphak; // fabs(alphai-alphak);
+  if (diff < (numtyp)0) diff = -diff;
+  dampi = alphai * r;
+  dampk = alphak * r;
+  expi = ucl_exp(-dampi);
+  expk = ucl_exp(-dampk);
+
+  // valence-valence charge penetration damping for Gordon f1 (HIPPO)
+
+  dampi2 = dampi * dampi;
+  dampi3 = dampi * dampi2;
+  if (diff < eps) {
+    dampi4 = dampi2 * dampi2;
+    dampi5 = dampi2 * dampi3;
+    dmpik[2] = 1.0 - (1.0 + dampi + 0.5*dampi2 + 
+                      7.0*dampi3/48.0 + dampi4/48.0)*expi;
+    dmpik[4] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 + 
+                      dampi4/24.0 + dampi5/144.0)*expi;
+  } else {
+    dampk2 = dampk * dampk;
+    dampk3 = dampk * dampk2;
+    alphai2 = alphai * alphai;
+    alphak2 = alphak * alphak;
+    termi = alphak2 / (alphak2-alphai2);
+    termk = alphai2 / (alphai2-alphak2);
+    termi2 = termi * termi;
+    termk2 = termk * termk;
+    dmpik[2] = 1.0 - termi2*(1.0+dampi+0.5*dampi2)*expi - 
+      termk2*(1.0+dampk+0.5*dampk2)*expk - 
+      2.0*termi2*termk*(1.0+dampi)*expi - 2.0*termk2*termi*(1.0+dampk)*expk;
+    dmpik[4] = 1.0 - termi2*(1.0+dampi+0.5*dampi2 + dampi3/6.0)*expi - 
+      termk2*(1.0+dampk+0.5*dampk2 + dampk3/6.00)*expk - 
+      2.0*termi2*termk *(1.0+dampi+dampi2/3.0)*expi - 
+      2.0*termk2*termi *(1.0+dampk+dampk2/3.0)*expk;
+  }
+}
 
 #endif
diff --git a/src/AMOEBA/amoeba_induce.cpp b/src/AMOEBA/amoeba_induce.cpp
index 5b855abdd0..617eb89fcd 100644
--- a/src/AMOEBA/amoeba_induce.cpp
+++ b/src/AMOEBA/amoeba_induce.cpp
@@ -1900,7 +1900,7 @@ void PairAmoeba::dampmut(double r, double alphai, double alphak, double *dmpik)
 ------------------------------------------------------------------------- */
 
 void PairAmoeba::dampdir(double r, double alphai, double alphak, 
-                         double *dmpi, double *dmpk)
+                         double dmpi[7], double dmpk[7])
 {
   double eps,diff;
   double expi,expk;
diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp
index 1067969c7b..f4cbf28561 100644
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@@ -89,7 +89,7 @@ int ** hippo_gpu_compute_multipole_real(const int ago, const int inum, const int
 int ** hippo_gpu_compute_udirect2b(const int ago, const int inum, const int nall,
               double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
               double **host_rpole, double **host_uind, double **host_uinp, 
-              double *sublo, double *subhi, tagint *tag, int **nspecial,
+              double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial,
               tagint **special, int* nspecial15, tagint** special15,
               const bool eflag, const bool vflag, const bool eatom, const bool vatom,
               int &host_start, int **ilist, int **jnum, const double cpu_time,
@@ -98,7 +98,7 @@ int ** hippo_gpu_compute_udirect2b(const int ago, const int inum, const int nall
 
 int ** hippo_gpu_compute_umutual2b(const int ago, const int inum, const int nall,
               double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
-              double **host_rpole, double **host_uind, double **host_uinp, 
+              double **host_rpole, double **host_uind, double **host_uinp, double *host_pval,
               double *sublo, double *subhi, tagint *tag, int **nspecial,
               tagint **special, int* nspecial15, tagint** special15,
               const bool eflag, const bool vflag, const bool eatom, const bool vatom,
@@ -136,8 +136,8 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
   gpu_repulsion_ready = false;         // true for HIPPO when ready
   gpu_dispersion_real_ready = true;   // true for HIPPO when ready
   gpu_multipole_real_ready = true;
-  gpu_udirect2b_ready = false;
-  gpu_umutual2b_ready = false;
+  gpu_udirect2b_ready = true;
+  gpu_umutual2b_ready = true;
   gpu_polar_real_ready = true;
 
   GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
@@ -791,7 +791,7 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
 
   firstneigh = hippo_gpu_compute_udirect2b(neighbor->ago, inum, nall, atom->x,
                                             atom->type, amtype, amgroup, rpole,
-                                            uind, uinp, sublo, subhi, atom->tag,
+                                            uind, uinp, pval, sublo, subhi, atom->tag,
                                             atom->nspecial, atom->special,
                                             atom->nspecial15, atom->special15,
                                             eflag, vflag, eflag_atom, vflag_atom,
@@ -1015,14 +1015,14 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp)
   else choose(POLAR);
 
   firstneigh = hippo_gpu_compute_umutual2b(neighbor->ago, inum, nall, atom->x,
-                                            atom->type, amtype, amgroup, rpole,
-                                            uind, uinp, sublo, subhi, atom->tag,
-                                            atom->nspecial, atom->special,
-                                            atom->nspecial15, atom->special15,
-                                            eflag, vflag, eflag_atom, vflag_atom,
-                                            host_start, &ilist, &numneigh, cpu_time,
-                                            success,aewald, off2, atom->q,
-                                            domain->boxlo, domain->prd, &fieldp_pinned);
+                                           atom->type, amtype, amgroup, rpole,
+                                           uind, uinp, pval, sublo, subhi, atom->tag,
+                                           atom->nspecial, atom->special,
+                                           atom->nspecial15, atom->special15,
+                                           eflag, vflag, eflag_atom, vflag_atom,
+                                           host_start, &ilist, &numneigh, cpu_time,
+                                           success,aewald, off2, atom->q,
+                                           domain->boxlo, domain->prd, &fieldp_pinned);
   if (!success)
     error->one(FLERR,"Insufficient memory on accelerator");