diff --git a/lib/gpu/lal_amoeba_ext.cpp b/lib/gpu/lal_amoeba_ext.cpp
index 18e1cf22f8..63ed683833 100644
--- a/lib/gpu/lal_amoeba_ext.cpp
+++ b/lib/gpu/lal_amoeba_ext.cpp
@@ -148,6 +148,10 @@ void amoeba_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, double **
                              aewald, off2, fieldp_ptr);
 }
 
+void amoeba_gpu_update_fieldp(void **fieldp_ptr) {
+  AMOEBAMF.update_fieldp(fieldp_ptr);
+}
+
 void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
                                     double **host_uind, double **host_uinp,
                                     const bool eflag_in, const bool vflag_in,
diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp
index 5b396a641e..781945b77b 100644
--- a/lib/gpu/lal_base_amoeba.cpp
+++ b/lib/gpu/lal_base_amoeba.cpp
@@ -528,8 +528,8 @@ void BaseAmoebaT::compute_umutual2b(int *host_amtype, int *host_amgroup, double
   const int red_blocks=umutual2b(_eflag,_vflag);
 
   // copy field and fieldp from device to host (_fieldp store both arrays, one after another)
-
-  _fieldp.update_host(_max_fieldp_size*8,false);
+  // NOTE: move this step to update_fieldp() to delay device-host transfer
+  //_fieldp.update_host(_max_fieldp_size*8,false);
 }
 
 // ---------------------------------------------------------------------------
diff --git a/lib/gpu/lal_base_amoeba.h b/lib/gpu/lal_base_amoeba.h
index 7f9777061c..f439e2945f 100644
--- a/lib/gpu/lal_base_amoeba.h
+++ b/lib/gpu/lal_base_amoeba.h
@@ -183,6 +183,13 @@ class BaseAmoeba {
                const double off2_polar, double *charge, const int nlocal, double *boxlo,
                double *prd, void **tep_ptr);
 
+  // copy field and fieldp from device to host after umutual2b
+  virtual void update_fieldp(void **fieldp_ptr) {
+    *fieldp_ptr=_fieldp.host.begin();
+     // _fieldp store both arrays, one after another
+    _fieldp.update_host(_max_fieldp_size*8,false);
+  }
+  
   // -------------------------- DEVICE DATA -------------------------
 
   /// Device Properties and Atom and Neighbor storage
diff --git a/lib/gpu/lal_hippo.cpp b/lib/gpu/lal_hippo.cpp
index f62c46aaec..3065bfefd4 100644
--- a/lib/gpu/lal_hippo.cpp
+++ b/lib/gpu/lal_hippo.cpp
@@ -549,8 +549,8 @@ void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **hos
   const int red_blocks=umutual2b(this->_eflag,this->_vflag);
 
   // copy field and fieldp from device to host (_fieldp store both arrays, one after another)
-
-  this->_fieldp.update_host(this->_max_fieldp_size*8,false);
+  // NOTE: move this step to update_fieldp() to delay device-host transfer
+  //this->_fieldp.update_host(this->_max_fieldp_size*8,false);
 }
 
 // ---------------------------------------------------------------------------
diff --git a/lib/gpu/lal_hippo_ext.cpp b/lib/gpu/lal_hippo_ext.cpp
index 9d3d845ad0..e7deaddbf3 100644
--- a/lib/gpu/lal_hippo_ext.cpp
+++ b/lib/gpu/lal_hippo_ext.cpp
@@ -179,6 +179,10 @@ void hippo_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, double **h
                             aewald, off2, fieldp_ptr);
 }
 
+void hippo_gpu_update_fieldp(void **fieldp_ptr) {
+  HIPPOMF.update_fieldp(fieldp_ptr);
+}
+
 void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
                                   double **host_uind, double **host_uinp, double *host_pval,
                                   const bool eflag_in, const bool vflag_in,
diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp
index fd9d99e56c..1376a6bd12 100644
--- a/src/GPU/pair_amoeba_gpu.cpp
+++ b/src/GPU/pair_amoeba_gpu.cpp
@@ -82,6 +82,8 @@ void amoeba_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup,
               double **host_rpole, double **host_uind, double **host_uinp,
               const double aewald, const double off2, void **fieldp_ptr);
 
+void amoeba_gpu_update_fieldp(void **fieldp_ptr);
+
 void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup,
               double **host_rpole, double **host_uind, double **host_uinp,
               const bool eflag, const bool vflag, const bool eatom, const bool vatom,
@@ -844,6 +846,72 @@ void PairAmoebaGPU::udirect2b_cpu()
   }
 }
 
+/* ----------------------------------------------------------------------
+   ufield0c = mutual induction via Ewald sum
+   ufield0c computes the mutual electrostatic field due to
+   induced dipole moments via Ewald summation
+------------------------------------------------------------------------- */
+
+void PairAmoebaGPU::ufield0c(double **field, double **fieldp)
+{
+  int i,j;
+  double term;
+
+  // zero field,fieldp for owned and ghost atoms
+
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+
+  for (i = 0; i < nall; i++) {
+    for (j = 0; j < 3; j++) {
+      field[i][j] = 0.0;
+      fieldp[i][j] = 0.0;
+    }
+  }
+
+  // get the real space portion of the mutual field first
+
+  if (polar_rspace_flag) umutual2b(field,fieldp);
+
+  // get the reciprocal space part of the mutual field
+
+  if (polar_kspace_flag) umutual1(field,fieldp);
+
+  // add the self-energy portion of the mutual field
+
+  term = (4.0/3.0) * aewald*aewald*aewald / MY_PIS;
+  for (i = 0; i < nlocal; i++) {
+    for (j = 0; j < 3; j++) {
+      field[i][j] += term*uind[i][j];
+      fieldp[i][j] += term*uinp[i][j];
+    }
+  }
+
+  // accumulate the field and fieldp values from the real space portion from umutual2b() on the GPU
+  //   field and fieldp may already have some nonzero values from kspace (umutual1 and self)
+
+  amoeba_gpu_update_fieldp(&fieldp_pinned);
+  
+  int inum = atom->nlocal;
+  double *field_ptr = (double *)fieldp_pinned;
+
+  for (int i = 0; i < nlocal; i++) {
+    int idx = 4*i;
+    field[i][0] += field_ptr[idx];
+    field[i][1] += field_ptr[idx+1];
+    field[i][2] += field_ptr[idx+2];
+  }
+
+  double* fieldp_ptr = (double *)fieldp_pinned;
+  fieldp_ptr += 4*inum;
+  for (int i = 0; i < nlocal; i++) {
+    int idx = 4*i;
+    fieldp[i][0] += fieldp_ptr[idx];
+    fieldp[i][1] += fieldp_ptr[idx+1];
+    fieldp[i][2] += fieldp_ptr[idx+2];
+  }
+}
+
 /* ----------------------------------------------------------------------
    umutual2b = Ewald real mutual field via list
    umutual2b computes the real space contribution of the induced
@@ -881,7 +949,7 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp)
 
   amoeba_gpu_compute_umutual2b(amtype, amgroup, rpole, uind, uinp,
                                aewald, off2, &fieldp_pinned);
-
+/*
   // accumulate the field and fieldp values from the GPU lib
   //   field and fieldp may already have some nonzero values from kspace (umutual1)
 
@@ -903,6 +971,7 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp)
     fieldp[i][1] += fieldp_ptr[idx+1];
     fieldp[i][2] += fieldp_ptr[idx+2];
   }
+*/  
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/GPU/pair_amoeba_gpu.h b/src/GPU/pair_amoeba_gpu.h
index e0210faa68..e419ccd1a1 100644
--- a/src/GPU/pair_amoeba_gpu.h
+++ b/src/GPU/pair_amoeba_gpu.h
@@ -39,6 +39,7 @@ class PairAmoebaGPU : public PairAmoeba {
   virtual void multipole_real();
   virtual void udirect2b(double **, double **);
   virtual void umutual2b(double **, double **);
+  virtual void ufield0c(double **, double **);
   virtual void polar_real();
 
  private:
diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp
index 535be0c160..41c1355fbb 100644
--- a/src/GPU/pair_hippo_gpu.cpp
+++ b/src/GPU/pair_hippo_gpu.cpp
@@ -100,6 +100,8 @@ void hippo_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup,
               double **host_rpole, double **host_uind, double **host_uinp, double *host_pval,
               const double aewald, const double off2, void **fieldp_ptr);
 
+void hippo_gpu_update_fieldp(void **fieldp_ptr);
+
 void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup,
               double **host_rpole, double **host_uind, double **host_uinp, double *host_pval,
               const bool eflag, const bool vflag, const bool eatom, const bool vatom,
@@ -983,6 +985,72 @@ void PairHippoGPU::udirect2b_cpu()
   }
 }
 
+/* ----------------------------------------------------------------------
+   ufield0c = mutual induction via Ewald sum
+   ufield0c computes the mutual electrostatic field due to
+   induced dipole moments via Ewald summation
+------------------------------------------------------------------------- */
+
+void PairHippoGPU::ufield0c(double **field, double **fieldp)
+{
+  int i,j;
+  double term;
+
+  // zero field,fieldp for owned and ghost atoms
+
+  int nlocal = atom->nlocal;
+  int nall = nlocal + atom->nghost;
+
+  for (i = 0; i < nall; i++) {
+    for (j = 0; j < 3; j++) {
+      field[i][j] = 0.0;
+      fieldp[i][j] = 0.0;
+    }
+  }
+
+  // get the real space portion of the mutual field first
+
+  if (polar_rspace_flag) umutual2b(field,fieldp);
+
+  // get the reciprocal space part of the mutual field
+
+  if (polar_kspace_flag) umutual1(field,fieldp);
+
+  // add the self-energy portion of the mutual field
+
+  term = (4.0/3.0) * aewald*aewald*aewald / MY_PIS;
+  for (i = 0; i < nlocal; i++) {
+    for (j = 0; j < 3; j++) {
+      field[i][j] += term*uind[i][j];
+      fieldp[i][j] += term*uinp[i][j];
+    }
+  }
+
+  // accumulate the field and fieldp values from real-space portion from umutual2b() on the GPU
+  //   field and fieldp may already have some nonzero values from kspace (umutual1 and self)
+
+  hippo_gpu_update_fieldp(&fieldp_pinned);
+  
+  int inum = atom->nlocal;
+  double *field_ptr = (double *)fieldp_pinned;
+
+  for (int i = 0; i < nlocal; i++) {
+    int idx = 4*i;
+    field[i][0] += field_ptr[idx];
+    field[i][1] += field_ptr[idx+1];
+    field[i][2] += field_ptr[idx+2];
+  }
+
+  double* fieldp_ptr = (double *)fieldp_pinned;
+  fieldp_ptr += 4*inum;
+  for (int i = 0; i < nlocal; i++) {
+    int idx = 4*i;
+    fieldp[i][0] += fieldp_ptr[idx];
+    fieldp[i][1] += fieldp_ptr[idx+1];
+    fieldp[i][2] += fieldp_ptr[idx+2];
+  }
+}
+
 /* ----------------------------------------------------------------------
    umutual2b = Ewald real mutual field via list
    umutual2b computes the real space contribution of the induced
@@ -1021,7 +1089,7 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp)
   double *pval = atom->dvector[index_pval];
   hippo_gpu_compute_umutual2b(amtype, amgroup, rpole, uind, uinp, pval,
                               aewald, off2, &fieldp_pinned);
-
+/*
   // accumulate the field and fieldp values from the GPU lib
   //   field and fieldp may already have some nonzero values from kspace (umutual1)
 
@@ -1043,6 +1111,7 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp)
     fieldp[i][1] += fieldp_ptr[idx+1];
     fieldp[i][2] += fieldp_ptr[idx+2];
   }
+*/
 }
 
 /* ----------------------------------------------------------------------
diff --git a/src/GPU/pair_hippo_gpu.h b/src/GPU/pair_hippo_gpu.h
index c7a4e75ebe..1ed1c3299d 100644
--- a/src/GPU/pair_hippo_gpu.h
+++ b/src/GPU/pair_hippo_gpu.h
@@ -40,6 +40,7 @@ class PairHippoGPU : public PairAmoeba {
   virtual void multipole_real();
   virtual void udirect2b(double **, double **);
   virtual void umutual2b(double **, double **);
+  virtual void ufield0c(double **, double **);
   virtual void polar_real();
 
  private: