diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu
index 105f18cfa8..d67fa4f869 100644
--- a/lib/gpu/lal_amoeba.cu
+++ b/lib/gpu/lal_amoeba.cu
@@ -1646,7 +1646,7 @@ __kernel void k_fphi_uind(const __global numtyp4 *restrict x_,
     //numtyp4 ix; fetch4(ix,ii,pos_tex); //x_[i];
     acctyp fdip_buf[32];
 
-    int j,k,m;    
+    int j,k;    
     int nlpts = (bsorder-1) / 2;
 
     // extract the permanent multipole field at each site
diff --git a/lib/gpu/lal_amoeba_ext.cpp b/lib/gpu/lal_amoeba_ext.cpp
index 95b7237e46..f91b76f688 100644
--- a/lib/gpu/lal_amoeba_ext.cpp
+++ b/lib/gpu/lal_amoeba_ext.cpp
@@ -164,15 +164,16 @@ void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double *
 
 void amoeba_gpu_fphi_uind(const int inum_full, const int bsorder,
                           double ***host_thetai1, double ***host_thetai2,
-                          double ***host_thetai3, int** igrid,
-                          double *host_grid_brick_start, double ****host_grid_brick,
+                          double ***host_thetai3, int** igrid, double ****host_grid_brick,
                           void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
-                          int nzlo_out, int nzhi_out, int nylo_out, int nyhi_out,
-                          int nxlo_out, int nxhi_out, bool& first_iteration) {
+                          const int nzlo_out, const int nzhi_out,
+                          const int nylo_out, const int nyhi_out,
+                          const int nxlo_out, const int nxhi_out,
+                          bool& first_iteration) {
    AMOEBAMF.compute_fphi_uind(inum_full, bsorder, host_thetai1, host_thetai2,
-                          host_thetai3, igrid, host_grid_brick_start, host_grid_brick, host_fdip_phi1,
-                          host_fdip_phi2, host_fdip_sum_phi, nzlo_out, nzhi_out,
-                          nylo_out, nyhi_out,  nxlo_out, nxhi_out, first_iteration);
+                              host_thetai3, igrid, host_grid_brick, host_fdip_phi1,
+                              host_fdip_phi2, host_fdip_sum_phi, nzlo_out, nzhi_out,
+                              nylo_out, nyhi_out, nxlo_out, nxhi_out, first_iteration);
 }
 
 void amoeba_setup_fft(const int numel, const int element_type) {
diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp
index 05b830d773..dfd5565f1e 100644
--- a/lib/gpu/lal_base_amoeba.cpp
+++ b/lib/gpu/lal_base_amoeba.cpp
@@ -186,8 +186,10 @@ void BaseAmoebaT::clear_atomic() {
   _igrid.clear();
   _fdip_phi1.clear();
   _fdip_phi2.clear();
-  _cgrid_brick.clear();
   _fdip_sum_phi.clear();
+  _cgrid_brick.clear();
+  hview_cgrid.clear();
+
   dev_nspecial15.clear();
   dev_special15.clear();
   dev_special15_t.clear();
@@ -563,10 +565,9 @@ template <class numtyp, class acctyp>
 void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
                                     double ***host_thetai1, double ***host_thetai2,
                                     double ***host_thetai3, int** host_igrid,
-                                    double* host_grid_brick_start, double**** host_grid_brick,
-                                    int nzlo_out, int nzhi_out,
-                                    int nylo_out, int nyhi_out,
-                                    int nxlo_out, int nxhi_out) {
+                                    const int nzlo_out, const int nzhi_out,
+                                    const int nylo_out, const int nyhi_out,
+                                    const int nxlo_out, const int nxhi_out) {
   
   _bsorder = bsorder;
 
@@ -642,34 +643,8 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
     dview_int[idx+1] = host_igrid[i][1];
     dview_int[idx+2] = host_igrid[i][2];
   }
-  ucl_copy(_igrid, dview_int, false);  
-}
+  ucl_copy(_igrid, dview_int, false);
 
-// ---------------------------------------------------------------------------
-// fphi_uind = induced potential from grid
-// fphi_uind extracts the induced dipole potential from the particle mesh Ewald grid
-// ---------------------------------------------------------------------------
-
-template <class numtyp, class acctyp>
-void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
-                                    double ***host_thetai1, double ***host_thetai2,
-                                    double ***host_thetai3, int** host_igrid,
-                                    double *host_grid_brick_start, double ****host_grid_brick,
-                                    void** host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
-                                    int nzlo_out, int nzhi_out, int nylo_out, int nyhi_out,
-                                    int nxlo_out, int nxhi_out, bool& first_iteration)
-{
-  // allocation/resize and transfers before the first iteration
-  
-  if (first_iteration) {
-    precompute_induce(inum_full, bsorder, host_thetai1, host_thetai2, host_thetai3,
-                      host_igrid, host_grid_brick_start, host_grid_brick, nzlo_out, nzhi_out,
-                      nylo_out, nyhi_out, nxlo_out, nxhi_out);
-    first_iteration = false;
-  }
-
-  // update the cgrid_brick with data host
-  
   _nzlo_out = nzlo_out;
   _nzhi_out = nzhi_out;
   _nylo_out = nylo_out;
@@ -681,8 +656,47 @@ void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
   _ngridx = nxhi_out - nxlo_out + 1;
   _num_grid_points = _ngridx * _ngridy * _ngridz;
 
-  UCL_H_Vec<double> hview_cgrid;
-  hview_cgrid.alloc(_num_grid_points*2, *(this->ucl_device), UCL_READ_WRITE);
+  int numel = _num_grid_points*2;
+  if (_cgrid_brick.cols() == 0) {
+    hview_cgrid.alloc(numel, *(this->ucl_device), UCL_READ_WRITE);
+    _cgrid_brick.alloc(numel, *(this->ucl_device), UCL_READ_ONLY);
+  } else if (numel > _cgrid_brick.cols()) {
+    hview_cgrid.resize(numel);
+    _cgrid_brick.resize(numel);
+  }
+}
+
+// ---------------------------------------------------------------------------
+// fphi_uind = induced potential from grid
+// fphi_uind extracts the induced dipole potential from the particle mesh Ewald grid
+// ---------------------------------------------------------------------------
+
+template <class numtyp, class acctyp>
+void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
+                                    double ***host_thetai1, double ***host_thetai2,
+                                    double ***host_thetai3, int** host_igrid,
+                                    double ****host_grid_brick,
+                                    void** host_fdip_phi1,
+                                    void **host_fdip_phi2,
+                                    void **host_fdip_sum_phi,
+                                    const int nzlo_out, const int nzhi_out,
+                                    const int nylo_out, const int nyhi_out,
+                                    const int nxlo_out, const int nxhi_out,
+                                    bool& first_iteration)
+{
+  // TODO: find out why this alloc helps makes the cgrid_brick ucl_copy work
+  UCL_H_Vec<numtyp> hview;
+  hview.alloc(1, *(this->ucl_device), UCL_READ_ONLY);
+  
+  // allocation/resize and transfers before the first iteration
+  
+  if (first_iteration) {
+    precompute_induce(inum_full, bsorder, host_thetai1, host_thetai2,
+                      host_thetai3, host_igrid, nzlo_out, nzhi_out,
+                      nylo_out,  nyhi_out, nxlo_out, nxhi_out);
+    first_iteration = false;
+  }
+
   int n = 0;
   for (int iz = nzlo_out; iz <= nzhi_out; iz++)
     for (int iy = nylo_out; iy <= nyhi_out; iy++)
@@ -691,10 +705,7 @@ void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
         hview_cgrid[n+1] = host_grid_brick[iz][iy][ix][1];
         n += 2;
       }
-  //hview_cgrid.view(host_grid_brick_start, _num_grid_points*2, *(this->ucl_device));
-  _cgrid_brick.alloc(_num_grid_points*2, *(this->ucl_device), UCL_READ_ONLY);
-  ucl_copy(_cgrid_brick,hview_cgrid,false);
-
+  ucl_copy(_cgrid_brick, hview_cgrid, false);
 
   const int red_blocks = fphi_uind();
 
diff --git a/lib/gpu/lal_base_amoeba.h b/lib/gpu/lal_base_amoeba.h
index c2c2a2d93d..a4a7a8d1a7 100644
--- a/lib/gpu/lal_base_amoeba.h
+++ b/lib/gpu/lal_base_amoeba.h
@@ -153,10 +153,9 @@ class BaseAmoeba {
   virtual void precompute_induce(const int inum_full, const int bsorder,
                                  double ***host_thetai1, double ***host_thetai2,
                                  double ***host_thetai3, int** igrid,
-                                 double *host_grid_brick_start, double ****host_grid_brick,
-                                 int nzlo_out, int nzhi_out,
-                                 int nylo_out, int nyhi_out,
-                                 int nxlo_out, int nxhi_out);
+                                 const int nzlo_out, const int nzhi_out,
+                                 const int nylo_out, const int nyhi_out,
+                                 const int nxlo_out, const int nxhi_out);
 
   /// Compute multipole real-space with device neighboring
   virtual int** compute_multipole_real(const int ago, const int inum_full, const int nall,
@@ -183,10 +182,12 @@ class BaseAmoeba {
   virtual void compute_fphi_uind(const int inum_full, const int bsorder,
                                  double ***host_thetai1, double ***host_thetai2,
                                  double ***host_thetai3, int** igrid,
-                                 double *host_grid_brick_start, double ****host_grid_brick,
+                                 double ****host_grid_brick,
                                  void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
-                                 int nzlo_out, int nzhi_out, int nylo_out, int nyhi_out,
-                                 int nxlo_out, int nxhi_out, bool& first_iteration);
+                                 const int nzlo_out, const int nzhi_out,
+                                 const int nylo_out, const int nyhi_out,
+                                 const int nxlo_out, const int nxhi_out,
+                                 bool& first_iteration);
 
   /// Compute polar real-space with device neighboring
   virtual void compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
@@ -255,7 +256,9 @@ class BaseAmoeba {
   int _nmax, _max_tep_size, _max_fieldp_size;
 
   int _bsorder;
-  UCL_D_Vec<numtyp> _thetai1, _thetai2, _thetai3, _cgrid_brick;
+  UCL_D_Vec<numtyp> _thetai1, _thetai2, _thetai3;
+  UCL_H_Vec<numtyp> hview_cgrid;
+  UCL_D_Vec<numtyp> _cgrid_brick;
   UCL_D_Vec<int> _igrid;
   UCL_Vector<numtyp,numtyp> _fdip_phi1, _fdip_phi2, _fdip_sum_phi;
   int _max_thetai_size;