diff --git a/lib/gpu/pppm_gpu_memory.cpp b/lib/gpu/pppm_gpu_memory.cpp
index 08b0a792cb..8304d85199 100644
--- a/lib/gpu/pppm_gpu_memory.cpp
+++ b/lib/gpu/pppm_gpu_memory.cpp
@@ -45,7 +45,11 @@ int PPPMGPUMemoryT::bytes_per_atom() const {
 }
 
 template <class numtyp, class acctyp>
-bool PPPMGPUMemoryT::init(const int nlocal, const int nall, FILE *_screen) {
+bool PPPMGPUMemoryT::init(const int nlocal, const int nall, FILE *_screen,
+                          const int order, const int nxlo_out,
+                          const int nylo_out, const int nzlo_out,
+                          const int nxhi_out, const int nyhi_out,
+                          const int nzhi_out, double **rho_coeff) {
   screen=_screen;
 
   if (!device->init(*ans,true,false,nlocal,nall))
@@ -68,6 +72,21 @@ bool PPPMGPUMemoryT::init(const int nlocal, const int nall, FILE *_screen) {
   _allocated=true;
   _max_bytes=0;
   _max_an_bytes=ans->gpu_bytes();
+  
+  _order=order;  // store the init() arguments in the matching members
+  _nxlo_out=nxlo_out;
+  _nylo_out=nylo_out;
+  _nzlo_out=nzlo_out;
+  _nxhi_out=nxhi_out;
+  _nyhi_out=nyhi_out;
+  _nzhi_out=nzhi_out;
+  // Upload rho_coeff; presumably order rows indexed n2lo..order/2 (confirm)
+  int n2lo=(1-order)/2;
+  int numel=order*( order/2 - n2lo + 1 );
+  d_rho_coeff.alloc(numel,*ucl_device,UCL_READ_ONLY);
+  UCL_H_Vec<double> view;
+  view.view(rho_coeff[0]+n2lo,numel,*ucl_device);
+  ucl_copy(d_rho_coeff,view,true);
 
   return true;
 }
@@ -98,8 +117,10 @@ void PPPMGPUMemoryT::clear() {
 // ---------------------------------------------------------------------------
 template <class numtyp, class acctyp>
 void PPPMGPUMemoryT::compute(const int ago, const int nlocal, const int nall,
-                             double **host_x, int *host_type,
-                             bool &success, double *host_q) {
+                             double **host_x, int *host_type, bool &success,
+                             double *host_q, double *boxlo, 
+                             const double delxinv, const double delyinv,
+                             const double delzinv) {
   acc_timers();
   if (nlocal==0) {
     zero_timers();
@@ -132,6 +153,12 @@ void PPPMGPUMemoryT::compute(const int ago, const int nlocal, const int nall,
 
   int ainum=this->ans->inum();
   int anall=this->atom->nall();
+  numtyp f_boxlo_x=boxlo[0];  // boxlo[0..2] converted from double to numtyp
+  numtyp f_boxlo_y=boxlo[1];
+  numtyp f_boxlo_z=boxlo[2];
+  numtyp f_delxinv=delxinv;  // del*inv arguments converted to numtyp
+  numtyp f_delyinv=delyinv;
+  numtyp f_delzinv=delzinv;
 
 //  this->time_pair.start();
 //  this->k_pair.set_size(GX,BX);
diff --git a/lib/gpu/pppm_gpu_memory.h b/lib/gpu/pppm_gpu_memory.h
index 596ed41ae4..e4f43831af 100644
--- a/lib/gpu/pppm_gpu_memory.h
+++ b/lib/gpu/pppm_gpu_memory.h
@@ -37,7 +37,10 @@ class PPPMGPUMemory {
   virtual ~PPPMGPUMemory();
 
   /// Clear any previous data and set up for a new LAMMPS run
-  bool init(const int nlocal, const int nall, FILE *screen);
+  bool init(const int nlocal, const int nall, FILE *screen, const int order,
+            const int nxlo_out, const int nylo_out, const int nzlo_out,
+            const int nxhi_out, const int nyhi_out, const int nzhi_out,
+            double **rho_coeff);
 
   /// Check if there is enough storage for atom arrays and realloc if not
   /** \param success set to false if insufficient memory **/
@@ -75,8 +78,9 @@ class PPPMGPUMemory {
     ans->zero_timers();
   }
 
-  void compute(const int ago, const int nlocal, const int nall, double **host_x,
-               int *host_type, bool &success, double *charge);
+  void compute(const int ago,const int nlocal,const int nall,double **host_x,
+               int *host_type,bool &success,double *charge,double *boxlo,
+               const double delxinv,const double delyinv,const double delzinv);
 
   // -------------------------- DEVICE DATA ------------------------- 
 
@@ -98,6 +102,15 @@ class PPPMGPUMemory {
   PairGPUAtom<numtyp,acctyp> *atom;
 
 
+  // --------------------------- GRID DATA --------------------------
+  // NOTE(review): h_brick/d_brick are not assigned in the visible code
+  UCL_H_Vec<numtyp> *h_brick;
+  UCL_D_Vec<numtyp> *d_brick;
+
+  // -------------------------- STENCIL DATA -------------------------
+  UCL_D_Vec<numtyp> d_rho_coeff;  // copy of rho_coeff made in init()
+  int _order, _nxlo_out, _nylo_out, _nzlo_out, _nxhi_out, _nyhi_out, _nzhi_out;
+
   // ------------------------ FORCE/ENERGY DATA -----------------------
 
   PairGPUAns<numtyp,acctyp> *ans;
diff --git a/lib/gpu/pppm_l_gpu.cpp b/lib/gpu/pppm_l_gpu.cpp
index bd7c6a51f3..99a8b12122 100644
--- a/lib/gpu/pppm_l_gpu.cpp
+++ b/lib/gpu/pppm_l_gpu.cpp
@@ -28,7 +28,10 @@ static PPPMGPUMemory<PRECISION,ACC_PRECISION> PPPMF;
 // ---------------------------------------------------------------------------
 // Allocate memory on host and device and copy constants to device
 // ---------------------------------------------------------------------------
-bool pppm_gpu_init(const int nlocal, const int nall, FILE *screen) {
+bool pppm_gpu_init(const int nlocal, const int nall, FILE *screen,
+                   const int order, const int nxlo_out, const int nylo_out,
+                   const int nzlo_out, const int nxhi_out, const int nyhi_out,
+                   const int nzhi_out, double **rho_coeff) {
   PPPMF.clear();
   int first_gpu=PPPMF.device->first_device();
   int last_gpu=PPPMF.device->last_device();
@@ -48,7 +51,8 @@ bool pppm_gpu_init(const int nlocal, const int nall, FILE *screen) {
   }
 
   if (world_me==0) {
-    bool init_ok=PPPMF.init(nlocal,nall,screen);
+    bool init_ok=PPPMF.init(nlocal,nall,screen,order,nxlo_out,nylo_out,
+                            nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff);
     if (!init_ok)
       return false;
   }
@@ -67,7 +71,8 @@ bool pppm_gpu_init(const int nlocal, const int nall, FILE *screen) {
       fflush(screen);
     }
     if (gpu_rank==i && world_me!=0) {
-      bool init_ok=PPPMF.init(nlocal,nall,screen);
+      bool init_ok=PPPMF.init(nlocal,nall,screen,order,nxlo_out,nylo_out,
+                              nzlo_out,nxhi_out,nyhi_out,nzhi_out,rho_coeff);
       if (!init_ok)
         return false;
     }
@@ -86,8 +91,10 @@ void pppm_gpu_clear() {
 
 void pppm_gpu_compute(const int ago, const int nlocal, const int nall,
                       double **host_x, int *host_type, bool &success,
-                      double *host_q) {
-  PPPMF.compute(ago,nlocal,nall,host_x,host_type,success,host_q);
+                      double *host_q, double *boxlo, const double delxinv,
+                      const double delyinv, const double delzinv) {
+  PPPMF.compute(ago,nlocal,nall,host_x,host_type,success,host_q,boxlo,delxinv,
+                delyinv,delzinv);
 }
 
 double pppm_gpu_bytes() {
diff --git a/src/GPU/pppm_gpu.cpp b/src/GPU/pppm_gpu.cpp
index 2ea85b164e..c93972beb9 100644
--- a/src/GPU/pppm_gpu.cpp
+++ b/src/GPU/pppm_gpu.cpp
@@ -37,11 +37,15 @@
 
 // External functions from cuda library for atom decomposition
 
-bool pppm_gpu_init(const int nlocal, const int nall, FILE *screen);
+bool pppm_gpu_init(const int nlocal, const int nall, FILE *screen,
+                   const int order, const int nxlo_out, const int nylo_out,
+                   const int nzlo_out, const int nxhi_out, const int nyhi_out,
+                   const int nzhi_out, double **rho_coeff);
 void pppm_gpu_clear();
 void pppm_gpu_compute(const int ago, const int nlocal, const int nall,
                       double **host_x, int *host_type, bool &success,
-                      double *host_q);
+                      double *host_q, double *boxlo, const double delxinv,
+                      const double delyinv, const double delzinv);
 double pppm_gpu_bytes();
 
 using namespace LAMMPS_NS;
@@ -112,8 +116,8 @@ std::cout << "DEBUG_TIMES: " << time1 << " " << time2 << " " << time3
 void PPPMGPU::init()
 {
   if (me == 0) {
-    if (screen) fprintf(screen,"PPPMGPU initialization ...\n");
-    if (logfile) fprintf(logfile,"PPPMGPU initialization ...\n");
+    if (screen) fprintf(screen,"PPPM initialization ...\n");
+    if (logfile) fprintf(logfile,"PPPM initialization ...\n");
   }
 
   // error check
@@ -134,7 +138,7 @@ void PPPMGPU::init()
 
   if (order > MAXORDER) {
     char str[128];
-    sprintf(str,"PPPMGPU order cannot be greater than %d",MAXORDER);
+    sprintf(str,"PPPM order cannot be greater than %d",MAXORDER);
     error->all(str);
   }
 
@@ -220,7 +224,7 @@ void PPPMGPU::init()
     set_grid();
 
     if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
-      error->all("PPPMGPU grid is too large");
+      error->all("PPPM grid is too large");
 
     // global indices of PPPMGPU grid range from 0 to N-1
     // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
@@ -391,7 +395,7 @@ void PPPMGPU::init()
     order--;
   }
 
-  if (order == 0) error->all("PPPMGPU order has been reduced to 0");
+  if (order == 0) error->all("PPPM order has been reduced to 0");
 
   // decomposition of FFT mesh
   // global indices range from 0 to N-1
@@ -492,7 +496,8 @@ void PPPMGPU::init()
   compute_rho_coeff();
 
   bool init_ok = pppm_gpu_init(atom->nlocal, atom->nlocal+atom->nghost,
-                               screen);
+                               screen, order, nxlo_out, nylo_out, nzlo_out,
+                               nxhi_out, nyhi_out, nzhi_out, rho_coeff);
   if (!init_ok)
     error->one("Insufficient memory on accelerator (or no fix gpu).\n"); 
 time1=0; time2=0; time3=0;
@@ -659,7 +664,8 @@ void PPPMGPU::compute(int eflag, int vflag)
 {
   bool success = true;
   pppm_gpu_compute(neighbor->ago, atom->nlocal, atom->nlocal + atom->nghost,
-                   atom->x, atom->type, success, atom->q);
+                   atom->x, atom->type, success, atom->q, domain->boxlo,
+                   delxinv, delyinv, delzinv);
   if (!success)
     error->one("Out of memory on GPGPU");