diff --git a/lib/gpu/pppm_gpu_kernel.cu b/lib/gpu/pppm_gpu_kernel.cu
index 4cee6f13a7..8211dd84a6 100644
--- a/lib/gpu/pppm_gpu_kernel.cu
+++ b/lib/gpu/pppm_gpu_kernel.cu
@@ -167,7 +167,7 @@ __kernel void make_rho(__global numtyp4 *x_, __global numtyp *q_,
 
   __syncthreads();
 
-  int bt=BLOCK_ID_X/BLOCK_PENCILS;
+  int bt=BLOCK_ID_X*BLOCK_PENCILS+pid;
   int ny=bt%npts_y;
   int nz=bt/npts_y;
   int y_start=0;
diff --git a/lib/gpu/pppm_gpu_memory.cpp b/lib/gpu/pppm_gpu_memory.cpp
index d348df2d2f..0ee11a3f0f 100644
--- a/lib/gpu/pppm_gpu_memory.cpp
+++ b/lib/gpu/pppm_gpu_memory.cpp
@@ -258,7 +258,7 @@ int PPPMGPUMemoryT::compute(const int ago, const int nlocal, const int nall,
   f_brick_z=boxlo[2]+(_nzlo_out-_nlower+shift)/delzinv;
 
   BX=block_size();
-  GX=_npts_y*_npts_z*8;
+  GX=static_cast<int>(ceil(static_cast<double>(_npts_y*_npts_z)/8));
   k_make_rho.set_size(GX,BX);
   k_make_rho.run(&atom->dev_x.begin(), &atom->dev_q.begin(),
                  &d_brick_counts.begin(), &d_brick_atoms.begin(),