diff --git a/lib/gpu/pppm_gpu_kernel.cu b/lib/gpu/pppm_gpu_kernel.cu
index 2e9a6ac71f..d9092be85c 100644
--- a/lib/gpu/pppm_gpu_kernel.cu
+++ b/lib/gpu/pppm_gpu_kernel.cu
@@ -20,8 +20,8 @@
 
 #define MAX_STENCIL 8
 #define BLOCK_1D 64
-#define BLOCK_PENCILS 8
-#define PENCIL_SIZE 8
+#define BLOCK_PENCILS 2  // NOTE(review): same value as the literal grid-size divisor in PPPMGPUMemoryT::spread (pppm_gpu_memory.cpp); presumably the two must stay in sync - confirm
+#define PENCIL_SIZE 32  // BLOCK_PENCILS*PENCIL_SIZE == 64, the same value as BLOCK_1D
 
 #ifdef _DOUBLE_DOUBLE
 #define numtyp double
diff --git a/lib/gpu/pppm_gpu_memory.cpp b/lib/gpu/pppm_gpu_memory.cpp
index 247c87785e..a2c7da44b4 100644
--- a/lib/gpu/pppm_gpu_memory.cpp
+++ b/lib/gpu/pppm_gpu_memory.cpp
@@ -250,7 +250,7 @@ int PPPMGPUMemoryT::spread(const int ago, const int nlocal, const int nall,
   time_rho.start();
 
   BX=block_size();
-  GX=static_cast<int>(ceil(static_cast<double>(_npts_y*_npts_z)/8));
+  GX=static_cast<int>(ceil(static_cast<double>(_npts_y*_npts_z)/2));
   k_make_rho.set_size(GX,BX);
   k_make_rho.run(&atom->dev_x.begin(), &atom->dev_q.begin(),
                  &d_brick_counts.begin(), &d_brick_atoms.begin(),