diff --git a/lib/gpu/pppm_gpu_kernel.cu b/lib/gpu/pppm_gpu_kernel.cu index 2e9a6ac71f..d9092be85c 100644 --- a/lib/gpu/pppm_gpu_kernel.cu +++ b/lib/gpu/pppm_gpu_kernel.cu @@ -20,8 +20,8 @@ #define MAX_STENCIL 8 #define BLOCK_1D 64 -#define BLOCK_PENCILS 8 -#define PENCIL_SIZE 8 +#define BLOCK_PENCILS 2 +#define PENCIL_SIZE 32 #ifdef _DOUBLE_DOUBLE #define numtyp double diff --git a/lib/gpu/pppm_gpu_memory.cpp b/lib/gpu/pppm_gpu_memory.cpp index 247c87785e..a2c7da44b4 100644 --- a/lib/gpu/pppm_gpu_memory.cpp +++ b/lib/gpu/pppm_gpu_memory.cpp @@ -250,7 +250,7 @@ int PPPMGPUMemoryT::spread(const int ago, const int nlocal, const int nall, time_rho.start(); BX=block_size(); - GX=static_cast<int>(ceil(static_cast<double>(_npts_y*_npts_z)/8)); + GX=static_cast<int>(ceil(static_cast<double>(_npts_y*_npts_z)/2)); k_make_rho.set_size(GX,BX); k_make_rho.run(&atom->dev_x.begin(), &atom->dev_q.begin(), &d_brick_counts.begin(), &d_brick_atoms.begin(),