diff --git a/lib/gpu/pppm_gpu_kernel.cu b/lib/gpu/pppm_gpu_kernel.cu index 4cee6f13a7..8211dd84a6 100644 --- a/lib/gpu/pppm_gpu_kernel.cu +++ b/lib/gpu/pppm_gpu_kernel.cu @@ -167,7 +167,7 @@ __kernel void make_rho(__global numtyp4 *x_, __global numtyp *q_, __syncthreads(); - int bt=BLOCK_ID_X/BLOCK_PENCILS; + int bt=BLOCK_ID_X*BLOCK_PENCILS+pid; int ny=bt%npts_y; int nz=bt/npts_y; int y_start=0; diff --git a/lib/gpu/pppm_gpu_memory.cpp b/lib/gpu/pppm_gpu_memory.cpp index d348df2d2f..0ee11a3f0f 100644 --- a/lib/gpu/pppm_gpu_memory.cpp +++ b/lib/gpu/pppm_gpu_memory.cpp @@ -258,7 +258,7 @@ int PPPMGPUMemoryT::compute(const int ago, const int nlocal, const int nall, f_brick_z=boxlo[2]+(_nzlo_out-_nlower+shift)/delzinv; BX=block_size(); - GX=_npts_y*_npts_z*8; + GX=static_cast<int>(ceil(static_cast<double>(_npts_y*_npts_z)/8)); k_make_rho.set_size(GX,BX); k_make_rho.run(&atom->dev_x.begin(), &atom->dev_q.begin(), &d_brick_counts.begin(), &d_brick_atoms.begin(),