From c4e10709d3dc53b883fdfbb6c677df8182ed2fee Mon Sep 17 00:00:00 2001 From: "W. Michael Brown" Date: Sun, 13 Feb 2011 18:46:28 -0500 Subject: [PATCH] 3 Working charge spreading kernels for PPPM. --- lib/gpu/pppm_gpu_kernel.cu | 118 +++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 56 deletions(-) diff --git a/lib/gpu/pppm_gpu_kernel.cu b/lib/gpu/pppm_gpu_kernel.cu index 666d5c88d4..563396f4c5 100644 --- a/lib/gpu/pppm_gpu_kernel.cu +++ b/lib/gpu/pppm_gpu_kernel.cu @@ -234,73 +234,79 @@ __kernel void make_rho2(__global numtyp4 *x_, __global numtyp *q_, const numtyp delzinv, const int order, const numtyp delvolinv) { __local numtyp rho_coeff[MAX_STENCIL*MAX_STENCIL]; + __local int nx,ny,x_start,y_start,x_stop,y_stop,nlow2; - int nx=BLOCK_ID_X; - int ny=BLOCK_ID_Y; int tx=THREAD_ID_X; - if (tx=nlocal_x) x_stop-=nx-nlocal_x+1; if (ny>=nlocal_y) y_stop-=ny-nlocal_y+1; + } + + if (tx=nlocal_z) - z_stop-=nz-nlocal_z+1; - - for (int n=z_start; n= 0; k--) { + rho1d_1 = rho_coeff[k*order+(order-m-1)] + rho1d_1*dy; + rho1d_0 = rho_coeff[k*order+(order-l-1)] + rho1d_0*dx; + } + + for (int n=0; n= 0; k--) { + for (int k = order-1; k >= 0; k--) rho1d_2 = rho_coeff[k*order+n] + rho1d_2*dz; - rho1d_1 = rho_coeff[k*order+m] + rho1d_1*dy; - rho1d_0 = rho_coeff[k*order+l] + rho1d_0*dx; - } - numtyp y0 = z0*rho1d_2; - numtyp x0 = y0*rho1d_1; - brick[pt]=p.x; - } - } - } - } - } - } + numtyp x0 = y0*rho1d_1; + ans[n]+=x0*rho1d_0; + } + } + } + } + } + for (int n=0; n